From 646ade2bb70d7a0ec8f871bb1f661912661d8edf Mon Sep 17 00:00:00 2001 From: chessai Date: Fri, 3 Nov 2023 17:48:36 -0500 Subject: [PATCH 1/2] pact breakdown tool --- cabal.project | 7 + src/Chainweb/Pact/Backend/PactState.hs | 220 +++++++++++++++++++++---- tools/cwtool/CwTool.hs | 8 +- 3 files changed, 203 insertions(+), 32 deletions(-) diff --git a/cabal.project b/cabal.project index b358111d00..bcf6dfa83e 100644 --- a/cabal.project +++ b/cabal.project @@ -131,6 +131,13 @@ source-repository-package tag: 2f67d546ea6608fc6ebe5f2f6976503cbf340442 --sha256: 0x137akvbh4kr3qagksw74xdj2xz5vjnx1fbr41bb54a0lkcb8mm +-- version that builds sqlite with -DSQLITE_ENABLE_DBSTAT_VTAB +source-repository-package + type: git + location: https://github.com/chessai/direct-sqlite + tag: 2f1d2b4c617c2f1a07a6b281538da61cfa74c6af + --sha256: 0ysfb08vrzz3j8bklmby2r3fgf8aha3csda4lvhkypgr6xxfspvs + -- -------------------------------------------------------------------------- -- -- Relaxed Bounds diff --git a/src/Chainweb/Pact/Backend/PactState.hs b/src/Chainweb/Pact/Backend/PactState.hs index 02f712526a..696078fec5 100644 --- a/src/Chainweb/Pact/Backend/PactState.hs +++ b/src/Chainweb/Pact/Backend/PactState.hs @@ -1,8 +1,6 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DuplicateRecordFields #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE ImportQualifiedPost #-} {-# LANGUAGE InstanceSigs #-} {-# LANGUAGE LambdaCase #-} @@ -12,9 +10,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ViewPatterns #-} -- | -- Module: Chainweb.Pact.Backend.PactState @@ -41,11 +37,13 @@ module Chainweb.Pact.Backend.PactState , UserTable(..) , UserTableDiff(..) - , main + , pactDiffMain + , pactBreakdownMain ) where -import Data.IORef (newIORef, readIORef, writeIORef) +import Data.IORef (newIORef, readIORef, writeIORef, atomicModifyIORef') +import Data.Word (Word64) import Control.Concurrent.MVar (MVar, putMVar, takeMVar, newEmptyMVar) import UnliftIO.Async (pooledMapConcurrentlyN_) import Control.Lens (over) @@ -60,6 +58,7 @@ import Data.ByteString.Lazy qualified as BSL import Data.Foldable qualified as F import Data.Int (Int64) import Data.List qualified as List +import Data.Map (Map) import Data.Map.Strict qualified as M import Data.Ord (Down(..)) import Data.Text (Text) @@ -74,7 +73,7 @@ import Patience.Delta (Delta(..)) import Chainweb.BlockHeight (BlockHeight(..)) import Chainweb.Utils (sshow, HasTextRepresentation, fromText, toText, int) -import Chainweb.Version (ChainwebVersion(..), ChainwebVersionName, unsafeChainId) +import Chainweb.Version (ChainwebVersion(..), ChainwebVersionName, ChainId, unsafeChainId, chainIdToText) import Chainweb.Version.Mainnet (mainnet) import Chainweb.Version.Registry (lookupVersionByName) import Chainweb.Version.Utils (chainIdsAt) @@ -83,6 +82,7 @@ import Chainweb.Pact.Backend.Utils (withSqliteDb) import Chainweb.Pact.Backend.Compaction qualified as C import System.Exit (exitFailure) +import System.IO qualified as IO import System.Logger (LogLevel(..), setLoggerScope, loggerFunIO) import Data.LogMessage (TextLog(..), toLogMessage) @@ -91,11 +91,64 @@ import Pact.Types.SQLite qualified as Pact import Streaming.Prelude (Stream, Of) import Streaming.Prelude qualified as S +checkpointerTables :: [Utf8] +checkpointerTables = ["BlockHistory", "VersionedTableCreation", "VersionedTableMutation", "TransactionIndex"] + +compactionTables :: [Utf8] +compactionTables = ["CompactGrandHash", "CompactActiveRow"] + +sysTables :: [Utf8] +sysTables = ["SYS:usertables", "SYS:KeySets", "SYS:Modules", "SYS:Namespaces", "SYS:Pacts"] + excludedTables :: [Utf8] excludedTables = checkpointerTables ++ compactionTables + +data TableType + = TableTypeSystem + | TableTypeCompaction + | TableTypeUser + deriving stock (Eq) + +instance ToJSON TableType where + toJSON = \case + TableTypeSystem -> "system" + TableTypeCompaction -> "compaction" + TableTypeUser -> "user" + +prettyTableType :: TableType -> String +prettyTableType = \case + TableTypeSystem -> "System table" + TableTypeCompaction -> "Compaction table" + TableTypeUser -> "User table" + +data SizedTable = SizedTable + { tableName :: Text + , tableSizeBytes :: Word64 + , tableType :: TableType + } + +instance ToJSON SizedTable where + toJSON tbl = Aeson.object + [ "table_name" .= tbl.tableName + , "table_size_bytes" .= tbl.tableSizeBytes + , "table_type" .= tbl.tableType + ] + +getTableSizesBytes :: Database -> IO [SizedTable] +getTableSizesBytes db = do + let qryText = "SELECT name, SUM(\"pgsize\") table_size FROM \"dbstat\" GROUP BY name ORDER BY table_size DESC" + Pact.qry db qryText [] [RText, RInt] >>= mapM go where - checkpointerTables = ["BlockHistory", "VersionedTableCreation", "VersionedTableMutation", "TransactionIndex"] - compactionTables = ["CompactGrandHash", "CompactActiveRow"] + go :: [SType] -> IO SizedTable + go = \case + [SText tbl, SInt tblSize] -> do + let tblType = + if | tbl `elem` checkpointerTables -> TableTypeSystem + | tbl `elem` sysTables -> TableTypeSystem + | tbl `elem` compactionTables -> TableTypeCompaction + | otherwise -> TableTypeUser + pure (SizedTable (utf8ToText tbl) (fromIntegral tblSize) tblType) + _ -> error "getTableSizesBytes: expected (text, int)" getLatestBlockHeight :: Database -> IO BlockHeight getLatestBlockHeight db = do @@ -150,7 +203,7 @@ getPactUserTables db numTables = do getLatestPactState :: Database -> Stream (Of UserTable) IO () getLatestPactState db = do - numTablesVar <- liftIO $ newEmptyMVar + numTablesVar <- liftIO newEmptyMVar let go :: Word -> Stream (Of UserTable) IO () -> Stream (Of UserTable) IO () go !tablesRemaining s = do @@ -246,8 +299,7 @@ instance ToJSON PactRow where getActiveRows :: UserTable -> UserTable getActiveRows (UserTable name rows) = UserTable name - $ List.map takeHead - $ List.map (List.sortOn (Down . txId)) + $ List.map (takeHead . List.sortOn (Down . txId)) $ List.groupBy (\x y -> rowKey x == rowKey y) $ List.sortOn rowKey rows where @@ -259,7 +311,101 @@ getActiveRows (UserTable name rows) = UserTable name utf8ToText :: Utf8 -> Text utf8ToText (Utf8 u) = Text.decodeUtf8 u -data Config = Config +data ChainSizeInfo = ChainSizeInfo + { totalSizeBytes :: Word64 + , tableSizes :: [SizedTable] + } + +instance ToJSON ChainSizeInfo where + toJSON cInfo = Aeson.object + [ "total_size_bytes" .= cInfo.totalSizeBytes + , "table_sizes" .= cInfo.tableSizes + ] + +mkChainSizeInfo :: [SizedTable] -> ChainSizeInfo +mkChainSizeInfo tbls = ChainSizeInfo + { totalSizeBytes = List.foldl' (\acc tbl -> acc + tbl.tableSizeBytes) 0 tbls + , tableSizes = tbls + } + +data PactBreakdown = PactBreakdown + { totalSizeBytes :: Word64 + , sizes :: Map ChainId ChainSizeInfo + } + +instance ToJSON PactBreakdown where + toJSON b = Aeson.object + [ "total_size_bytes" .= b.totalSizeBytes + , "chain_sizes" .= b.sizes + ] + +reportBreakdown :: PactBreakdown -> IO () +reportBreakdown breakdown = do + IO.withFile "report.txt" IO.AppendMode $ \h -> do + let put = IO.hPutStrLn h + put $ "Total Size of All Chains: " ++ showBytes breakdown.totalSizeBytes + forM_ (M.toAscList breakdown.sizes) $ \(cid, cInfo) -> do + put "" + put $ "Chain " ++ Text.unpack (chainIdToText cid) + put $ "Total Size: " ++ showBytes cInfo.totalSizeBytes + forM_ cInfo.tableSizes $ \tbl -> do + put $ Text.unpack tbl.tableName ++ + " (" ++ prettyTableType tbl.tableType ++ "): " ++ + showBytes tbl.tableSizeBytes + +data PactBreakdownConfig = PactBreakdownConfig + { pactDbDir :: FilePath + , chainwebVersion :: ChainwebVersion + , numThreads :: Int + } + +pactBreakdownMain :: IO () +pactBreakdownMain = do + cfg <- execParser opts + + cids <- getCids cfg.pactDbDir cfg.chainwebVersion + + sizesRef <- newIORef @(Map ChainId [SizedTable]) M.empty + + flip (pooledMapConcurrentlyN_ cfg.numThreads) cids $ \cid -> do + C.withDefaultLogger Error $ \logger -> do + let resetDb = False + withSqliteDb cid logger cfg.pactDbDir resetDb $ \(SQLiteEnv db _) -> do + sizedTables <- getTableSizesBytes db + atomicModifyIORef' sizesRef $ \m -> (M.insert cid sizedTables m, ()) + + sizes <- readIORef sizesRef + let chainSizeInfos = M.map mkChainSizeInfo sizes + let breakdown = PactBreakdown + { totalSizeBytes = M.foldl' (\acc cInfo -> acc + cInfo.totalSizeBytes) 0 chainSizeInfos + , sizes = chainSizeInfos + } + reportBreakdown breakdown + + where + opts :: ParserInfo PactBreakdownConfig + opts = info (parser <**> helper) + (fullDesc <> progDesc "Pact DB compare-and-compare") + + parser :: Parser PactBreakdownConfig + parser = PactBreakdownConfig + <$> strOption + (long "pact-database-dir" + <> metavar "PACT_DB_DIRECTORY" + <> help "Pact database directory") + <*> (fmap (lookupVersionByName . fromTextSilly @ChainwebVersionName) $ strOption + (long "graph-version" + <> metavar "CHAINWEB_VERSION" + <> help "Chainweb version for graph. Only needed for non-standard graphs." + <> value (toText (_versionName mainnet)) + <> showDefault)) + <*> option auto + (long "threads" + <> metavar "NUM_THREADS" + <> help "Number of threads on which to run compaction." + <> value 4) + +data PactDiffConfig = PactDiffConfig { pactDbDir :: FilePath , compactDir :: FilePath , chainwebVersion :: ChainwebVersion @@ -267,22 +413,15 @@ data Config = Config , numThreads :: Int } -main :: IO () -main = do +pactDiffMain :: IO () +pactDiffMain = do cfg <- execParser opts when (cfg.pactDbDir == cfg.compactDir) $ do Text.putStrLn "Pact database directory and compacted Pact database directory cannot be the same." exitFailure - cids <- do - -- Get the latest block height on chain 0 for the purpose of calculating all - -- the chain ids at the current (version,height) pair - latestBlockHeight <- C.withDefaultLogger Error $ \logger -> do - let resetDb = False - withSqliteDb (unsafeChainId 0) logger cfg.pactDbDir resetDb $ \(SQLiteEnv db _) -> do - getLatestBlockHeight db - pure $ List.sort $ F.toList $ chainIdsAt cfg.chainwebVersion latestBlockHeight + cids <- getCids cfg.pactDbDir cfg.chainwebVersion flip (pooledMapConcurrentlyN_ cfg.numThreads) cids $ \cid -> do C.withPerChainFileLogger cfg.logDir cid Debug $ \logger' -> do @@ -307,12 +446,12 @@ main = do TextLog $ "Non-empty diff." exitFailure where - opts :: ParserInfo Config + opts :: ParserInfo PactDiffConfig opts = info (parser <**> helper) (fullDesc <> progDesc "Pact DB compare-and-compare") - parser :: Parser Config - parser = Config + parser :: Parser PactDiffConfig + parser = PactDiffConfig <$> strOption (long "pact-database-dir" <> metavar "PACT_DB_DIRECTORY" @@ -339,7 +478,28 @@ main = do <> help "Number of threads on which to run compaction." <> value 4) - fromTextSilly :: HasTextRepresentation a => Text -> a - fromTextSilly t = case fromText t of - Just a -> a - Nothing -> error "fromText failed" +fromTextSilly :: HasTextRepresentation a => Text -> a +fromTextSilly t = case fromText t of + Just a -> a + Nothing -> error "fromText failed" + +getCids :: FilePath -> ChainwebVersion -> IO [ChainId] +getCids pactDbDir chainwebVersion = do + -- Get the latest block height on chain 0 for the purpose of calculating all + -- the chain ids at the current (version,height) pair + latestBlockHeight <- C.withDefaultLogger Error $ \logger -> do + let resetDb = False + withSqliteDb (unsafeChainId 0) logger pactDbDir resetDb $ \(SQLiteEnv db _) -> do + getLatestBlockHeight db + pure $ List.sort $ F.toList $ chainIdsAt chainwebVersion latestBlockHeight + +showBytes :: Word64 -> String +showBytes bytes + | bytes > oneMB = show (w2d bytes / w2d oneMB) ++ " MB" + | otherwise = show bytes ++ " bytes" + where + oneMB :: Word64 + oneMB = 1024 * 1024 + + w2d :: Word64 -> Double + w2d = fromIntegral diff --git a/tools/cwtool/CwTool.hs b/tools/cwtool/CwTool.hs index 78a52e82e3..eeefac6e64 100644 --- a/tools/cwtool/CwTool.hs +++ b/tools/cwtool/CwTool.hs @@ -10,7 +10,7 @@ import System.Exit import Text.Printf import Chainweb.Pact.Backend.Compaction (main) -import Chainweb.Pact.Backend.PactState (main) +import Chainweb.Pact.Backend.PactState (pactDiffMain, pactBreakdownMain) import qualified CheckpointerDBChecksum import qualified Ea @@ -109,7 +109,11 @@ topLevelCommands = , CommandSpec "compact-compare" "Compare pre- and post-compaction state" - Chainweb.Pact.Backend.PactState.main + Chainweb.Pact.Backend.PactState.pactDiffMain + , CommandSpec + "pact-breakdown" + "Show a breakdown of the Pact State" + Chainweb.Pact.Backend.PactState.pactBreakdownMain , CommandSpec "calculate-release" "Calculate next service date and block heights for upgrades" From 118cded5ec5208cb4d89d48be649c141f07af763 Mon Sep 17 00:00:00 2001 From: chessai Date: Mon, 6 Nov 2023 11:16:42 -0600 Subject: [PATCH 2/2] lots more work on granular breakdown --- src/Chainweb/Pact/Backend/PactState.hs | 453 ++++++++++++++++++------- 1 file changed, 332 insertions(+), 121 deletions(-) diff --git a/src/Chainweb/Pact/Backend/PactState.hs b/src/Chainweb/Pact/Backend/PactState.hs index 771bf34201..ed5dae1256 100644 --- a/src/Chainweb/Pact/Backend/PactState.hs +++ b/src/Chainweb/Pact/Backend/PactState.hs @@ -1,17 +1,21 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE ImportQualifiedPost #-} {-# LANGUAGE InstanceSigs #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedRecordDot #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE StrictData #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} +{-# options_ghc -fno-warn-unused-top-binds #-} + -- | -- Module: Chainweb.Pact.Backend.PactState -- Copyright: Copyright © 2023 Kadena LLC. @@ -28,20 +32,22 @@ -- module Chainweb.Pact.Backend.PactState - ( getPactTables - , getPactUserTables + ( getPactTableNames + , getPactTables , getLatestPactState , getLatestBlockHeight , PactRow(..) - , UserTable(..) - , UserTableDiff(..) + , Table(..) + , TableDiff(..) , pactDiffMain , pactBreakdownMain ) where +import Data.String (IsString) +import Data.Bifunctor (first) import Data.IORef (newIORef, readIORef, atomicModifyIORef') import Data.Word (Word64) import Control.Concurrent.MVar (MVar, putMVar, takeMVar, newEmptyMVar) @@ -49,12 +55,13 @@ import UnliftIO.Async (pooledMapConcurrentlyN_) import Control.Lens (over) import Control.Monad (forM, forM_, when) import Control.Monad.IO.Class (MonadIO(liftIO)) -import Data.Aeson (ToJSON(..), (.=)) -import Data.Aeson qualified as Aeson import Data.Vector (Vector) import Data.Vector qualified as Vector import Data.ByteString (ByteString) -import Data.ByteString.Lazy qualified as BSL +import Data.ByteString qualified as BS +import Data.ByteString.Builder qualified as BB +import Data.ByteString.Char8 qualified as BS8 +import Data.ByteString.Lazy.Char8 qualified as BSL8 import Data.Foldable qualified as F import Data.Int (Int64) import Data.List qualified as List @@ -66,6 +73,7 @@ import Data.Text qualified as Text import Data.Text.IO qualified as Text import Data.Text.Encoding qualified as Text import Database.SQLite3.Direct (Utf8(..), Database) +import GHC.Records (HasField(..)) import Options.Applicative import Patience qualified import Patience.Map qualified as PatienceM @@ -86,11 +94,19 @@ import System.IO qualified as IO import System.Logger (LogLevel(..), setLoggerScope, loggerFunIO) import Data.LogMessage (TextLog(..), toLogMessage) +import Pact.JSON.Encode qualified as J +import Pact.JSON.Decode qualified as JD +import Pact.JSON.Encode ((.=)) +import Pact.Types.RowData (RowDataVersion(..), RowData(..), RowDataValue(..)) +import Pact.Types.Exp (Literal(..)) +import Pact.Types.Term (ObjectMap(..), Guard(..), ModRef(..)) import Pact.Types.SQLite (SType(..), RType(..)) import Pact.Types.SQLite qualified as Pact import Streaming.Prelude (Stream, Of) import Streaming.Prelude qualified as S +import Unsafe.Coerce (unsafeCoerce) + checkpointerTables :: [Utf8] checkpointerTables = ["BlockHistory", "VersionedTableCreation", "VersionedTableMutation", "TransactionIndex"] @@ -100,55 +116,93 @@ compactionTables = ["CompactGrandHash", "CompactActiveRow"] sysTables :: [Utf8] sysTables = ["SYS:usertables", "SYS:KeySets", "SYS:Modules", "SYS:Namespaces", "SYS:Pacts"] -excludedTables :: [Utf8] -excludedTables = checkpointerTables ++ compactionTables +nonUserTables :: [Utf8] +nonUserTables = checkpointerTables ++ compactionTables data TableType - = TableTypeSystem - | TableTypeCompaction - | TableTypeUser - deriving stock (Eq) + = System + | Compaction + | User + deriving stock (Eq, Ord, Show) -instance ToJSON TableType where - toJSON = \case - TableTypeSystem -> "system" - TableTypeCompaction -> "compaction" - TableTypeUser -> "user" +instance J.Encode TableType where + build = \case + System -> J.text "system" + Compaction -> J.text "compaction" + User -> J.text "user" prettyTableType :: TableType -> String prettyTableType = \case - TableTypeSystem -> "System table" - TableTypeCompaction -> "Compaction table" - TableTypeUser -> "User table" + System -> "System table" + Compaction -> "Compaction table" + User -> "User table" + +mkTableType :: Utf8 -> TableType +mkTableType tbl + | tbl `elem` checkpointerTables = System + | tbl `elem` sysTables = System + | tbl `elem` compactionTables = Compaction + | otherwise = User + +buildW64 :: Word64 -> J.Builder +buildW64 = unsafeCoerce BB.word64Dec + +buildI64 :: Int64 -> J.Builder +buildI64 = unsafeCoerce BB.int64Dec + +data Sized a = Sized + { sizeBytes :: Word64 + , item :: a + } + deriving stock (Eq, Ord, Show) + +instance (J.Encode a) => J.Encode (Sized a) where + build s = J.object + [ "size_bytes" .= buildW64 s.sizeBytes + , "item" .= s.item + ] + +data Table = Table + { name :: Text + , typ :: TableType + , rows :: [PactRow] + } + deriving stock (Eq, Ord, Show) + +instance J.Encode Table where + build tbl = J.object + [ "name" .= tbl.name + , "type" .= tbl.typ + ] data SizedTable = SizedTable - { tableName :: Text - , tableSizeBytes :: Word64 - , tableType :: TableType + { name :: Text + , typ :: TableType + , sizeBytes :: Word64 + , rows :: [SizedPactRow] } + deriving stock (Eq, Ord, Show) -instance ToJSON SizedTable where - toJSON tbl = Aeson.object - [ "table_name" .= tbl.tableName - , "table_size_bytes" .= tbl.tableSizeBytes - , "table_type" .= tbl.tableType +instance J.Encode SizedTable where + build tbl = J.object + [ "name" .= tbl.name + , "type" .= tbl.typ + , "size_bytes" .= buildW64 tbl.sizeBytes + , "sized_rows" .= J.array tbl.rows ] -getTableSizesBytes :: Database -> IO [SizedTable] -getTableSizesBytes db = do - let qryText = "SELECT name, SUM(\"pgsize\") table_size FROM \"dbstat\" GROUP BY name ORDER BY table_size DESC" - Pact.qry db qryText [] [RText, RInt] >>= mapM go - where - go :: [SType] -> IO SizedTable - go = \case - [SText tbl, SInt tblSize] -> do - let tblType = - if | tbl `elem` checkpointerTables -> TableTypeSystem - | tbl `elem` sysTables -> TableTypeSystem - | tbl `elem` compactionTables -> TableTypeCompaction - | otherwise -> TableTypeUser - pure (SizedTable (utf8ToText tbl) (fromIntegral tblSize) tblType) - _ -> error "getTableSizesBytes: expected (text, int)" +getPactSizedTableNames :: Database -> IO (Vector (Sized Utf8)) +getPactSizedTableNames db = do + let sortedTableNames :: [[SType]] -> [Sized Utf8] + sortedTableNames rows = + List.sortOn (\s -> s.item) + $ flip List.map rows $ \case + [SText tbl, SInt tblSize] -> Sized (fromIntegral @_ @Word64 tblSize) tbl + _ -> error "getPactTables.sortedTableNames: expected (text, int)" + + let qryText = "SELECT name, SUM(\"pgsize\") table_size FROM \"dbstat\" WHERE name NOT LIKE \"sqlite_%\" AND name NOT LIKE \"%_ix\" AND name NOT LIKE \"transactionIndexByBH\" GROUP BY name ORDER BY table_size DESC" + tables <- sortedTableNames <$> Pact.qry db qryText [] [RText, RInt] + pure (Vector.fromList tables) getLatestBlockHeight :: Database -> IO BlockHeight getLatestBlockHeight db = do @@ -157,12 +211,12 @@ getLatestBlockHeight db = do [[SInt bh]] -> pure (BlockHeight (int bh)) _ -> error "getLatestBlockHeight: expected int" -getPactTables :: Database -> IO (Vector Utf8) -getPactTables db = do +getPactTableNames :: Database -> IO (Vector Utf8) +getPactTableNames db = do let sortedTableNames :: [[SType]] -> [Utf8] sortedTableNames rows = M.elems $ M.fromListWith const $ flip List.map rows $ \case [SText u] -> (Text.toLower (utf8ToText u), u) - _ -> error "getPactUserTables.sortedTableNames: expected text" + _ -> error "getPactTables.sortedTableNames: expected text" tables <- fmap sortedTableNames $ do let qryText = @@ -175,39 +229,77 @@ getPactTables db = do pure (Vector.fromList tables) --- | Get all of the rows for each user table. The tables will be sorted. +-- | Get all of the rows for each table. The tables will be sorted +-- lexicographically by name. -- -- The 'MVar' 'Word' argument is supposed to be supplied as a 'newEmptyMVar'. -- This will get filled with the number of tables, once it is known. -getPactUserTables :: Database -> MVar Word -> Stream (Of UserTable) IO () -getPactUserTables db numTables = do +-- +-- The ['Utf8'] argument are tables to exclude. +getPactTables :: () + => [Utf8] + -> Database + -> MVar Word + -> Stream (Of Table) IO () +getPactTables excludedTables db numTables = do let fmtTable x = "\"" <> x <> "\"" - tables <- liftIO $ getPactTables db + tables <- liftIO $ getPactTableNames db liftIO $ putMVar numTables (fromIntegral (Vector.length tables)) forM_ tables $ \tbl -> do - if tbl `notElem` excludedTables - then do + when (tbl `notElem` excludedTables) $ do let qryText = "SELECT rowkey, rowdata, txid FROM " <> fmtTable tbl userRows <- liftIO $ Pact.qry db qryText [] [RText, RBlob, RInt] shapedRows <- forM userRows $ \case [SText (Utf8 rowKey), SBlob rowData, SInt txId] -> do pure $ PactRow {..} - _ -> error "getPactUserTables: unexpected shape of user table row" - S.yield $ UserTable (utf8ToText tbl) shapedRows - else do - pure () + _ -> error "getPactTables: unexpected shape of user table row" + S.yield $ Table + { name = utf8ToText tbl + , typ = mkTableType tbl + , rows = shapedRows + } -getLatestPactState :: Database -> Stream (Of UserTable) IO () +getPactSizedTables :: () + => [Utf8] + -> Database + -> Stream (Of SizedTable) IO () +getPactSizedTables excludedTables db = do + let fmtTable :: (IsString s, Monoid s) => s -> s + fmtTable x = "\"" <> x <> "\"" + + tables <- liftIO $ getPactSizedTableNames db + + forM_ tables $ \(Sized sz tbl) -> do + when (tbl `notElem` excludedTables) $ do + let qryText = "SELECT rowkey, rowdata, txid FROM " + <> fmtTable tbl + userRows <- liftIO $ Pact.qry db qryText [] [RText, RBlob, RInt] + shapedRows <- forM userRows $ \case + [SText (Utf8 rowKey), SBlob rowData, SInt txId] -> do + pure $ sizeTagPactRow (utf8ToText tbl, rowKey) $ PactRow {..} + _ -> error "getPactSizedTables: unexpected shape of user table row" + let rows = + List.sortOn (\r -> Down r.sizedRowData.size) + $ List.filter (\r -> r.sizedRowData.size > oneKB) shapedRows + when (not (null rows)) $ do + S.yield $ SizedTable + { name = utf8ToText tbl + , typ = mkTableType tbl + , sizeBytes = sz + , rows = rows + } + +getLatestPactState :: Database -> Stream (Of Table) IO () getLatestPactState db = do numTablesVar <- liftIO newEmptyMVar - let go :: Word -> Stream (Of UserTable) IO () -> Stream (Of UserTable) IO () - go !tablesRepactDiffMaining s = do - if tablesRepactDiffMaining == 0 + let go :: Word -> Stream (Of Table) IO () -> Stream (Of Table) IO () + go !tablesRemaining s = do + if tablesRemaining == 0 then do pure () else do @@ -217,9 +309,9 @@ getLatestPactState db = do pure () Right (userTable, rest) -> do S.yield (getActiveRows userTable) - go (tablesRepactDiffMaining - 1) rest + go (tablesRemaining - 1) rest - e <- liftIO $ S.next (getPactUserTables db numTablesVar) + e <- liftIO $ S.next (getPactTables nonUserTables db numTablesVar) case e of Left () -> do pure () @@ -231,7 +323,7 @@ getLatestPactState db = do -- This assumes the same tables (essentially zipWith). -- Note that this assumes we got the state from `getLatestPactState`, --- because `getPactUserTables` sorts the table names, and `getLatestPactState` +-- because `getPactTables` sorts the table names, and `getLatestPactState` -- sorts the [PactRow] by rowKey. -- -- If we ever find two tables that are not the same, we throw an error. @@ -239,69 +331,88 @@ getLatestPactState db = do -- This diminishes the utility of comparing two pact states that are known to be -- at different heights, but that hurts our ability to perform the diff in -- constant memory. -diffLatestPactState :: Stream (Of UserTable) IO () -> Stream (Of UserTable) IO () -> Stream (Of UserTableDiff) IO () +diffLatestPactState :: Stream (Of Table) IO () -> Stream (Of Table) IO () -> Stream (Of TableDiff) IO () diffLatestPactState s1 s2 = do - let diff :: UserTable -> UserTable -> UserTableDiff - diff ut1 ut2 - | ut1.tableName /= ut2.tableName = error "diffLatestPactState: mismatched table names" - | otherwise = UserTableDiff ut1.tableName + let diff :: Table -> Table -> TableDiff + diff tbl1 tbl2 + | tbl1.name /= tbl2.name = error "diffLatestPactState: mismatched table names" + | otherwise = TableDiff tbl1.name $ List.filter (not . PatienceM.isSame) $ Patience.pairItems (\x y -> x.rowKey == y.rowKey) - $ Patience.diff ut1.rows ut2.rows + $ Patience.diff tbl1.rows tbl2.rows S.zipWith diff s1 s2 -data UserTableDiff = UserTableDiff - { tableName :: !Text +data TableDiff = TableDiff + { name :: !Text , rowDiff :: [Delta PactRow] } deriving stock (Eq, Ord, Show) -instance ToJSON UserTableDiff where - toJSON utd = Aeson.object - [ "table_name" .= utd.tableName - , "row_diff" .= List.map deltaToObject utd.rowDiff +instance J.Encode TableDiff where + build td = J.object + [ "table_name" .= td.name + , "row_diff" .= J.array (List.map deltaToObject td.rowDiff) ] where - deltaToObject :: (ToJSON a) => Delta a -> Aeson.Value + deltaToObject :: (J.Encode a) => Delta a -> J.Builder deltaToObject = \case - Old x -> Aeson.object + Old x -> J.object [ "old" .= x ] - New x -> Aeson.object + New x -> J.object [ "new" .= x ] - Delta x y -> Aeson.object + Delta x y -> J.object [ "old" .= x , "new" .= y ] - Same _ -> Aeson.Null + Same _ -> J.null -data UserTable = UserTable - { tableName :: !Text - , rows :: [PactRow] +data PactRow = PactRow + { rowKey :: ByteString + , rowData :: ByteString + , txId :: Int64 } deriving stock (Eq, Ord, Show) -data PactRow = PactRow - { rowKey :: !ByteString - , rowData :: !ByteString - , txId :: !Int64 +instance J.Encode PactRow where + build pr = J.object + [ "row_key" .= Text.decodeUtf8 pr.rowKey + , "row_data" .= Text.decodeUtf8 pr.rowData + , "tx_id" .= buildI64 pr.txId + ] + +sizeTagPactRow :: (Text, ByteString) -> PactRow -> SizedPactRow +sizeTagPactRow dbgInfo pr = SizedPactRow + { rowKey = pr.rowKey + , sizedRowData = sizeTagRowData dbgInfo oneMB pr.rowData + , txId = pr.txId + } + +data SizedPactRow = SizedPactRow + { rowKey :: ByteString + , sizedRowData :: SizedRowData + , txId :: Int64 } deriving stock (Eq, Ord, Show) -instance ToJSON PactRow where - toJSON pr = Aeson.object +instance J.Encode SizedPactRow where + build pr = J.object [ "row_key" .= Text.decodeUtf8 pr.rowKey - , "row_data" .= Text.decodeUtf8 pr.rowData - , "tx_id" .= pr.txId + , "sized_row_data" .= pr.sizedRowData + , "tx_id" .= buildI64 pr.txId ] -getActiveRows :: UserTable -> UserTable -getActiveRows (UserTable name rows) = UserTable name - $ List.map (takeHead . List.sortOn (Down . txId)) - $ List.groupBy (\x y -> rowKey x == rowKey y) - $ List.sortOn rowKey rows +getActiveRows :: Table -> Table +getActiveRows tbl = Table + { name = tbl.name + , typ = tbl.typ + , rows = + List.map (takeHead . List.sortOn (\rd -> Down rd.txId)) + $ List.groupBy (\rd1 rd2 -> rd1.rowKey == rd2.rowKey) + $ List.sortOn (\rd -> rd.rowKey) tbl.rows + } where takeHead :: [a] -> a takeHead = \case @@ -311,20 +422,111 @@ getActiveRows (UserTable name rows) = UserTable name utf8ToText :: Utf8 -> Text utf8ToText (Utf8 u) = Text.decodeUtf8 u +data SizedRowDataValue + = SRDLiteral Word64 Literal + | SRDList Word64 (Vector SizedRowDataValue) + | SRDObject Word64 (ObjectMap SizedRowDataValue) + | SRDGuard Word64 (Guard SizedRowDataValue) + | SRDModRef Word64 ModRef + deriving stock (Eq, Ord, Show) + +instance J.Encode SizedRowDataValue where + build = \case + SRDLiteral sz l -> J.object + [ "size" .= buildW64 sz + , "literal" .= l + ] + SRDList sz v -> J.object + [ "size" .= buildW64 sz + , "list" .= J.array v + ] + SRDObject sz o -> J.object + [ "size" .= buildW64 sz + , "object" .= o + ] + SRDGuard sz g -> J.object + [ "size" .= buildW64 sz + , "guard" .= g + ] + SRDModRef sz (ModRef refName refSpec _) -> J.object + [ "size" .= buildW64 sz + , "mod_ref" .= J.object + [ "ref_spec" .= fmap J.array refSpec + , "ref_name" .= refName + ] + ] + +jsonSize :: (J.Encode a) => a -> Word64 +jsonSize x = fromIntegral (BS.length (J.encodeStrict x)) + +sizeTagRowDataValue :: RowDataValue -> SizedRowDataValue +sizeTagRowDataValue = go + where + go :: RowDataValue -> SizedRowDataValue + go rdv = + let + topLevelSize = jsonSize rdv + in + case rdv of + RDLiteral l -> SRDLiteral topLevelSize l + RDList ls -> SRDList topLevelSize (recur ls) + RDObject o -> SRDObject topLevelSize (recur o) + RDGuard g -> SRDGuard topLevelSize (recur g) + RDModRef m -> SRDModRef topLevelSize m + + recur :: (Functor f) => f RowDataValue -> f SizedRowDataValue + recur = fmap go + +data SizedRowData + = SRDUnderThreshold Word64 + | SRDAboveThreshold Word64 RowDataVersion (ObjectMap SizedRowDataValue) + deriving stock (Eq, Ord, Show) + +instance HasField "size" SizedRowData Word64 where + getField = \case + SRDUnderThreshold sz -> sz + SRDAboveThreshold sz _ _ -> sz + +instance J.Encode SizedRowData where + build = \case + SRDUnderThreshold w -> J.object + [ "tag" .= J.text "under_threshold" + , "size" .= buildW64 w + ] + SRDAboveThreshold sz v d -> J.object + [ "tag" .= J.text "above_threshold" + , "version" .= v + , "size" .= buildW64 sz + , "sized_data" .= d + ] + +sizeTagRowData :: (Text, ByteString) -> Word64 -> ByteString -> SizedRowData +sizeTagRowData (tblName, rowKey) threshold rdBytes + | len < threshold = SRDUnderThreshold len + | tblName == "SYS:Modules" = SRDUnderThreshold len + | otherwise = case JD.eitherDecodeStrict' @RowData rdBytes of + Left err -> error $ "sizeTagRowData: (tblName = " ++ Text.unpack tblName ++ ", rowKey = " ++ BS8.unpack rowKey ++ ") invalid rowData: " ++ err + Right rd -> SRDAboveThreshold + len + rd._rdVersion + (fmap sizeTagRowDataValue rd._rdData) + where + len = fromIntegral @_ @Word64 (BS.length rdBytes) + data ChainSizeInfo = ChainSizeInfo { totalSizeBytes :: Word64 - , tableSizes :: [SizedTable] + , tableSizes :: Vector SizedTable } -instance ToJSON ChainSizeInfo where - toJSON cInfo = Aeson.object - [ "total_size_bytes" .= cInfo.totalSizeBytes - , "table_sizes" .= cInfo.tableSizes +instance J.Encode ChainSizeInfo where + build cInfo = J.object + [ "total_size_bytes" .= buildW64 cInfo.totalSizeBytes + , "table_sizes" .= J.array cInfo.tableSizes ] -mkChainSizeInfo :: [SizedTable] -> ChainSizeInfo +mkChainSizeInfo :: Vector SizedTable -> ChainSizeInfo mkChainSizeInfo tbls = ChainSizeInfo - { totalSizeBytes = List.foldl' (\acc tbl -> acc + tbl.tableSizeBytes) 0 tbls + { totalSizeBytes = Vector.foldl' (\acc tbl -> acc + tbl.sizeBytes) 0 tbls , tableSizes = tbls } @@ -333,10 +535,10 @@ data PactBreakdown = PactBreakdown , sizes :: Map ChainId ChainSizeInfo } -instance ToJSON PactBreakdown where - toJSON b = Aeson.object - [ "total_size_bytes" .= b.totalSizeBytes - , "chain_sizes" .= b.sizes +instance J.Encode PactBreakdown where + build b = J.object + [ "total_size_bytes" .= buildW64 b.totalSizeBytes + , "chain_sizes" .= J.Object (List.map (first chainIdToText) (M.toList b.sizes)) ] reportBreakdown :: PactBreakdown -> IO () @@ -349,9 +551,9 @@ reportBreakdown breakdown = do put $ "Chain " ++ Text.unpack (chainIdToText cid) put $ "Total Size: " ++ showBytes cInfo.totalSizeBytes forM_ cInfo.tableSizes $ \tbl -> do - put $ Text.unpack tbl.tableName ++ - " (" ++ prettyTableType tbl.tableType ++ "): " ++ - showBytes tbl.tableSizeBytes + put $ Text.unpack tbl.name ++ + " (" ++ prettyTableType tbl.typ ++ "): " ++ + showBytes tbl.sizeBytes data PactBreakdownConfig = PactBreakdownConfig { pactDbDir :: FilePath @@ -365,14 +567,20 @@ pactBreakdownMain = do cids <- getCids cfg.pactDbDir cfg.chainwebVersion - sizesRef <- newIORef @(Map ChainId [SizedTable]) M.empty + sizesRef <- newIORef @(Map ChainId (Vector SizedTable)) M.empty flip (pooledMapConcurrentlyN_ cfg.numThreads) cids $ \cid -> do C.withDefaultLogger Error $ \logger -> do let resetDb = False withSqliteDb cid logger cfg.pactDbDir resetDb $ \(SQLiteEnv db _) -> do - sizedTables <- getTableSizesBytes db - atomicModifyIORef' sizesRef $ \m -> (M.insert cid sizedTables m, ()) + e <- S.next (getPactSizedTables nonUserTables db) + case e of + Left () -> do + -- the stream was entirely empty, but we need an entry + atomicModifyIORef' sizesRef $ \m -> (M.insert cid Vector.empty m, ()) + Right (tbl, rest) -> do + sizedTables <- (tbl :) <$> S.toList_ rest + atomicModifyIORef' sizesRef $ \m -> (M.insert cid (Vector.fromList sizedTables) m, ()) sizes <- readIORef sizesRef let chainSizeInfos = M.map mkChainSizeInfo sizes @@ -380,7 +588,7 @@ pactBreakdownMain = do { totalSizeBytes = M.foldl' (\acc cInfo -> acc + cInfo.totalSizeBytes) 0 chainSizeInfos , sizes = chainSizeInfos } - reportBreakdown breakdown + BSL8.hPut IO.stdout (J.encode breakdown) where opts :: ParserInfo PactBreakdownConfig @@ -443,9 +651,9 @@ pactDiffMain = do withSqliteDb cid logger cfg.firstDbDir resetDb $ \(SQLiteEnv db1 _) -> do withSqliteDb cid logger cfg.secondDbDir resetDb $ \(SQLiteEnv db2 _) -> do let diff = diffLatestPactState (getLatestPactState db1) (getLatestPactState db2) - diffy <- S.foldMap_ id $ flip S.mapM diff $ \utd -> do + diffy <- S.foldMap_ id $ flip S.mapM diff $ \td -> do loggerFunIO logger Warn $ toLogMessage $ - TextLog $ Text.decodeUtf8 $ BSL.toStrict $ Aeson.encode utd + TextLog $ Text.decodeUtf8 $ J.encodeStrict td pure Difference atomicModifyIORef' diffyRef $ \m -> (M.insert cid diffy m, ()) @@ -508,8 +716,11 @@ showBytes bytes | bytes > oneMB = show (w2d bytes / w2d oneMB) ++ " MB" | otherwise = show bytes ++ " bytes" where - oneMB :: Word64 - oneMB = 1024 * 1024 - w2d :: Word64 -> Double w2d = fromIntegral + +oneKB :: Word64 +oneKB = 1024 + +oneMB :: Word64 +oneMB = oneKB * oneKB