From 1f5de0e9cb0b3433bc0e26c8eecde2b215ac2553 Mon Sep 17 00:00:00 2001 From: William Manley Date: Mon, 7 Aug 2023 16:55:56 +0100 Subject: [PATCH 1/4] Storage: Add Indexes for common queries In #306 it's reported that SQLite is slow. This adds indexes so lookups will be faster. The intention is to speed up the bottom 3 queries from https://github.com/github/stack-graphs/issues/306#issuecomment-1665402600 as they are "the ones to focus on". Specifically this should speed up: > * Load graph data for a file > > SELECT value FROM graphs WHERE file = ? > > Called many times during path stitching > > * Load paths for a file node > > SELECT file,value from file_paths WHERE file = ? AND local_id = ? > > Called many times during path stitching. > > * Load paths for root node > > SELECT file, value from root_paths WHERE symbol_stack = ? > > Called many times during path stitching. If the data that you're fetching is in the index then SQLite won't even bother reading the row proper, just pulling the data streight from the index (good for data locality). As such I've included not just the columns we're using in the `WHERE` clause, but also the ones from `SELECT` too. I've not actually tested the performance impact as I'm not familiar with this project (this is more of a drive-by) and don't know what benchmarks to use. --- stack-graphs/src/storage.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/stack-graphs/src/storage.rs b/stack-graphs/src/storage.rs index ee39c457e..62f573d5d 100644 --- a/stack-graphs/src/storage.rs +++ b/stack-graphs/src/storage.rs @@ -58,6 +58,12 @@ const SCHEMA: &str = r#" ) STRICT; "#; +const INDEXES: &str = r#" + CREATE INDEX IF NOT EXISTS graph_file_value ON graph(file, value); + CREATE INDEX IF NOT EXISTS file_paths_file_local_id_value ON file_paths(file, local_id, value); + CREATE INDEX IF NOT EXISTS root_paths_symbol_stack_file_value ON root_paths(symbol_stack, file, value); + "#; + const PRAGMAS: &str = r#" PRAGMA journal_mode = WAL; PRAGMA foreign_keys = false; @@ -147,6 +153,7 @@ impl SQLiteWriter { pub fn open_in_memory() -> Result { let mut conn = Connection::open_in_memory()?; Self::init(&mut conn)?; + Self::init_indexes(&mut conn)?; Ok(Self { conn }) } @@ -161,6 +168,7 @@ impl SQLiteWriter { } else { check_version(&conn)?; } + Self::init_indexes(&mut conn)?; Ok(Self { conn }) } @@ -173,6 +181,13 @@ impl SQLiteWriter { Ok(()) } + fn init_indexes(conn: &mut Connection) -> Result<()> { + let tx = conn.transaction()?; + tx.execute_batch(INDEXES)?; + tx.commit()?; + Ok(()) + } + /// Clean all data from the database. pub fn clean_all(&mut self) -> Result { let tx = self.conn.transaction()?; From 07e72e167f174f865fcc72533cf800019056678d Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Wed, 9 Aug 2023 11:39:40 +0200 Subject: [PATCH 2/4] Do not use covering indices to reduce database size --- stack-graphs/src/storage.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stack-graphs/src/storage.rs b/stack-graphs/src/storage.rs index 62f573d5d..006f9610f 100644 --- a/stack-graphs/src/storage.rs +++ b/stack-graphs/src/storage.rs @@ -59,9 +59,9 @@ const SCHEMA: &str = r#" "#; const INDEXES: &str = r#" - CREATE INDEX IF NOT EXISTS graph_file_value ON graph(file, value); - CREATE INDEX IF NOT EXISTS file_paths_file_local_id_value ON file_paths(file, local_id, value); - CREATE INDEX IF NOT EXISTS root_paths_symbol_stack_file_value ON root_paths(symbol_stack, file, value); + CREATE INDEX IF NOT EXISTS idx_graphs_file ON graphs(file); + CREATE INDEX IF NOT EXISTS idx_file_paths_local_id ON file_paths(file, local_id); + CREATE INDEX IF NOT EXISTS idx_root_paths_symbol_stack ON root_paths(symbol_stack); "#; const PRAGMAS: &str = r#" From 134b7bc6ed05d07a5ad2ecdac4e8a9e41c45e48f Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Wed, 9 Aug 2023 11:42:15 +0200 Subject: [PATCH 3/4] Also initialize indices from reader --- stack-graphs/src/storage.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/stack-graphs/src/storage.rs b/stack-graphs/src/storage.rs index 006f9610f..808697a4a 100644 --- a/stack-graphs/src/storage.rs +++ b/stack-graphs/src/storage.rs @@ -153,7 +153,7 @@ impl SQLiteWriter { pub fn open_in_memory() -> Result { let mut conn = Connection::open_in_memory()?; Self::init(&mut conn)?; - Self::init_indexes(&mut conn)?; + init_indexes(&mut conn)?; Ok(Self { conn }) } @@ -168,7 +168,7 @@ impl SQLiteWriter { } else { check_version(&conn)?; } - Self::init_indexes(&mut conn)?; + init_indexes(&mut conn)?; Ok(Self { conn }) } @@ -181,13 +181,6 @@ impl SQLiteWriter { Ok(()) } - fn init_indexes(conn: &mut Connection) -> Result<()> { - let tx = conn.transaction()?; - tx.execute_batch(INDEXES)?; - tx.commit()?; - Ok(()) - } - /// Clean all data from the database. pub fn clean_all(&mut self) -> Result { let tx = self.conn.transaction()?; @@ -447,9 +440,10 @@ impl SQLiteReader { path.as_ref().to_string_lossy().to_string(), )); } - let conn = Connection::open(path)?; + let mut conn = Connection::open(path)?; set_pragmas_and_functions(&conn)?; check_version(&conn)?; + init_indexes(&mut conn)?; Ok(Self { conn, loaded_graphs: HashSet::new(), @@ -796,6 +790,13 @@ fn set_pragmas_and_functions(conn: &Connection) -> Result<()> { Ok(()) } +fn init_indexes(conn: &mut Connection) -> Result<()> { + let tx = conn.transaction()?; + tx.execute_batch(INDEXES)?; + tx.commit()?; + Ok(()) +} + fn status_for_file>( conn: &Connection, file: &str, From ddafce47d537e81ad684e63cc2fc2ea8852ae47c Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Wed, 9 Aug 2023 11:50:40 +0200 Subject: [PATCH 4/4] Update authors --- AUTHORS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 0e98f68bf..69afa1c7b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,3 +1,5 @@ Douglas Creager Hendrik van Antwerpen Akshay +blusk +William Manley