From 9e3e17b906c1c13a307c64039561db6709afe85d Mon Sep 17 00:00:00 2001 From: Ezra Varady <76978395+ezra-varady@users.noreply.github.com> Date: Mon, 9 Oct 2023 06:56:36 -1000 Subject: [PATCH] Add parallel tests (#192) Uses pg_regress to run tests in parallel against the database. Allows custom DB initialization and finalization which can be used to load relevant data in the beginning and check relevant invariants in the end --- CMakeLists.txt | 8 ++- CONTRIBUTING.md | 6 ++- ci/scripts/run-tests-linux.sh | 1 + ci/scripts/run-tests-mac.sh | 2 +- scripts/run_all_tests.sh | 42 ++++++++++++++-- test/parallel/expected/begin.out | 23 +++++++++ test/parallel/expected/end.out | 13 +++++ test/parallel/expected/insert.out | 13 +++++ test/parallel/expected/insert2.out | 13 +++++ test/parallel/expected/insert3.out | 13 +++++ test/parallel/expected/select.out | 38 ++++++++++++++ test/parallel/sql/begin.sql | 6 +++ test/parallel/sql/end.sql | 3 ++ test/parallel/sql/insert.sql | 13 +++++ test/parallel/sql/insert2.sql | 13 +++++ test/parallel/sql/insert3.sql | 13 +++++ test/parallel/sql/select.sql | 11 ++++ test/parallel/sql/utils/common.sql | 61 +++++++++++++++++++++++ test/parallel/sql/utils/random_array.sql | 11 ++++ test/parallel/sql/utils/sift10k_array.sql | 5 ++ test/parallel_schedule.txt | 10 ++++ test/sql/utils/random_array.sql | 11 ++++ test/sql/utils/small_world_array.sql | 2 +- test/test_runner.sh | 39 +++++++++++---- 24 files changed, 351 insertions(+), 19 deletions(-) create mode 100644 test/parallel/expected/begin.out create mode 100644 test/parallel/expected/end.out create mode 100644 test/parallel/expected/insert.out create mode 100644 test/parallel/expected/insert2.out create mode 100644 test/parallel/expected/insert3.out create mode 100644 test/parallel/expected/select.out create mode 100644 test/parallel/sql/begin.sql create mode 100644 test/parallel/sql/end.sql create mode 100644 test/parallel/sql/insert.sql create mode 100644 test/parallel/sql/insert2.sql create 
mode 100644 test/parallel/sql/insert3.sql create mode 100644 test/parallel/sql/select.sql create mode 100644 test/parallel/sql/utils/common.sql create mode 100644 test/parallel/sql/utils/random_array.sql create mode 100644 test/parallel/sql/utils/sift10k_array.sql create mode 100644 test/parallel_schedule.txt create mode 100644 test/sql/utils/random_array.sql diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e9520f4e..938075f64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,7 +192,13 @@ add_custom_target( # TEST add_custom_target( test - COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --regression + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test +) + +add_custom_target( + test-parallel + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --parallel WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test ) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6691eaa17..dc8f6be78 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,11 @@ make test # only run regression tests that have $FILTER in regression sql file path make test FILTER=hnsw + +# run parallel tests +make test-parallel ``` +Running `make test` will run the lantern regression tests, these run independent of one another. At the moment the tests for `make test-parallel` are under development, they can be found in `test/parallel`. The goal of the parallel tests is to generate a more realistic workload on the index to discover timing errors and other bugs dependent on more complex use, they run in the same database. ## Running benchmarks This requires Python to be installed. Please check the `Dockerfile.dev` for pip requirements. 
@@ -30,7 +34,7 @@ If you build Lantern in a different directory, make sure to update `.vscode` con ## Debugging the C codebase -If you make changes to the C codebase, in addition to `make test`, you can also use the `livedebug.py` utility in a `tmux` session to easily attach `gdb` to the psql backend and find out what breaks. +If you make changes to the C codebase, in addition to `make test` and `make test-parallel`, you can also use the `livedebug.py` utility in a `tmux` session to easily attach `gdb` to the psql backend and find out what breaks. Below is a short recording demonstrating the use of `livedebug.py`: [![asciicast](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt.svg)](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt) diff --git a/ci/scripts/run-tests-linux.sh b/ci/scripts/run-tests-linux.sh index 3ede685af..de8fb3e48 100755 --- a/ci/scripts/run-tests-linux.sh +++ b/ci/scripts/run-tests-linux.sh @@ -27,5 +27,6 @@ echo "port = 5432" >> ${PGDATA}/postgresql.conf GCOV_PREFIX=$WORKDIR/build/CMakeFiles/lantern.dir/ GCOV_PREFIX_STRIP=5 POSTGRES_HOST_AUTH_METHOD=trust /usr/lib/postgresql/$PG_VERSION/bin/postgres 1>/tmp/pg-out.log 2>/tmp/pg-error.log & # Wait for start and run tests wait_for_pg && cd $WORKDIR/build && make test && \ +make test-parallel && \ killall postgres && \ gcovr -r $WORKDIR/src/ --object-directory $WORKDIR/build/ --xml /tmp/coverage.xml diff --git a/ci/scripts/run-tests-mac.sh b/ci/scripts/run-tests-mac.sh index eef9af453..68faa9ec2 100755 --- a/ci/scripts/run-tests-mac.sh +++ b/ci/scripts/run-tests-mac.sh @@ -22,4 +22,4 @@ wait_for_pg(){ # Start database brew services start postgresql@$PG_VERSION -wait_for_pg && cd $WORKDIR/build && make test +wait_for_pg && cd $WORKDIR/build && make test && make test-parallel diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh index 7b6e79cda..7496442b3 100755 --- a/scripts/run_all_tests.sh +++ b/scripts/run_all_tests.sh @@ -67,13 +67,33 @@ fi # Check if pgvector is available 
pgvector_installed=$($PSQL -U $DB_USER -d postgres -c "SELECT 1 FROM pg_available_extensions WHERE name = 'vector'" -tA | tail -n 1 | tr -d '\n') +# Settings +REGRESSION=0 +PARALLEL=0 +while [[ "$#" -gt 0 ]]; do + case $1 in + --regression) REGRESSION=1 ;; + --parallel) PARALLEL=1 ;; + esac + shift +done + # Generate schedule.txt rm -rf $TMP_OUTDIR/schedule.txt +if [ "$PARALLEL" -eq 1 ]; then + SCHEDULE='parallel_schedule.txt' +else + SCHEDULE='schedule.txt' +fi if [ -n "$FILTER" ]; then - if [[ "$pgvector_installed" == "1" ]]; then - TEST_FILES=$(cat schedule.txt | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d') + if [ "$PARALLEL" -eq 1 ]; then + TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test:|test_begin:|test_end://' | tr " " "\n" | sed -e '/^$/d') else - TEST_FILES=$(cat schedule.txt | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d') + if [[ "$pgvector_installed" == "1" ]]; then + TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d') + else + TEST_FILES=$(cat $SCHEDULE | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d') + fi fi while IFS= read -r f; do @@ -95,11 +115,18 @@ else if [ "$pgvector_installed" == "1" ]; then echo "test: $test_name" >> $TMP_OUTDIR/schedule.txt fi + elif [[ "$line" =~ ^test_begin: ]]; then + test_name=$(echo "$line" | sed -e 's/test_begin:/test:/') + echo "$test_name" >> $TMP_OUTDIR/schedule.txt + elif [[ "$line" =~ ^test_end: ]]; then + test_name=$(echo "$line" | sed -e 's/test_end:/test:/') + echo "$test_name" >> $TMP_OUTDIR/schedule.txt else echo "$line" >> $TMP_OUTDIR/schedule.txt fi - done < schedule.txt + done < $SCHEDULE fi +unset SCHEDULE SCHEDULE=$TMP_OUTDIR/schedule.txt function print_diff { @@ -116,4 +143,9 @@ function print_diff { trap print_diff ERR -DB_USER=$DB_USER $(pg_config 
--pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=./test_runner.sh +if [ "$PARALLEL" -eq 1 ]; then + cd parallel + PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=../test_runner.sh +else + PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=./test_runner.sh +fi diff --git a/test/parallel/expected/begin.out b/test/parallel/expected/begin.out new file mode 100644 index 000000000..9aed0e804 --- /dev/null +++ b/test/parallel/expected/begin.out @@ -0,0 +1,23 @@ +-- This file handles initializing the database before parallel tests are run +\ir utils/sift10k_array.sql +CREATE TABLE IF NOT EXISTS sift_base10k ( + id SERIAL PRIMARY KEY, + v REAL[128] +); +\copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; +\ir utils/random_array.sql +CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max integer) RETURNS integer[] AS $BODY$ +begin + return (select array_agg(round(random() * (max - min)) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION random_array(dim integer, min real, max real) RETURNS REAL[] AS $BODY$ +begin + return (select array_agg(random() * (max - min) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; +CREATE SEQUENCE serial START 10001; +CREATE INDEX ON sift_base10k USING HNSW (v) WITH (M=5, ef=20, ef_construction=20); +INFO: done init usearch index +INFO: inserted 10000 elements +INFO: done saving 10000 vectors diff --git a/test/parallel/expected/end.out b/test/parallel/expected/end.out new file mode 100644 index 000000000..59130eafc --- /dev/null +++ b/test/parallel/expected/end.out @@ -0,0 +1,13 @@ +-- This file contains 
invariants to be checked after the parallel tests have run +SELECT COUNT(*) FROM sift_base10k; + count +------- + 10030 +(1 row) + +SELECT * from sift_base10k WHERE id=4444; + id | v +------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 4444 | {55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26} +(1 row) + diff --git a/test/parallel/expected/insert.out b/test/parallel/expected/insert.out new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/expected/insert.out @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/expected/insert2.out b/test/parallel/expected/insert2.out new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/expected/insert2.out @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), 
random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/expected/insert3.out b/test/parallel/expected/insert3.out new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/expected/insert3.out @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/expected/select.out b/test/parallel/expected/select.out new file mode 100644 index 000000000..b4c502600 --- /dev/null +++ b/test/parallel/expected/select.out @@ -0,0 +1,38 @@ +SELECT v AS v1111 FROM sift_base10k WHERE id = 1111 \gset +SELECT v AS v2222 FROM sift_base10k WHERE id = 2222 \gset +SELECT v AS v3333 FROM sift_base10k WHERE id = 3333 \gset +SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset +-- Make sure that our index queries will actually run against the index +EXPLAIN (COSTS false) SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + -> Index Scan using sift_base10k_v_idx on sift_base10k + Order By: (v <-> '{21,24,5,0,0,26,22,6,16,16,10,9,0,18,114,19,13,13,9,1,2,53,111,19,39,32,5,0,4,9,10,13,6,10,8,0,2,130,77,4,2,0,0,0,3,130,130,11,130,0,0,0,0,37,130,84,130,5,0,1,17,11,4,28,17,39,3,3,30,77,28,3,20,0,0,1,49,125,13,7,130,6,0,0,0,5,11,61,130,2,0,1,12,84,48,73,1,12,2,0,31,57,9,2,16,12,1,0,32,36,0,1,63,6,3,1,0,0,24,51,9,0,0,0,0,44,88,48}'::real[]) +(3 rows) + +-- Do the queries +SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; + id +------ + 1111 +(1 row) + +SELECT id FROM sift_base10k ORDER BY v <-> :'v2222' ASC LIMIT 1; + id +------ + 2222 +(1 row) + +SELECT id FROM sift_base10k ORDER BY v <-> :'v3333' ASC LIMIT 1; + id +------ + 3333 +(1 row) + +SELECT id FROM sift_base10k ORDER BY v <-> :'v4444' ASC LIMIT 1; + id +------ + 4444 +(1 row) + diff --git a/test/parallel/sql/begin.sql b/test/parallel/sql/begin.sql new file mode 100644 index 000000000..df0ba3a77 --- /dev/null +++ b/test/parallel/sql/begin.sql @@ -0,0 +1,6 @@ +-- This file handles initializing the database before parallel tests are run +\ir utils/sift10k_array.sql +\ir utils/random_array.sql + +CREATE SEQUENCE serial START 10001; +CREATE INDEX ON sift_base10k USING HNSW (v) WITH (M=5, ef=20, ef_construction=20); diff --git a/test/parallel/sql/end.sql b/test/parallel/sql/end.sql new file mode 100644 index 000000000..bac24d93a --- /dev/null +++ b/test/parallel/sql/end.sql @@ -0,0 +1,3 @@ +-- This file contains invariants to be checked after the parallel tests have run +SELECT COUNT(*) FROM sift_base10k; +SELECT * from sift_base10k WHERE id=4444; 
diff --git a/test/parallel/sql/insert.sql b/test/parallel/sql/insert.sql new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/sql/insert.sql @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/sql/insert2.sql b/test/parallel/sql/insert2.sql new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/sql/insert2.sql @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/sql/insert3.sql b/test/parallel/sql/insert3.sql new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/sql/insert3.sql @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + 
(nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/sql/select.sql b/test/parallel/sql/select.sql new file mode 100644 index 000000000..b56dfdc40 --- /dev/null +++ b/test/parallel/sql/select.sql @@ -0,0 +1,11 @@ +SELECT v AS v1111 FROM sift_base10k WHERE id = 1111 \gset +SELECT v AS v2222 FROM sift_base10k WHERE id = 2222 \gset +SELECT v AS v3333 FROM sift_base10k WHERE id = 3333 \gset +SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset +-- Make sure that our index queries will actually run against the index +EXPLAIN (COSTS false) SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; +-- Do the queries +SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; +SELECT id FROM sift_base10k ORDER BY v <-> :'v2222' ASC LIMIT 1; +SELECT id FROM sift_base10k ORDER BY v <-> :'v3333' ASC LIMIT 1; +SELECT id FROM sift_base10k ORDER BY v <-> :'v4444' ASC LIMIT 1; diff --git a/test/parallel/sql/utils/common.sql b/test/parallel/sql/utils/common.sql new file mode 100644 index 000000000..ac084f040 --- /dev/null +++ b/test/parallel/sql/utils/common.sql @@ -0,0 +1,61 @@ +-- test helper functions that should exist in all test runs live here +-- there is no need to explicitly include this file in other tests as the test runner will +-- run this before running the actual test + +CREATE EXTENSION pageinspect; + +\set ON_ERROR_STOP on + +-- retrieves details for all indices associated with a given table, similar to \di+ +-- the output of \di+ is not consistent across postgres versions +-- todo:: add a columns to this function which returning number of used DB pages +CREATE OR REPLACE FUNCTION ldb_get_indexes(tblname text) +RETURNS TABLE( + indexname name, + size text, + indexdef text, + total_index_size text +) AS +$BODY$ +BEGIN + 
RETURN QUERY + WITH total_size_data AS ( + SELECT + SUM(pg_relation_size(indexrelid)) as total_size + FROM + pg_index + WHERE + indisvalid + AND indrelid = tblname::regclass + ) + SELECT + idx.indexname, + pg_size_pretty(pg_relation_size(idx.indexname::REGCLASS)) as size, + idx.indexdef, + pg_size_pretty(total_size_data.total_size) as total_index_size + FROM + pg_indexes idx, + total_size_data + WHERE + idx.tablename = tblname; +END; +$BODY$ +LANGUAGE plpgsql; + +-- Determines if the provided SQL query (with an EXPLAIN prefix) uses an "Index Scan" +-- by examining its execution plan. This function helps ensure consistent analysis +-- across varying Postgres versions where EXPLAIN output may differ. +CREATE OR REPLACE FUNCTION has_index_scan(explain_query text) RETURNS boolean AS $$ +DECLARE + plan_row RECORD; + found boolean := false; +BEGIN + FOR plan_row IN EXECUTE explain_query LOOP + IF position('Index Scan' in plan_row."QUERY PLAN") > 0 THEN + found := true; + EXIT; + END IF; + END LOOP; + RETURN found; +END; +$$ LANGUAGE plpgsql; diff --git a/test/parallel/sql/utils/random_array.sql b/test/parallel/sql/utils/random_array.sql new file mode 100644 index 000000000..043b59748 --- /dev/null +++ b/test/parallel/sql/utils/random_array.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max integer) RETURNS integer[] AS $BODY$ +begin + return (select array_agg(round(random() * (max - min)) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION random_array(dim integer, min real, max real) RETURNS REAL[] AS $BODY$ +begin + return (select array_agg(random() * (max - min) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; diff --git a/test/parallel/sql/utils/sift10k_array.sql b/test/parallel/sql/utils/sift10k_array.sql new file mode 100644 index 000000000..cdee704d1 --- /dev/null +++ b/test/parallel/sql/utils/sift10k_array.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF 
NOT EXISTS sift_base10k ( + id SERIAL PRIMARY KEY, + v REAL[128] +); +\copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; \ No newline at end of file diff --git a/test/parallel_schedule.txt b/test/parallel_schedule.txt new file mode 100644 index 000000000..a94d529b9 --- /dev/null +++ b/test/parallel_schedule.txt @@ -0,0 +1,10 @@ +# schedule.txt rules: +# - every test that needs to be run must appear in a 'test:' line +# - every test that needs to be run iff pgvector is installed appears in a 'test_pgvector:' line +# - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel +# parallel_schedule.txt notes: +# - Begin runs before and end runs after the actual tests, they run in the same database, but begin runs before to handle the necessary setup and end runs after to check invariants. + +test_begin: begin +test: insert insert2 insert3 select +test_end: end diff --git a/test/sql/utils/random_array.sql b/test/sql/utils/random_array.sql new file mode 100644 index 000000000..043b59748 --- /dev/null +++ b/test/sql/utils/random_array.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max integer) RETURNS integer[] AS $BODY$ +begin + return (select array_agg(round(random() * (max - min)) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION random_array(dim integer, min real, max real) RETURNS REAL[] AS $BODY$ +begin + return (select array_agg(random() * (max - min) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; diff --git a/test/sql/utils/small_world_array.sql b/test/sql/utils/small_world_array.sql index 5d4bed5c8..82e1d9653 100644 --- a/test/sql/utils/small_world_array.sql +++ b/test/sql/utils/small_world_array.sql @@ -12,4 +12,4 @@ INSERT INTO small_world (id, b, v) VALUES ('100', FALSE, '{1,0,0}'), ('101', FALSE, '{1,0,1}'), ('110', FALSE, '{1,1,0}'), - 
('111', TRUE, '{1,1,1}'); \ No newline at end of file + ('111', TRUE, '{1,1,1}'); diff --git a/test/test_runner.sh b/test/test_runner.sh index 6167cf3ea..dd888165c 100755 --- a/test/test_runner.sh +++ b/test/test_runner.sh @@ -2,11 +2,18 @@ # Get current test file name TESTFILE_NAME=${PGAPPNAME##pg_regress/} -# Set different name for each test database -# As pg_regress does not support cleaning db after each test -TEST_CASE_DB="ldb_test_${TESTFILE_NAME}" + +if [ "$PARALLEL" -eq 0 ]; then + # Set different name for each test database + # As pg_regress does not support cleaning db after each test + TEST_CASE_DB="ldb_test_${TESTFILE_NAME}" +else + # parallel tests all run in the same database + TEST_CASE_DB="ldb_parallel" +fi + # Set database user -if [ -z $DB_USER ] +if [ -z "$DB_USER" ] then echo "ERROR: DB_USER environment variable is not set before test_runner.sh is run by pg_regress" exit 1 @@ -20,16 +27,28 @@ function drop_db { EOF } -trap drop_db EXIT +# If these aren't parallel tests always drop the db after the test +# if they are though we only want to drop after end which is where we check invariants +# this allows the parallel tests to be run against the same db +if [ "$PARALLEL" -eq 0 ]; then + trap drop_db EXIT +elif [[ "$TESTFILE_NAME" =~ ^end ]]; then + trap drop_db EXIT +fi -# Change directory to sql so sql imports will work correctly +# Change directory to sql directory so sql imports will work correctly cd sql/ + # install lantern extension -psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "DROP DATABASE IF EXISTS ${TEST_CASE_DB};" 2>/dev/null -psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "CREATE DATABASE ${TEST_CASE_DB};" 2>/dev/null -psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION lantern;" 2>/dev/null -psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null +# if tests are parallel we only do this for the begin tests as we won't be 
dropping the database until the end +# begin will handle initialization specific to the tests but expects the database already exists +if [ "$PARALLEL" -eq 0 ] || ( [[ "$TESTFILE_NAME" =~ ^begin ]] && [ "$PARALLEL" -eq 1 ] ); then + psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "DROP DATABASE IF EXISTS ${TEST_CASE_DB};" 2>/dev/null + psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "CREATE DATABASE ${TEST_CASE_DB};" 2>/dev/null + psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION lantern;" 2>/dev/null + psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null +fi # Exclude debug/inconsistent output from psql # So tests will always have the same output