From 9e3e17b906c1c13a307c64039561db6709afe85d Mon Sep 17 00:00:00 2001 From: Ezra Varady <76978395+ezra-varady@users.noreply.github.com> Date: Mon, 9 Oct 2023 06:56:36 -1000 Subject: [PATCH] Add parallel tests (#192) Uses pg_regress to run tests in parallel against the database. Allows custom DB initialization and finalization which can be used to load relevant data in the beginning and check relevant invariants in the end --- CMakeLists.txt | 8 ++- CONTRIBUTING.md | 6 ++- ci/scripts/run-tests-linux.sh | 1 + ci/scripts/run-tests-mac.sh | 2 +- scripts/run_all_tests.sh | 42 ++++++++++++++-- test/parallel/expected/begin.out | 23 +++++++++ test/parallel/expected/end.out | 13 +++++ test/parallel/expected/insert.out | 13 +++++ test/parallel/expected/insert2.out | 13 +++++ test/parallel/expected/insert3.out | 13 +++++ test/parallel/expected/select.out | 38 ++++++++++++++ test/parallel/sql/begin.sql | 6 +++ test/parallel/sql/end.sql | 3 ++ test/parallel/sql/insert.sql | 13 +++++ test/parallel/sql/insert2.sql | 13 +++++ test/parallel/sql/insert3.sql | 13 +++++ test/parallel/sql/select.sql | 11 ++++ test/parallel/sql/utils/common.sql | 61 +++++++++++++++++++++++ test/parallel/sql/utils/random_array.sql | 11 ++++ test/parallel/sql/utils/sift10k_array.sql | 5 ++ test/parallel_schedule.txt | 10 ++++ test/sql/utils/random_array.sql | 11 ++++ test/sql/utils/small_world_array.sql | 2 +- test/test_runner.sh | 39 +++++++++++---- 24 files changed, 351 insertions(+), 19 deletions(-) create mode 100644 test/parallel/expected/begin.out create mode 100644 test/parallel/expected/end.out create mode 100644 test/parallel/expected/insert.out create mode 100644 test/parallel/expected/insert2.out create mode 100644 test/parallel/expected/insert3.out create mode 100644 test/parallel/expected/select.out create mode 100644 test/parallel/sql/begin.sql create mode 100644 test/parallel/sql/end.sql create mode 100644 test/parallel/sql/insert.sql create mode 100644 test/parallel/sql/insert2.sql create 
mode 100644 test/parallel/sql/insert3.sql create mode 100644 test/parallel/sql/select.sql create mode 100644 test/parallel/sql/utils/common.sql create mode 100644 test/parallel/sql/utils/random_array.sql create mode 100644 test/parallel/sql/utils/sift10k_array.sql create mode 100644 test/parallel_schedule.txt create mode 100644 test/sql/utils/random_array.sql diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e9520f4e..938075f64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,7 +192,13 @@ add_custom_target( # TEST add_custom_target( test - COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --regression + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test +) + +add_custom_target( + test-parallel + COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --parallel WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test ) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6691eaa17..dc8f6be78 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,11 @@ make test # only run regression tests that have $FILTER in regression sql file path make test FILTER=hnsw + +# run parallel tests +make test-parallel ``` +Running `make test` will run the lantern regression tests, these run independent of one another. At the moment the tests for `make test-parallel` are under development, they can be found in `test/parallel`. The goal of the parallel tests is to generate a more realistic workload on the index to discover timing errors and other bugs dependent on more complex use, they run in the same database. ## Running benchmarks This requires Python to be installed. Please check the `Dockerfile.dev` for pip requirements. 
@@ -30,7 +34,7 @@ If you build Lantern in a different directory, make sure to update `.vscode` con ## Debugging the C codebase -If you make changes to the C codebase, in addition to `make test`, you can also use the `livedebug.py` utility in a `tmux` session to easily attach `gdb` to the psql backend and find out what breaks. +If you make changes to the C codebase, in addition to `make test` and `make test-parallel`, you can also use the `livedebug.py` utility in a `tmux` session to easily attach `gdb` to the psql backend and find out what breaks. Below is a short recording demonstrating the use of `livedebug.py`: [![asciicast](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt.svg)](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt) diff --git a/ci/scripts/run-tests-linux.sh b/ci/scripts/run-tests-linux.sh index 3ede685af..de8fb3e48 100755 --- a/ci/scripts/run-tests-linux.sh +++ b/ci/scripts/run-tests-linux.sh @@ -27,5 +27,6 @@ echo "port = 5432" >> ${PGDATA}/postgresql.conf GCOV_PREFIX=$WORKDIR/build/CMakeFiles/lantern.dir/ GCOV_PREFIX_STRIP=5 POSTGRES_HOST_AUTH_METHOD=trust /usr/lib/postgresql/$PG_VERSION/bin/postgres 1>/tmp/pg-out.log 2>/tmp/pg-error.log & # Wait for start and run tests wait_for_pg && cd $WORKDIR/build && make test && \ +make test-parallel && \ killall postgres && \ gcovr -r $WORKDIR/src/ --object-directory $WORKDIR/build/ --xml /tmp/coverage.xml diff --git a/ci/scripts/run-tests-mac.sh b/ci/scripts/run-tests-mac.sh index eef9af453..68faa9ec2 100755 --- a/ci/scripts/run-tests-mac.sh +++ b/ci/scripts/run-tests-mac.sh @@ -22,4 +22,4 @@ wait_for_pg(){ # Start database brew services start postgresql@$PG_VERSION -wait_for_pg && cd $WORKDIR/build && make test +wait_for_pg && cd $WORKDIR/build && make test && make test-parallel diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh index 7b6e79cda..7496442b3 100755 --- a/scripts/run_all_tests.sh +++ b/scripts/run_all_tests.sh @@ -67,13 +67,33 @@ fi # Check if pgvector is available 
pgvector_installed=$($PSQL -U $DB_USER -d postgres -c "SELECT 1 FROM pg_available_extensions WHERE name = 'vector'" -tA | tail -n 1 | tr -d '\n') +# Settings +REGRESSION=0 +PARALLEL=0 +while [[ "$#" -gt 0 ]]; do + case $1 in + --regression) REGRESSION=1 ;; + --parallel) PARALLEL=1 ;; + esac + shift +done + # Generate schedule.txt rm -rf $TMP_OUTDIR/schedule.txt +if [ "$PARALLEL" -eq 1 ]; then + SCHEDULE='parallel_schedule.txt' +else + SCHEDULE='schedule.txt' +fi if [ -n "$FILTER" ]; then - if [[ "$pgvector_installed" == "1" ]]; then - TEST_FILES=$(cat schedule.txt | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d') + if [ "$PARALLEL" -eq 1 ]; then + TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test:|test_begin:|test_end://' | tr " " "\n" | sed -e '/^$/d') else - TEST_FILES=$(cat schedule.txt | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d') + if [[ "$pgvector_installed" == "1" ]]; then + TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -E -e 's/^test:|test_pgvector://' | tr " " "\n" | sed -e '/^$/d') + else + TEST_FILES=$(cat $SCHEDULE | grep '^test:' | sed -e 's/^test://' | tr " " "\n" | sed -e '/^$/d') + fi fi while IFS= read -r f; do @@ -95,11 +115,18 @@ else if [ "$pgvector_installed" == "1" ]; then echo "test: $test_name" >> $TMP_OUTDIR/schedule.txt fi + elif [[ "$line" =~ ^test_begin: ]]; then + test_name=$(echo "$line" | sed -e 's/test_begin:/test:/') + echo "$test_name" >> $TMP_OUTDIR/schedule.txt + elif [[ "$line" =~ ^test_end: ]]; then + test_name=$(echo "$line" | sed -e 's/test_end:/test:/') + echo "$test_name" >> $TMP_OUTDIR/schedule.txt else echo "$line" >> $TMP_OUTDIR/schedule.txt fi - done < schedule.txt + done < $SCHEDULE fi +unset SCHEDULE SCHEDULE=$TMP_OUTDIR/schedule.txt function print_diff { @@ -116,4 +143,9 @@ function print_diff { trap print_diff ERR -DB_USER=$DB_USER $(pg_config 
--pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=./test_runner.sh +if [ "$PARALLEL" -eq 1 ]; then + cd parallel + PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=../test_runner.sh +else + PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=./test_runner.sh +fi diff --git a/test/parallel/expected/begin.out b/test/parallel/expected/begin.out new file mode 100644 index 000000000..9aed0e804 --- /dev/null +++ b/test/parallel/expected/begin.out @@ -0,0 +1,23 @@ +-- This file handles initializing the database before parallel tests are run +\ir utils/sift10k_array.sql +CREATE TABLE IF NOT EXISTS sift_base10k ( + id SERIAL PRIMARY KEY, + v REAL[128] +); +\copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; +\ir utils/random_array.sql +CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max integer) RETURNS integer[] AS $BODY$ +begin + return (select array_agg(round(random() * (max - min)) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION random_array(dim integer, min real, max real) RETURNS REAL[] AS $BODY$ +begin + return (select array_agg(random() * (max - min) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; +CREATE SEQUENCE serial START 10001; +CREATE INDEX ON sift_base10k USING HNSW (v) WITH (M=5, ef=20, ef_construction=20); +INFO: done init usearch index +INFO: inserted 10000 elements +INFO: done saving 10000 vectors diff --git a/test/parallel/expected/end.out b/test/parallel/expected/end.out new file mode 100644 index 000000000..59130eafc --- /dev/null +++ b/test/parallel/expected/end.out @@ -0,0 +1,13 @@ +-- This file contains 
invariants to be checked after the parallel tests have run +SELECT COUNT(*) FROM sift_base10k; + count +------- + 10030 +(1 row) + +SELECT * from sift_base10k WHERE id=4444; + id | v +------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 4444 | {55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26} +(1 row) + diff --git a/test/parallel/expected/insert.out b/test/parallel/expected/insert.out new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/expected/insert.out @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/expected/insert2.out b/test/parallel/expected/insert2.out new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/expected/insert2.out @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), 
random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/expected/insert3.out b/test/parallel/expected/insert3.out new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/expected/insert3.out @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/expected/select.out b/test/parallel/expected/select.out new file mode 100644 index 000000000..b4c502600 --- /dev/null +++ b/test/parallel/expected/select.out @@ -0,0 +1,38 @@ +SELECT v AS v1111 FROM sift_base10k WHERE id = 1111 \gset +SELECT v AS v2222 FROM sift_base10k WHERE id = 2222 \gset +SELECT v AS v3333 FROM sift_base10k WHERE id = 3333 \gset +SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset +-- Make sure that our index queries will actually run against the index +EXPLAIN (COSTS false) SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Limit + -> Index Scan using sift_base10k_v_idx on sift_base10k + Order By: (v <-> '{21,24,5,0,0,26,22,6,16,16,10,9,0,18,114,19,13,13,9,1,2,53,111,19,39,32,5,0,4,9,10,13,6,10,8,0,2,130,77,4,2,0,0,0,3,130,130,11,130,0,0,0,0,37,130,84,130,5,0,1,17,11,4,28,17,39,3,3,30,77,28,3,20,0,0,1,49,125,13,7,130,6,0,0,0,5,11,61,130,2,0,1,12,84,48,73,1,12,2,0,31,57,9,2,16,12,1,0,32,36,0,1,63,6,3,1,0,0,24,51,9,0,0,0,0,44,88,48}'::real[]) +(3 rows) + +-- Do the queries +SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; + id +------ + 1111 +(1 row) + +SELECT id FROM sift_base10k ORDER BY v <-> :'v2222' ASC LIMIT 1; + id +------ + 2222 +(1 row) + +SELECT id FROM sift_base10k ORDER BY v <-> :'v3333' ASC LIMIT 1; + id +------ + 3333 +(1 row) + +SELECT id FROM sift_base10k ORDER BY v <-> :'v4444' ASC LIMIT 1; + id +------ + 4444 +(1 row) + diff --git a/test/parallel/sql/begin.sql b/test/parallel/sql/begin.sql new file mode 100644 index 000000000..df0ba3a77 --- /dev/null +++ b/test/parallel/sql/begin.sql @@ -0,0 +1,6 @@ +-- This file handles initializing the database before parallel tests are run +\ir utils/sift10k_array.sql +\ir utils/random_array.sql + +CREATE SEQUENCE serial START 10001; +CREATE INDEX ON sift_base10k USING HNSW (v) WITH (M=5, ef=20, ef_construction=20); diff --git a/test/parallel/sql/end.sql b/test/parallel/sql/end.sql new file mode 100644 index 000000000..bac24d93a --- /dev/null +++ b/test/parallel/sql/end.sql @@ -0,0 +1,3 @@ +-- This file contains invariants to be checked after the parallel tests have run +SELECT COUNT(*) FROM sift_base10k; +SELECT * from sift_base10k WHERE id=4444; 
diff --git a/test/parallel/sql/insert.sql b/test/parallel/sql/insert.sql new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/sql/insert.sql @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/sql/insert2.sql b/test/parallel/sql/insert2.sql new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/sql/insert2.sql @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/sql/insert3.sql b/test/parallel/sql/insert3.sql new file mode 100644 index 000000000..604cf2ccd --- /dev/null +++ b/test/parallel/sql/insert3.sql @@ -0,0 +1,13 @@ +BEGIN; +INSERT INTO sift_base10k (id, v) VALUES + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + 
(nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)), + (nextval('serial'), random_array(128, 0, 128)); +COMMIT; diff --git a/test/parallel/sql/select.sql b/test/parallel/sql/select.sql new file mode 100644 index 000000000..b56dfdc40 --- /dev/null +++ b/test/parallel/sql/select.sql @@ -0,0 +1,11 @@ +SELECT v AS v1111 FROM sift_base10k WHERE id = 1111 \gset +SELECT v AS v2222 FROM sift_base10k WHERE id = 2222 \gset +SELECT v AS v3333 FROM sift_base10k WHERE id = 3333 \gset +SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset +-- Make sure that our index queries will actually run against the index +EXPLAIN (COSTS false) SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; +-- Do the queries +SELECT id FROM sift_base10k ORDER BY v <-> :'v1111' ASC LIMIT 1; +SELECT id FROM sift_base10k ORDER BY v <-> :'v2222' ASC LIMIT 1; +SELECT id FROM sift_base10k ORDER BY v <-> :'v3333' ASC LIMIT 1; +SELECT id FROM sift_base10k ORDER BY v <-> :'v4444' ASC LIMIT 1; diff --git a/test/parallel/sql/utils/common.sql b/test/parallel/sql/utils/common.sql new file mode 100644 index 000000000..ac084f040 --- /dev/null +++ b/test/parallel/sql/utils/common.sql @@ -0,0 +1,61 @@ +-- test helper functions that should exist in all test runs live here +-- there is no need to explicitly include this file in other tests as the test runner will +-- run this before running the actual test + +CREATE EXTENSION pageinspect; + +\set ON_ERROR_STOP on + +-- retrieves details for all indices associated with a given table, similar to \di+ +-- the output of \di+ is not consistent across postgres versions +-- todo:: add a columns to this function which returning number of used DB pages +CREATE OR REPLACE FUNCTION ldb_get_indexes(tblname text) +RETURNS TABLE( + indexname name, + size text, + indexdef text, + total_index_size text +) AS +$BODY$ +BEGIN + 
RETURN QUERY + WITH total_size_data AS ( + SELECT + SUM(pg_relation_size(indexrelid)) as total_size + FROM + pg_index + WHERE + indisvalid + AND indrelid = tblname::regclass + ) + SELECT + idx.indexname, + pg_size_pretty(pg_relation_size(idx.indexname::REGCLASS)) as size, + idx.indexdef, + pg_size_pretty(total_size_data.total_size) as total_index_size + FROM + pg_indexes idx, + total_size_data + WHERE + idx.tablename = tblname; +END; +$BODY$ +LANGUAGE plpgsql; + +-- Determines if the provided SQL query (with an EXPLAIN prefix) uses an "Index Scan" +-- by examining its execution plan. This function helps ensure consistent analysis +-- across varying Postgres versions where EXPLAIN output may differ. +CREATE OR REPLACE FUNCTION has_index_scan(explain_query text) RETURNS boolean AS $$ +DECLARE + plan_row RECORD; + found boolean := false; +BEGIN + FOR plan_row IN EXECUTE explain_query LOOP + IF position('Index Scan' in plan_row."QUERY PLAN") > 0 THEN + found := true; + EXIT; + END IF; + END LOOP; + RETURN found; +END; +$$ LANGUAGE plpgsql; diff --git a/test/parallel/sql/utils/random_array.sql b/test/parallel/sql/utils/random_array.sql new file mode 100644 index 000000000..043b59748 --- /dev/null +++ b/test/parallel/sql/utils/random_array.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max integer) RETURNS integer[] AS $BODY$ +begin + return (select array_agg(round(random() * (max - min)) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION random_array(dim integer, min real, max real) RETURNS REAL[] AS $BODY$ +begin + return (select array_agg(random() * (max - min) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; diff --git a/test/parallel/sql/utils/sift10k_array.sql b/test/parallel/sql/utils/sift10k_array.sql new file mode 100644 index 000000000..cdee704d1 --- /dev/null +++ b/test/parallel/sql/utils/sift10k_array.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF 
NOT EXISTS sift_base10k ( + id SERIAL PRIMARY KEY, + v REAL[128] +); +\copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; \ No newline at end of file diff --git a/test/parallel_schedule.txt b/test/parallel_schedule.txt new file mode 100644 index 000000000..a94d529b9 --- /dev/null +++ b/test/parallel_schedule.txt @@ -0,0 +1,10 @@ +# schedule.txt rules: +# - every test that needs to be run must appear in a 'test:' line +# - every test that needs to be run iff pgvector is installed appears in a 'test_pgvector:' line +# - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel +# parallel_schedule.txt notes: +# - Begin runs before and end runs after the actual tests, they run in the same database, but begin runs before to handle the necessary setup and end runs after to check invariants. + +test_begin: begin +test: insert insert2 insert3 select +test_end: end diff --git a/test/sql/utils/random_array.sql b/test/sql/utils/random_array.sql new file mode 100644 index 000000000..043b59748 --- /dev/null +++ b/test/sql/utils/random_array.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max integer) RETURNS integer[] AS $BODY$ +begin + return (select array_agg(round(random() * (max - min)) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION random_array(dim integer, min real, max real) RETURNS REAL[] AS $BODY$ +begin + return (select array_agg(random() * (max - min) + min) from generate_series (0, dim - 1)); +end +$BODY$ LANGUAGE plpgsql; diff --git a/test/sql/utils/small_world_array.sql b/test/sql/utils/small_world_array.sql index 5d4bed5c8..82e1d9653 100644 --- a/test/sql/utils/small_world_array.sql +++ b/test/sql/utils/small_world_array.sql @@ -12,4 +12,4 @@ INSERT INTO small_world (id, b, v) VALUES ('100', FALSE, '{1,0,0}'), ('101', FALSE, '{1,0,1}'), ('110', FALSE, '{1,1,0}'), - 
('111', TRUE, '{1,1,1}'); \ No newline at end of file + ('111', TRUE, '{1,1,1}'); diff --git a/test/test_runner.sh b/test/test_runner.sh index 6167cf3ea..dd888165c 100755 --- a/test/test_runner.sh +++ b/test/test_runner.sh @@ -2,11 +2,18 @@ # Get current test file name TESTFILE_NAME=${PGAPPNAME##pg_regress/} -# Set different name for each test database -# As pg_regress does not support cleaning db after each test -TEST_CASE_DB="ldb_test_${TESTFILE_NAME}" + +if [ "$PARALLEL" -eq 0 ]; then + # Set different name for each test database + # As pg_regress does not support cleaning db after each test + TEST_CASE_DB="ldb_test_${TESTFILE_NAME}" +else + # parallel tests all run in the same database + TEST_CASE_DB="ldb_parallel" +fi + # Set database user -if [ -z $DB_USER ] +if [ -z "$DB_USER" ] then echo "ERROR: DB_USER environment variable is not set before test_runner.sh is run by pg_regress" exit 1 @@ -20,16 +27,28 @@ function drop_db { EOF } -trap drop_db EXIT +# If these aren't parallel tests always drop the db after the test +# if they are though we only want to drop after end which is where we check invariants +# this allows the parallel tests to be run against the same db +if [ "$PARALLEL" -eq 0 ]; then + trap drop_db EXIT +elif [[ "$TESTFILE_NAME" =~ ^end ]]; then + trap drop_db EXIT +fi -# Change directory to sql so sql imports will work correctly +# Change directory to sql directory so sql imports will work correctly cd sql/ + # install lantern extension -psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "DROP DATABASE IF EXISTS ${TEST_CASE_DB};" 2>/dev/null -psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "CREATE DATABASE ${TEST_CASE_DB};" 2>/dev/null -psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION lantern;" 2>/dev/null -psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null +# if tests are parallel we only do this for the begin tests as we won't be 
dropping the database until the end +# begin will handle initialization specific to the tests but expects the database already exists +if [ "$PARALLEL" -eq 0 ] || ( [[ "$TESTFILE_NAME" =~ ^begin ]] && [ "$PARALLEL" -eq 1 ] ); then + psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "DROP DATABASE IF EXISTS ${TEST_CASE_DB};" 2>/dev/null + psql "$@" -U ${DB_USER} -d postgres -v ECHO=none -q -c "CREATE DATABASE ${TEST_CASE_DB};" 2>/dev/null + psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -c "SET client_min_messages=error; CREATE EXTENSION lantern;" 2>/dev/null + psql "$@" -U ${DB_USER} -d ${TEST_CASE_DB} -v ECHO=none -q -f utils/common.sql 2>/dev/null +fi # Exclude debug/inconsistent output from psql # So tests will always have the same output