Skip to content

Commit

Permalink
Merge branch 'main' into modify_index_on_first_insert_empty_table
Browse files Browse the repository at this point in the history
  • Loading branch information
Ngalstyan4 authored Jan 14, 2024
2 parents 4c114a3 + 670c318 commit fa65ffa
Show file tree
Hide file tree
Showing 56 changed files with 1,977 additions and 188 deletions.
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ set (_update_files
sql/updates/0.0.8--0.0.9.sql
sql/updates/0.0.9--0.0.10.sql
sql/updates/0.0.10--0.0.11.sql
sql/updates/0.0.11--0.0.12.sql
)

add_custom_command(
Expand All @@ -203,6 +204,12 @@ add_custom_command(

add_custom_target(phony_always_runs ALL DEPENDS ${CMAKE_BINARY_DIR}/${_script_file})

# Generate include/version.h in the build tree from version.h.template so that
# sources of the lantern target can include the generated header.
set(VERSION_HEADER_TEMPLATE "${CMAKE_MODULE_PATH}/version.h.template")
set(VERSION_HEADER_OUTPUT "${CMAKE_BINARY_DIR}/include/version.h")
# @ONLY restricts substitution to @VAR@ placeholders, so any literal ${...}
# text in the template is never expanded by accident.
configure_file("${VERSION_HEADER_TEMPLATE}" "${VERSION_HEADER_OUTPUT}" @ONLY)

# Expose the directory holding the generated header to lantern and its consumers.
target_include_directories(lantern PUBLIC "${CMAKE_BINARY_DIR}/include")


# AUTO-GENERATE lantern.control file for PostgreSQL
set(CONTROL_TEMPLATE "${CMAKE_MODULE_PATH}/lantern.control.template")
Expand Down Expand Up @@ -247,6 +254,13 @@ add_custom_target(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test
)


# Runs scripts/run_all_tests.sh with --misc from the test directory.
add_custom_target(
  test-misc
  COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --misc
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/test
  # VERBATIM keeps argument escaping identical across generators/platforms.
  VERBATIM
)

# BENCHMARK
add_custom_target(
benchmark
Expand Down
14 changes: 14 additions & 0 deletions ci/scripts/build-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ function setup_postgres() {
rm -f /usr/bin/pg_config && ln -s /usr/lib/postgresql/$PG_VERSION/bin/pg_config /usr/bin/pg_config
}

function install_platform_specific_dependencies() {
  # Currently lantern_extras binaries are only available for Linux x86_64
  # We won't install onnxruntime as lantern_extras are used only for external index in tests
  #
  # Downloads the lantern_extras release tarball into /tmp, runs `make install`
  # from the extracted directory and removes the downloaded files afterwards.
  # Returns non-zero if the download, extraction or installation fails.
  # LANTERN_EXTRAS_VERSION may be preset in the environment; it defaults to 0.0.6.
  LANTERN_EXTRAS_VERSION="${LANTERN_EXTRAS_VERSION:-0.0.6}"
  local release_url="https://github.com/lanterndata/lantern_extras/releases/download/${LANTERN_EXTRAS_VERSION}/lantern-extras-${LANTERN_EXTRAS_VERSION}.tar"
  local status=0

  pushd /tmp || return 1
  # Stop at the first failing step so a broken download is never unpacked or installed.
  if wget "$release_url" -O lantern-extras.tar && tar xf lantern-extras.tar; then
    if pushd "lantern-extras-${LANTERN_EXTRAS_VERSION}"; then
      make install || status=$?
      popd
    else
      status=1
    fi
  else
    status=1
  fi
  # Clean up the archive and the extracted tree regardless of the outcome.
  rm -rf lantern-extras*
  popd
  return "$status"
}

function package_if_necessary() {
if [ -n "$BUILD_PACKAGES" ]; then
# Bundle debian packages
Expand Down
8 changes: 7 additions & 1 deletion ci/scripts/build-mac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,20 @@ function setup_postgres() {
then
# Runner is github CI user
sh -c "sudo -u runner -i $cmd"
sh -c "sudo -u runner -i brew reinstall gettext"
sh -c "sudo -u runner -i brew unlink gettext"
sh -c "sudo -u runner -i brew link gettext --force"
else
sh -c $cmd
fi
}

function install_platform_specific_dependencies() {
  # No platform-specific dependencies are installed by this script; the
  # function only exists so it can be called and always succeeds.
  return 0
}

function package_if_necessary() {
  # TODO make and publish homebrew formula
  # Until then this is a no-op that always succeeds.
  true
}

function cleanup_environment() {
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ setup_environment
setup_locale_and_install_packages
setup_postgres
install_external_dependencies
install_platform_specific_dependencies
clone_or_use_source
build_and_install
package_if_necessary
Expand Down
7 changes: 5 additions & 2 deletions ci/scripts/run-tests-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ function run_db_tests(){
cd $WORKDIR/build && \
make test && \
make test-client && \
run_pgvector_tests && \
killall postgres && \
run_pgvector_tests
pg_pid=$(fuser -a 5432/tcp 2>/dev/null | awk "{print $1}" | awk '{$1=$1};1')
if [[ ! -z "$pg_pid" ]]; then
kill -9 $pg_pid
fi
gcovr -r $WORKDIR/src/ --object-directory $WORKDIR/build/ --xml /tmp/coverage.xml
fi
}
Expand Down
6 changes: 6 additions & 0 deletions cmake/version.h.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*
 * Template for a generated header. The @LANTERNDB_VERSION@ token below is a
 * placeholder that is expected to be replaced when the build system processes
 * this template.
 */
#ifndef LDB_HNSW_VERSION_H
#define LDB_HNSW_VERSION_H

/* String literal holding the substituted version value. */
#define LDB_BINARY_VERSION "@LANTERNDB_VERSION@"

#endif /* LDB_HNSW_VERSION_H */
35 changes: 29 additions & 6 deletions scripts/run_all_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,18 @@ fi

# Check if pgvector is available
pgvector_installed=$($PSQL -U $DB_USER -p $DB_PORT -d postgres -c "SELECT 1 FROM pg_available_extensions WHERE name = 'vector'" -tA | tail -n 1 | tr -d '\n')
lantern_extras_installed=$($PSQL -U $DB_USER -p $DB_PORT -d postgres -c "SELECT 1 FROM pg_available_extensions WHERE name = 'lantern_extras'" -tA | tail -n 1 | tr -d '\n')

# Settings
REGRESSION=0
PARALLEL=0
MISC=0
C_TESTS=0
while [[ "$#" -gt 0 ]]; do
case $1 in
--regression) REGRESSION=1 ;;
--parallel) PARALLEL=1 ;;
--misc) MISC=1 ;;
--client) C_TESTS=1 ;;
esac
shift
Expand All @@ -106,9 +109,12 @@ function print_test {
rm -rf $TMP_OUTDIR/schedule.txt
if [ "$PARALLEL" -eq 1 ]; then
SCHEDULE='parallel_schedule.txt'
elif [ "$MISC" -eq 1 ]; then
SCHEDULE='misc_schedule.txt'
else
SCHEDULE='schedule.txt'
fi

if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
if [ "$PARALLEL" -eq 1 ]; then
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test_begin:|test_end:/test:/' | tr " " "\n" | sed -e '/^$/d')
Expand All @@ -133,10 +139,14 @@ if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
fi
fi
else
NEWLINE=$'\n'
TEST_FILES=$(cat $SCHEDULE | grep '^test:' | tr " " "\n" | sed -e '/^$/d')
if [[ "$pgvector_installed" == "1" ]]; then
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_pgvector:)' | sed -e 's/^test_pgvector:/test:/' | tr " " "\n" | sed -e '/^$/d')
else
TEST_FILES=$(cat $SCHEDULE | grep '^test:' | tr " " "\n" | sed -e '/^$/d')
TEST_FILES="${TEST_FILES}${NEWLINE}$(cat $SCHEDULE | grep -E '^(test_pgvector:)' | sed -e 's/^test_pgvector:/test:/' | tr " " "\n" | sed -e '/^$/d')"
fi

# Append the test_extras: entries (rewritten to test:) only when the
# availability query returned "1", the same comparison used for pgvector.
if [[ "$lantern_extras_installed" == "1" ]]; then
    TEST_FILES="${TEST_FILES}${NEWLINE}$(cat $SCHEDULE | grep -E '^(test_extras:)' | sed -e 's/^test_extras:/test:/' | tr " " "\n" | sed -e '/^$/d')"
fi
fi

Expand All @@ -163,12 +173,22 @@ if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
exit 0
fi
else
if [ "$MISC" -eq 1 ]; then
echo "misc tests are not intended to be run in parallel, please include a FILTER"
exit 1
fi

while IFS= read -r line; do
if [[ "$line" =~ ^test_pgvector: ]]; then
test_name=$(echo "$line" | sed -e 's/test_pgvector://')
if [ "$pgvector_installed" == "1" ]; then
echo "test: $test_name" >> $TMP_OUTDIR/schedule.txt
fi
elif [[ "$line" =~ ^test_extras: ]]; then
test_name=$(echo "$line" | sed -e 's/test_extras://')
if [ "$lantern_extras_installed" == "1" ]; then
echo "test: $test_name" >> $TMP_OUTDIR/schedule.txt
fi
elif [[ "$line" =~ ^test_begin: ]]; then
test_name=$(echo "$line" | sed -e 's/test_begin:/test:/')
echo "$test_name" >> $TMP_OUTDIR/schedule.txt
Expand All @@ -180,7 +200,7 @@ else
fi
done < $SCHEDULE
fi
unset $SCHEDULE
unset SCHEDULE
SCHEDULE=$TMP_OUTDIR/schedule.txt

function print_diff {
Expand All @@ -199,7 +219,10 @@ trap print_diff ERR

if [ "$PARALLEL" -eq 1 ]; then
cd parallel
PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=../test_runner.sh
MISC=$MISC PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=../test_runner.sh
elif [ "$MISC" -eq 1 ]; then
cd misc
MISC=$MISC PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=../test_runner.sh
else
PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=./test_runner.sh
MISC=$MISC PARALLEL=$PARALLEL DB_USER=$DB_USER $(pg_config --pkglibdir)/pgxs/src/test/regress/pg_regress --user=$DB_USER --schedule=$SCHEDULE --outputdir=$TMP_OUTDIR --launcher=./test_runner.sh
fi
37 changes: 36 additions & 1 deletion scripts/test_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,28 @@
'16': ['0.0.4']
}

class Version:
    """Dotted numeric version string (e.g. ``'0.0.11'``) with ordering support.

    Components are compared left to right; the first differing component
    decides the result. A shorter version is treated as if padded with
    trailing zeros, so ``Version('1.0') == Version('1.0.0')``.

    Attributes:
        version_numbers: list of the integer components, most significant first.

    Raises:
        ValueError: from ``int()`` if a component of ``version`` is not numeric.
    """

    def __init__(self, version: str):
        self.version_numbers = [int(n) for n in version.split('.')]

    def _aligned(self, other):
        """Return both component lists zero-padded to equal length, or None
        when ``other`` does not carry ``version_numbers``."""
        theirs = getattr(other, 'version_numbers', None)
        if theirs is None:
            return None
        mine = list(self.version_numbers)
        theirs = list(theirs)
        width = max(len(mine), len(theirs))
        mine += [0] * (width - len(mine))
        theirs += [0] * (width - len(theirs))
        return mine, theirs

    def __lt__(self, other):
        pair = self._aligned(other)
        if pair is None:
            return NotImplemented
        # List comparison is lexicographic: the first differing component decides.
        return pair[0] < pair[1]

    def __eq__(self, other):
        pair = self._aligned(other)
        if pair is None:
            return NotImplemented
        return pair[0] == pair[1]

    def __le__(self, other):
        pair = self._aligned(other)
        if pair is None:
            return NotImplemented
        return pair[0] <= pair[1]

    def __ne__(self, other):
        pair = self._aligned(other)
        if pair is None:
            return NotImplemented
        return pair[0] != pair[1]

    def __gt__(self, other):
        pair = self._aligned(other)
        if pair is None:
            return NotImplemented
        return pair[0] > pair[1]

    def __ge__(self, other):
        pair = self._aligned(other)
        if pair is None:
            return NotImplemented
        return pair[0] >= pair[1]

def shell(cmd, exit_on_error=True):
res = subprocess.run(cmd, shell=True)
if res.returncode != 0:
Expand Down Expand Up @@ -48,9 +70,17 @@ def update_from_tag(from_version: str, to_version: str):
res = shell('rm -f /tmp/ldb_update.lock')
res = shell('rm -f /tmp/ldb_update_finished')
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-parallel FILTER=begin")

if Version(from_version) > Version('0.0.11'):
# misc tests are only run when updating from a version newer than 0.0.11; they won't work before that
# initialize misc tests to ensure that version mismatch results in an error
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-misc FILTER=begin")

repo.git.checkout(sha_before)
res = shell(f"cd {args.builddir} ; git submodule update && cmake .. && make -j4 && make install")
# res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test")
if Version(from_version) > Version('0.0.11'):
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-misc FILTER=version_mismatch")

# run the actual parallel tests after the upgrade
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test-parallel EXCLUDE=begin")
Expand Down Expand Up @@ -98,9 +128,14 @@ def sort_versions(v1, v2):

# test updates from all tags
tag_pairs = [update_fname.split("--") for update_fname in os.listdir("sql/updates")]
tag_pairs = [(from_tag, to_tag.split('.sql')[0]) for from_tag, to_tag in tag_pairs]
repo = git.Repo(search_parent_directories=True)
tags_actual = [tag.name for tag in repo.tags]
tags_actual = [name[1:] if name[0] == 'v' else name for name in tags_actual]
tag_pairs = [(from_tag, to_tag) for from_tag, to_tag in tag_pairs if from_tag in tags_actual and to_tag in tags_actual]
from_tags = list(sorted([p[0] for p in tag_pairs], key=cmp_to_key(sort_versions)))
from_tags.reverse()
to_tags = list(sorted([p[1].split(".sql")[0] for p in tag_pairs], key=cmp_to_key(sort_versions)))
to_tags = list(sorted([p[1] for p in tag_pairs], key=cmp_to_key(sort_versions)))
latest_version = to_tags[-1]
print("Updating from tags", from_tags, "to ", latest_version)

Expand Down
3 changes: 3 additions & 0 deletions sql/lantern.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
CREATE FUNCTION hnsw_handler(internal) RETURNS index_am_handler
AS 'MODULE_PATHNAME' LANGUAGE C;

-- Binds the C symbol lantern_reindex_external_index to a SQL function that
-- takes an index (regclass) and returns nothing.
-- NOTE(review): declared STABLE, yet the name suggests it rebuilds an index;
-- functions that modify the database are normally VOLATILE -- confirm the
-- intended volatility (and keep it identical in the update script).
CREATE FUNCTION lantern_reindex_external_index(index regclass) RETURNS VOID
AS 'MODULE_PATHNAME', 'lantern_reindex_external_index' LANGUAGE C STABLE STRICT PARALLEL UNSAFE;

-- functions
CREATE FUNCTION ldb_generic_dist(real[], real[]) RETURNS real
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
Expand Down
2 changes: 2 additions & 0 deletions sql/updates/0.0.11--0.0.12.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Update step: registers lantern_reindex_external_index(regclass), backed by
-- the C symbol of the same name.
-- NOTE(review): declared STABLE, yet the name suggests it rebuilds an index;
-- functions that modify the database are normally VOLATILE -- confirm the
-- intended volatility (and keep it identical to the definition in lantern.sql).
CREATE FUNCTION lantern_reindex_external_index (index regclass) RETURNS VOID AS 'MODULE_PATHNAME',
'lantern_reindex_external_index' LANGUAGE C STABLE STRICT PARALLEL UNSAFE;
8 changes: 8 additions & 0 deletions src/hnsw.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,14 @@ Datum lantern_internal_continue_blockmap_group_initialization(PG_FUNCTION_
PG_RETURN_VOID();
}

/*
 * SQL-callable wrapper: reads argument 0 as an Oid, passes it to
 * ldb_reindex_external_index() and returns void.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(lantern_reindex_external_index);
Datum lantern_reindex_external_index(PG_FUNCTION_ARGS)
{
    Oid index_oid = PG_GETARG_OID(0);

    ldb_reindex_external_index(index_oid);

    PG_RETURN_VOID();
}

/*
* Get data type for give oid
* */
Expand Down
1 change: 1 addition & 0 deletions src/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ PGDLLEXPORT Datum hamming_dist_with_guard(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum cos_dist(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum cos_dist_with_guard(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum vector_cos_dist(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum lantern_reindex_external_index(PG_FUNCTION_ARGS);

HnswColumnType GetColumnTypeFromOid(Oid oid);
HnswColumnType GetIndexColumnType(Relation index);
Expand Down
Loading

0 comments on commit fa65ffa

Please sign in to comment.