From 02c653f36ed7de4ee2c7f7abe40c93b9e4bdbc8b Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Tue, 23 Jan 2024 23:27:22 -0800 Subject: [PATCH] Release 0.0.12 (#265) Do a bunch of small updates and fixes * Improve docker instructions * Update clang format required version * Require cpp17 * Suppress autovectorization failure warning on clang * Allow non-root installs in dev dockerfile * Allow update script to run even when git fetch is not available * Deprecate unsupported updates * Allow update to 'latest' even when corresponding tag is missing * Rename latest update script according to the new convention * Actually fix "latest" testing mechanism * Add support to "latest" in Version class * Add Release ID that defaults to latest. Must be set explicitly when doing a release * Improve version mismatch error message * Use "latest" as version name when building locally * Add Release preparation instructions * Make sure ci/cd follows release prep instructions * Reorganize dev Dockerfile to allow non-root extension installs * Make update script more robust * Improve build parallelism in update tests * Add version mismatch check to build.c * Remove temporary files to make sure ALTER EXTENSION is run in update * Temporarily disable erroring out from inserts in case of version mismatch to allow update scripts that rebuild the index * Document version mismatch test status --- CMakeLists.txt | 38 ++++-- CONTRIBUTING.md | 9 ++ Dockerfile.dev | 29 +++-- README.md | 6 +- ci/scripts/build-docker.sh | 2 +- ci/scripts/build.sh | 2 +- cmake/lantern.control.template | 2 +- cmake/version.h.template | 3 +- scripts/test_updates.py | 108 +++++++++++++----- sql/{updates => old_updates}/0.0.4--0.0.5.sql | 0 src/hnsw/build.c | 8 ++ src/hnsw/insert.c | 4 +- src/hnsw/options.c | 9 +- src/hnsw/utils.c | 11 ++ 14 files changed, 170 insertions(+), 61 deletions(-) rename sql/{updates => old_updates}/0.0.4--0.0.5.sql (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 282877619..42de6271f 
100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,10 @@ cmake_minimum_required(VERSION 3.3) -set(LANTERNDB_VERSION 0.0.11) +set(LANTERN_VERSION 0.0.12) project( LanternDB - VERSION ${LANTERNDB_VERSION} + VERSION ${LANTERN_VERSION} LANGUAGES C CXX) if (POLICY CMP0074) @@ -18,7 +18,9 @@ if(POLICY CMP0077) cmake_policy(SET CMP0077 NEW) endif() -# OPTIONS +set(RELEASE_ID "latest" CACHE STRING "Release ID placed in the binary. Must be set externally when doing a release") + +option(BUILD_FOR_DISTRIBUTING "Build LANTERN_VERSION info into the binary" OFF) option(MARCH_NATIVE "Build assuming the presence of all the features in the current CPU model" OFF) option(USEARCH_USE_SIMSIMD "Build usearch with SIMSIMD" OFF) @@ -28,6 +30,10 @@ option(BENCH "Enable benchmarking" OFF) option(FAILURE_POINTS "Enable failure points" ON) option(BUILD_C_TESTS "Build C client tests" OFF) +if (${BUILD_FOR_DISTRIBUTING}) + set(RELEASE_ID ${LANTERN_VERSION}) +endif() + if(CODECOVERAGE) message(STATUS "Code coverage is enabled.") # Note that --coverage is synonym for the necessary compiler and linker flags @@ -58,7 +64,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() message(STATUS "${CMAKE_COLOR_GREEN}Build type: ${CMAKE_BUILD_TYPE}${CMAKE_COLOR_RESET}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-conversion -Wno-unknown-pragmas") find_package(PostgreSQL REQUIRED) @@ -146,6 +152,13 @@ endforeach() if(APPLE) set(_link_flags "${_link_flags} -bundle_loader ${PG_BINARY} -undefined dynamic_lookup") endif() +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # suppress warnings from autovectorization failures such as: + # loop not vectorized: the optimizer was unable to perform the + # requested transformation; the transformation might be disabled + # or specified as part of an unsupported transformation ordering [-Wpass-failed=transform-warning] + 
target_compile_options(lantern PRIVATE -Wno-pass-failed) +endif() set_target_properties( lantern @@ -204,9 +217,8 @@ if (${LANTERNDB_COPYNODES}) target_compile_definitions(lantern PRIVATE LANTERNDB_COPYNODES) endif() -set(_script_file "lantern--${LANTERNDB_VERSION}.sql") +set(_script_file "lantern--${RELEASE_ID}.sql") set (_update_files - sql/updates/0.0.4--0.0.5.sql sql/updates/0.0.5--0.0.6.sql sql/updates/0.0.6--0.0.7.sql sql/updates/0.0.7--0.0.8.sql @@ -216,6 +228,16 @@ set (_update_files sql/updates/0.0.11--0.0.12.sql ) +# Generate version information for the binary +EXECUTE_PROCESS( + COMMAND git log -1 --format=%h + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE GIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE +) +# OPTIONS +set(BUILD_ID "latest-${GIT_HASH}") + add_custom_command( OUTPUT ${CMAKE_BINARY_DIR}/${_script_file} COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/sql/lantern.sql ${CMAKE_BINARY_DIR}/${_script_file} @@ -311,8 +333,8 @@ if (CLANG_FORMAT) string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CLANG_FORMAT_VERSION "${CLANG_FORMAT_VERSION}") - if(CLANG_FORMAT_VERSION VERSION_LESS 14) - message(WARNING "clang-format version ${CLANG_FORMAT_VERSION} found, need at least 14") + if(CLANG_FORMAT_VERSION VERSION_LESS 13) + message(WARNING "clang-format version ${CLANG_FORMAT_VERSION} found, need at least 13") set(CLANG_FORMAT OFF) endif() endif() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index efadeeb03..98ee8b935 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,3 +79,12 @@ git clone https://git.postgresql.org/git/postgresql.git # release head only git clone --single-branch --branch REL_15_STABLE https://git.postgresql.org/git/postgresql.git --depth=1 ``` + +## Preparing a release + +1. Update LANTERN_VERSION variable at the top of CMakeLists.txt file +2. Prepare the SQL update script for the release + 1. 
If there already is an update script for the current release with a 'latest' suffix, rename it according to the version name being released + 2. If there is no such file, create an empty update file for the current release +3. Build the project with `cmake -DBUILD_FOR_DISTRIBUTING=YES` that will embed cmake version number into the binary. + Alternatively, if you want to embed a different version name into the binary, build with -DRELEASE_ID=\[version name\] where the version name is the name of the release and the name used in update file above diff --git a/Dockerfile.dev b/Dockerfile.dev index e66ec9e5e..fffe16ea7 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -1,5 +1,5 @@ ARG VERSION=15 -ARG PGVECTOR_VERSION=0.5.0 +ARG PGVECTOR_VERSION=0.5.1 # If you want to build the base image for different versions # Refer to the base image Dockerfile here https://github.com/var77/postgres-docker-debug @@ -17,28 +17,35 @@ WORKDIR /lantern USER root ENV DEBIAN_FRONTEND=noninteractive -RUN apt update && apt install curl -y && pip install GitPython libtmux lcov libpq5 && \ - wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \ +RUN apt update && apt install -y curl lcov libpq5 + +# allow non-root users to install in the container to make it easier to run update-tests +RUN chmod 777 /usr/local/pgsql/lib/ /usr/local/pgsql/share/extension/ /usr/local/pgsql/include/server/ + +USER postgres + +RUN pip install GitPython libtmux + +# Build & Install pgvector +RUN wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \ tar xzf pgvector.tar.gz && \ cd pgvector-${PGVECTOR_VERSION} && \ - make && make install + make -j && make install COPY . . -# Build lantern -RUN rm -rf build \ +# Build & Install lantern +RUN sudo rm -rf build \ && mkdir build \ && cd build \ && cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_C_TESTS=ON .. 
\ - && make install + && make -j install # Install benchmarking tools in build folder -RUN mkdir build/lantern \ - && git clone https://github.com/lanterndata/benchmark build/benchmark \ +RUN git clone https://github.com/lanterndata/benchmark build/benchmark \ && cd build/benchmark \ && pip install -r core/requirements.txt \ && pip install -r external/requirements.txt + ENV DATABASE_URL=postgres://postgres@localhost:5432/postgres ENV LANTERN_DATABASE_URL=postgres://postgres@localhost:5432/postgres - -USER postgres diff --git a/README.md b/README.md index e63c259a9..e4e27b436 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ Lantern builds and uses [usearch](https://github.com/unum-cloud/usearch), a sing If you don’t have PostgreSQL already, use Lantern with [Docker](https://hub.docker.com/r/lanterndata/lantern) to get started quickly: ```bash -docker run -p 5432:5432 -e 'POSTGRES_PASSWORD=postgres' lanterndata/lantern:latest-pg15 +docker run -p 5432:5432 -e "POSTGRES_USER=$USER" -e 'POSTGRES_PASSWORD=postgres' lanterndata/lantern:latest-pg15 ``` -To install Lantern from source on top of PostgreSQL: +Then, you can connect to the database via `postgresql://$USER:postgres@localhost/postgres`. 
+ +To install Lantern from source on top of your existing PostgreSQL: ``` git clone --recursive https://github.com/lanterndata/lantern.git diff --git a/ci/scripts/build-docker.sh b/ci/scripts/build-docker.sh index 47aa2447f..041911997 100755 --- a/ci/scripts/build-docker.sh +++ b/ci/scripts/build-docker.sh @@ -1,7 +1,7 @@ #!/bin/bash get_cmake_flags(){ - echo "-DMARCH_NATIVE=OFF" + echo "-DBUILD_FOR_DISTRIBUTING=YES -DMARCH_NATIVE=OFF" } export DEBIAN_FRONTEND=noninteractive diff --git a/ci/scripts/build.sh b/ci/scripts/build.sh index 59cffad6d..93af0aed2 100755 --- a/ci/scripts/build.sh +++ b/ci/scripts/build.sh @@ -54,7 +54,7 @@ function build_and_install() { mkdir build cd build - flags="-DMARCH_NATIVE=OFF" + flags="-DBUILD_FOR_DISTRIBUTING=YES -DMARCH_NATIVE=OFF" # Treat warnings as errors in CI/CD flags+=" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON" diff --git a/cmake/lantern.control.template b/cmake/lantern.control.template index f98813c29..9ae7359da 100644 --- a/cmake/lantern.control.template +++ b/cmake/lantern.control.template @@ -1,4 +1,4 @@ comment = 'Lantern: Fast vector embedding processing in Postgres' -default_version = '@LANTERNDB_VERSION@' +default_version = '@RELEASE_ID@' module_pathname = '$libdir/lantern' relocatable = false diff --git a/cmake/version.h.template b/cmake/version.h.template index 7d9423ffb..f97884520 100644 --- a/cmake/version.h.template +++ b/cmake/version.h.template @@ -1,6 +1,7 @@ #ifndef LDB_HNSW_VERSION_H #define LDB_HNSW_VERSION_H -#define LDB_BINARY_VERSION "@LANTERNDB_VERSION@" +#define LDB_BINARY_VERSION "@RELEASE_ID@" +#define LDB_BUILD_ID "@BUILD_ID@" #endif diff --git a/scripts/test_updates.py b/scripts/test_updates.py index 6d0935ec1..07aed2158 100644 --- a/scripts/test_updates.py +++ b/scripts/test_updates.py @@ -3,22 +3,31 @@ import getpass import git import os -from functools import cmp_to_key - -INCOMPATIBLE_VERSIONS = { - '16': ['0.0.4'] -} +# placeholder used in sql update scripts as the next release version 
+LATEST="latest" class Version: def __init__(self, version: str): + self.latest = False + self.version = version + if version == LATEST: + self.latest = True + return + self.version_numbers = [int(n) for n in version.split('.')] def __lt__(self, other): + if self.latest: + return False + if other.latest: + return True for i, v in enumerate(self.version_numbers): if v < other.version_numbers[i]: return True return False def __eq__(self, other): + if self.latest or other.latest: + return self.latest == other.latest for i, v in enumerate(self.version_numbers): if v != other.version_numbers[i]: return False @@ -31,6 +40,14 @@ def __gt__(self, other): return not self == other and not self < other def __ge__(self, other): return not self < other + def __str__(self): + return self.version + def __repr__(self): + return self.version + +INCOMPATIBLE_VERSIONS = { + '16': [Version('0.0.4')] +} def shell(cmd, exit_on_error=True): res = subprocess.run(cmd, shell=True) @@ -44,18 +61,40 @@ def shell(cmd, exit_on_error=True): print("ERROR on command", cmd) +# Make sure lantern can smoothly be updated from from_version to to_version +# the function installs the DB at from_version, runs an upgrade via ALTER EXTENSION ... UPDATE +# and runs the test suite on the resulting DB +# Note: from_version must be a valid tag on the repo that has a corresponding release and SQL migration script +# to_version must be the value LATEST or follow the requirements above def update_from_tag(from_version: str, to_version: str): from_tag = "v" + from_version repo = git.Repo(search_parent_directories=True) - sha_before = repo.head.object.hexsha print(repo.remotes) - repo.remotes[0].fetch() + to_sha = repo.head.object.hexsha + + if to_version != LATEST: + to_tag = "v" + to_version + tag_names = [tag.name for tag in repo.tags] + if to_tag in tag_names: + to_sha = to_tag + else: + print(f"WARNING: to_version=${to_version} has not corresponding tag. 
assuming current HEAD corresponds to that version") + + try: + repo.remotes[0].fetch() + except Exception as e: + # fetching does not work in the dev dockerfile but it does not need to, + # since we are testing the updates on the local repo + if not "error: cannot run ssh" in str(e): + raise Exception(f"unknown fetch error: {e}") + + repo.git.checkout(from_tag) sha_after = repo.head.object.hexsha print(f"Updating from tag {from_tag}(sha: {sha_after}) to {to_version}") # run "mkdir build && cd build && cmake .. && make -j4 && make install" - res = shell(f"mkdir -p {args.builddir} ; cd {args.builddir} && git submodule update --recursive && cmake .. && make -j4 && make install") + res = shell(f"mkdir -p {args.builddir} ; cd {args.builddir} && git submodule update --init --recursive && cmake -DRELEASE_ID={from_version} .. && make -j install") res = shell(f"psql postgres -U {args.user} -c 'DROP DATABASE IF EXISTS {args.db};'") res = shell(f"psql postgres -U {args.user} -c 'CREATE DATABASE {args.db};'") @@ -76,13 +115,17 @@ def update_from_tag(from_version: str, to_version: str): # initialize misc tests to ensure that version mismatch results in an error res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-misc FILTER=begin") - repo.git.checkout(sha_before) - res = shell(f"cd {args.builddir} ; git submodule update --recursive && cmake .. && make -j4 && make install") - # res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test") - if Version(from_version) > Version('0.0.11'): + repo.git.checkout(to_sha) + res = shell(f"cd {args.builddir} ; git submodule update --init --recursive && cmake -DRELEASE_ID={to_version} .. 
&& make -j install") + + # todo:: currently version mismatch logic only prints a warning and not an error + # we need to teach the version matching function when an update script vs client script is running for proper error enforcement + if Version(from_version) > Version('0.1.1'): res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-misc FILTER=version_mismatch") # run the actual parallel tests after the upgrade + res = shell('rm -f /tmp/ldb_update.lock') + res = shell('rm -f /tmp/ldb_update_finished') res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test-parallel EXCLUDE=begin") print(f"Update {from_version}->{to_version} Success!") @@ -93,11 +136,6 @@ def incompatible_version(pg_version, version_tag): return False return version_tag in INCOMPATIBLE_VERSIONS[pg_version] -def sort_versions(v1, v2): - a = int(v1.replace('.', '')) - b = int(v2.replace('.', '')) - - return a - b if __name__ == "__main__": @@ -127,25 +165,37 @@ def sort_versions(v1, v2): exit(1) # test updates from all tags - tag_pairs = [update_fname.split("--") for update_fname in os.listdir("sql/updates")] - tag_pairs = [(from_tag, to_tag.split('.sql')[0]) for from_tag, to_tag in tag_pairs] + version_pairs = [update_fname.split("--") for update_fname in os.listdir("sql/updates")] + version_pairs = [(from_version, to_version.split('.sql')[0]) for from_version, to_version in version_pairs] repo = git.Repo(search_parent_directories=True) tags_actual = [tag.name for tag in repo.tags] tags_actual = [name[1:] if name[0] == 'v' else name for name in tags_actual] - tag_pairs = [(from_tag, to_tag) for from_tag, to_tag in tag_pairs if from_tag in tags_actual and to_tag in tags_actual] - from_tags = list(sorted([p[0] for p in tag_pairs], key=cmp_to_key(sort_versions))) - from_tags.reverse() - to_tags = list(sorted([p[1] for p in tag_pairs], key=cmp_to_key(sort_versions))) - - if 
len(to_tags) > 0: - latest_version = to_tags[-1] - print("Updating from tags", from_tags, "to ", latest_version) + + version_pairs = [(from_v, to_v) for from_v, to_v in version_pairs] + from_versions = list(sorted([Version(p[0]) for p in version_pairs])) + from_versions.reverse() + to_versions = list(sorted([Version(p[1]) for p in version_pairs])) + for from_v in from_versions: + assert(str(from_v) in tags_actual) + + num_untagged = 0 + for to_v in to_versions: + if num_untagged != 0: + print(f"${to_v}, ${tags_actual}") + # only the last to_v may be untagged (when the release has not happened yet) + assert(num_untagged == 0) + if str(to_v) not in tags_actual: + num_untagged += 1 + + if len(to_versions) > 0: + latest_version = to_versions[-1] + print("Updating from tags", from_versions, "to ", latest_version) pg_version = None if not 'PG_VERSION' in os.environ else os.environ['PG_VERSION'] - for from_tag in from_tags: + for from_tag in from_versions: if incompatible_version(pg_version, from_tag): continue - update_from_tag(from_tag, latest_version) + update_from_tag(str(from_tag), str(latest_version)) diff --git a/sql/updates/0.0.4--0.0.5.sql b/sql/old_updates/0.0.4--0.0.5.sql similarity index 100% rename from sql/updates/0.0.4--0.0.5.sql rename to sql/old_updates/0.0.4--0.0.5.sql diff --git a/src/hnsw/build.c b/src/hnsw/build.c index ebc0a8c99..7b55ac080 100644 --- a/src/hnsw/build.c +++ b/src/hnsw/build.c @@ -599,6 +599,14 @@ IndexBuildResult *ldb_ambuild(Relation heap, Relation index, IndexInfo *indexInf IndexBuildResult *result; HnswBuildState buildstate; + // todo:: change the warning to error once VersionsMatch learns how to differentiate when an update script is + // running - it is fine to temporarily have version mismatch when we are running an update script + if(!VersionsMatch()) { + elog(WARNING, + "Attempting to build lantern index, but the SQL version and binary version do not match. This can cause " + "errors. 
Please run `ALTER EXTENSION lantern UPDATE and reconnect"); + } + BuildIndex(heap, index, indexInfo, &buildstate); result = (IndexBuildResult *)palloc(sizeof(IndexBuildResult)); diff --git a/src/hnsw/insert.c b/src/hnsw/insert.c index 6e5e090cd..4e6ae0435 100644 --- a/src/hnsw/insert.c +++ b/src/hnsw/insert.c @@ -77,8 +77,10 @@ bool ldb_aminsert(Relation index, LDB_UNUSED(indexUnchanged); #endif + // todo:: change the warning to error once VersionsMatch learns how to differentiate when an update script is + // running - it is fine to temporarily have version mismatch when we are running an update script if(!VersionsMatch()) { - elog(ERROR, + elog(WARNING, "Attempting to insert into lantern index, but the SQL version and binary version do not match. This can " "cause errors. Please run `ALTER EXTENSION lantern UPDATE and reconnect"); } diff --git a/src/hnsw/options.c b/src/hnsw/options.c index 354864c51..f839b4baa 100644 --- a/src/hnsw/options.c +++ b/src/hnsw/options.c @@ -146,12 +146,9 @@ void _PG_init(void) "Make sure to restart the server before running ALTER EXTENSION lantern UPDATE"); } - if(!VersionsMatch()) { - elog( - WARNING, - "LanternDB binary version does not match the version in SQL. This can cause errors as the two APIs may " - "differ. Please run `ALTER EXTENSION lantern UPDATE` and reconnect before attempting to work with indices"); - } + // Print a warning with helpful info + (void)VersionsMatch(); + original_post_parse_analyze_hook = post_parse_analyze_hook; original_ExecutorStart_hook = ExecutorStart_hook; diff --git a/src/hnsw/utils.c b/src/hnsw/utils.c index 819cb3cda..03ccf1878 100644 --- a/src/hnsw/utils.c +++ b/src/hnsw/utils.c @@ -177,6 +177,17 @@ bool VersionsMatch() version_checked = true; SPI_finish(); + + if(!versions_match) { + elog(WARNING, + "LanternDB binary version (%s) does not match the version in SQL (%s). This can cause errors as the " + "two " + "APIs may " + "differ. 
Please run `ALTER EXTENSION lantern UPDATE` and reconnect before attempting to work with " + "indices", + LDB_BINARY_VERSION, + version); + } return versions_match; } }