Skip to content

Commit

Permalink
Release 0.0.12 (#265)
Browse files Browse the repository at this point in the history
Do a bunch of small updates and fixes
* Improve docker instructions
* Update clang format required version
* Require cpp17
* Suppress autovectorization failure warning on clang
* Allow non-root installs in dev dockerfile
* Allow update script to run even when git fetch is not available
* Deprecate unsupported updates
* Allow update to 'latest' even when corresponding tag is missing
* Rename latest update script according to the new convention
* Actually fix "latest" testing mechanism
* Add support to "latest" in Version class
* Add Release ID that defaults to latest. Must be set explicitly when doing a release
* Improve version mismatch error message
* Use "latest" as version name when building locally
* Add Release preparation instructions
* Make sure ci/cd follows release prep instructions
* Reorganize dev Dockerfile to allow non-root extension installs
* Make update script more robust
* Improve build parallelism in update tests
* Add version mismatch check to build.c
* Remove temporary files to make sure ALTER EXTENSION is run in update
* Temporarily disable erroring out from inserts in case of version mismatch to allow update scripts that rebuild the index
* Document version mismatch test status
  • Loading branch information
Ngalstyan4 authored Jan 24, 2024
1 parent 7b66765 commit 02c653f
Show file tree
Hide file tree
Showing 14 changed files with 170 additions and 61 deletions.
38 changes: 30 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
cmake_minimum_required(VERSION 3.3)

set(LANTERNDB_VERSION 0.0.11)
set(LANTERN_VERSION 0.0.12)

project(
LanternDB
VERSION ${LANTERNDB_VERSION}
VERSION ${LANTERN_VERSION}
LANGUAGES C CXX)

if (POLICY CMP0074)
Expand All @@ -18,7 +18,9 @@ if(POLICY CMP0077)
cmake_policy(SET CMP0077 NEW)
endif()

# OPTIONS
set(RELEASE_ID "latest" CACHE STRING "Release ID placed in the binary. Must be set externally when doing a release")

option(BUILD_FOR_DISTRIBUTING "Build LANTERN_VERSION info into the binary" OFF)
option(MARCH_NATIVE "Build assuming the presence of all the features in the current CPU model" OFF)
option(USEARCH_USE_SIMSIMD "Build usearch with SIMSIMD" OFF)

Expand All @@ -28,6 +30,10 @@ option(BENCH "Enable benchmarking" OFF)
option(FAILURE_POINTS "Enable failure points" ON)
option(BUILD_C_TESTS "Build C client tests" OFF)

if (${BUILD_FOR_DISTRIBUTING})
set(RELEASE_ID ${LANTERN_VERSION})
endif()

if(CODECOVERAGE)
message(STATUS "Code coverage is enabled.")
# Note that --coverage is synonym for the necessary compiler and linker flags
Expand Down Expand Up @@ -58,7 +64,7 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
message(STATUS "${CMAKE_COLOR_GREEN}Build type: ${CMAKE_BUILD_TYPE}${CMAKE_COLOR_RESET}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-conversion -Wno-unknown-pragmas")

find_package(PostgreSQL REQUIRED)
Expand Down Expand Up @@ -146,6 +152,13 @@ endforeach()
if(APPLE)
set(_link_flags "${_link_flags} -bundle_loader ${PG_BINARY} -undefined dynamic_lookup")
endif()
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# suppress warnings from autovectorization failures such as:
# loop not vectorized: the optimizer was unable to perform the
# requested transformation; the transformation might be disabled
# or specified as part of an unsupported transformation ordering [-Wpass-failed=transform-warning]
target_compile_options(lantern PRIVATE -Wno-pass-failed)
endif()

set_target_properties(
lantern
Expand Down Expand Up @@ -204,9 +217,8 @@ if (${LANTERNDB_COPYNODES})
target_compile_definitions(lantern PRIVATE LANTERNDB_COPYNODES)
endif()

set(_script_file "lantern--${LANTERNDB_VERSION}.sql")
set(_script_file "lantern--${RELEASE_ID}.sql")
set (_update_files
sql/updates/0.0.4--0.0.5.sql
sql/updates/0.0.5--0.0.6.sql
sql/updates/0.0.6--0.0.7.sql
sql/updates/0.0.7--0.0.8.sql
Expand All @@ -216,6 +228,16 @@ set (_update_files
sql/updates/0.0.11--0.0.12.sql
)

# Generate version information for the binary
EXECUTE_PROCESS(
COMMAND git log -1 --format=%h
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
OUTPUT_VARIABLE GIT_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
# OPTIONS
set(BUILD_ID "latest-${GIT_HASH}")

add_custom_command(
OUTPUT ${CMAKE_BINARY_DIR}/${_script_file}
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/sql/lantern.sql ${CMAKE_BINARY_DIR}/${_script_file}
Expand Down Expand Up @@ -311,8 +333,8 @@ if (CLANG_FORMAT)

string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CLANG_FORMAT_VERSION "${CLANG_FORMAT_VERSION}")

if(CLANG_FORMAT_VERSION VERSION_LESS 14)
message(WARNING "clang-format version ${CLANG_FORMAT_VERSION} found, need at least 14")
if(CLANG_FORMAT_VERSION VERSION_LESS 13)
message(WARNING "clang-format version ${CLANG_FORMAT_VERSION} found, need at least 13")
set(CLANG_FORMAT OFF)
endif()
endif()
Expand Down
9 changes: 9 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,12 @@ git clone https://git.postgresql.org/git/postgresql.git
# release head only
git clone --single-branch --branch REL_15_STABLE https://git.postgresql.org/git/postgresql.git --depth=1
```

## Preparing a release

1. Update the `LANTERN_VERSION` variable at the top of the `CMakeLists.txt` file
2. Prepare the SQL update script for the release
1. If there already is an update script for the current release with a 'latest' suffix, rename it according to the version name being released
2. If there is no such file, create an empty update file for the current release
3. Build the project with `cmake -DBUILD_FOR_DISTRIBUTING=YES`, which embeds the `LANTERN_VERSION` number from `CMakeLists.txt` into the binary.
Alternatively, if you want to embed a different version name into the binary, build with `-DRELEASE_ID=[version name]`, where the version name is the name of the release and must match the name used in the update file above
29 changes: 18 additions & 11 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG VERSION=15
ARG PGVECTOR_VERSION=0.5.0
ARG PGVECTOR_VERSION=0.5.1

# If you want to build the base image for different versions
# Refer to the base image Dockerfile here https://github.com/var77/postgres-docker-debug
Expand All @@ -17,28 +17,35 @@ WORKDIR /lantern

USER root
ENV DEBIAN_FRONTEND=noninteractive
RUN apt update && apt install curl -y && pip install GitPython libtmux lcov libpq5 && \
wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \
RUN apt update && apt install -y curl lcov libpq5

# allow non-root users to install in the container to make it easier to run update-tests
RUN chmod 777 /usr/local/pgsql/lib/ /usr/local/pgsql/share/extension/ /usr/local/pgsql/include/server/

USER postgres

RUN pip install GitPython libtmux

# Build & Install pgvector
RUN wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \
tar xzf pgvector.tar.gz && \
cd pgvector-${PGVECTOR_VERSION} && \
make && make install
make -j && make install

COPY . .

# Build lantern
RUN rm -rf build \
# Build & Install lantern
RUN sudo rm -rf build \
&& mkdir build \
&& cd build \
&& cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_C_TESTS=ON .. \
&& make install
&& make -j install

# Install benchmarking tools in build folder
RUN mkdir build/lantern \
&& git clone https://github.com/lanterndata/benchmark build/benchmark \
RUN git clone https://github.com/lanterndata/benchmark build/benchmark \
&& cd build/benchmark \
&& pip install -r core/requirements.txt \
&& pip install -r external/requirements.txt

ENV DATABASE_URL=postgres://postgres@localhost:5432/postgres
ENV LANTERN_DATABASE_URL=postgres://postgres@localhost:5432/postgres

USER postgres
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ Lantern builds and uses [usearch](https://github.com/unum-cloud/usearch), a sing
If you don’t have PostgreSQL already, use Lantern with [Docker](https://hub.docker.com/r/lanterndata/lantern) to get started quickly:

```bash
docker run -p 5432:5432 -e 'POSTGRES_PASSWORD=postgres' lanterndata/lantern:latest-pg15
docker run -p 5432:5432 -e "POSTGRES_USER=$USER" -e 'POSTGRES_PASSWORD=postgres' lanterndata/lantern:latest-pg15
```

To install Lantern from source on top of PostgreSQL:
Then, you can connect to the database via `postgresql://$USER:postgres@localhost/postgres`.

To install Lantern from source on top of your existing PostgreSQL:

```
git clone --recursive https://github.com/lanterndata/lantern.git
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/build-docker.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

get_cmake_flags(){
echo "-DMARCH_NATIVE=OFF"
echo "-DBUILD_FOR_DISTRIBUTING=YES -DMARCH_NATIVE=OFF"
}

export DEBIAN_FRONTEND=noninteractive
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function build_and_install() {
mkdir build
cd build

flags="-DMARCH_NATIVE=OFF"
flags="-DBUILD_FOR_DISTRIBUTING=YES -DMARCH_NATIVE=OFF"

# Treat warnings as errors in CI/CD
flags+=" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON"
Expand Down
2 changes: 1 addition & 1 deletion cmake/lantern.control.template
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
comment = 'Lantern: Fast vector embedding processing in Postgres'
default_version = '@LANTERNDB_VERSION@'
default_version = '@RELEASE_ID@'
module_pathname = '$libdir/lantern'
relocatable = false
3 changes: 2 additions & 1 deletion cmake/version.h.template
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef LDB_HNSW_VERSION_H
#define LDB_HNSW_VERSION_H

#define LDB_BINARY_VERSION "@LANTERNDB_VERSION@"
#define LDB_BINARY_VERSION "@RELEASE_ID@"
#define LDB_BUILD_ID "@BUILD_ID@"

#endif
108 changes: 79 additions & 29 deletions scripts/test_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,31 @@
import getpass
import git
import os
from functools import cmp_to_key


INCOMPATIBLE_VERSIONS = {
'16': ['0.0.4']
}
# placeholder used in sql update scripts as the next release version
LATEST="latest"

class Version:
def __init__(self, version: str):
self.latest = False
self.version = version
if version == LATEST:
self.latest = True
return

self.version_numbers = [int(n) for n in version.split('.')]
def __lt__(self, other):
if self.latest:
return False
if other.latest:
return True
for i, v in enumerate(self.version_numbers):
if v < other.version_numbers[i]:
return True
return False
def __eq__(self, other):
if self.latest or other.latest:
return self.latest == other.latest
for i, v in enumerate(self.version_numbers):
if v != other.version_numbers[i]:
return False
Expand All @@ -31,6 +40,14 @@ def __gt__(self, other):
return not self == other and not self < other
def __ge__(self, other):
return not self < other
def __str__(self):
return self.version
def __repr__(self):
return self.version

INCOMPATIBLE_VERSIONS = {
'16': [Version('0.0.4')]
}

def shell(cmd, exit_on_error=True):
res = subprocess.run(cmd, shell=True)
Expand All @@ -44,18 +61,40 @@ def shell(cmd, exit_on_error=True):
print("ERROR on command", cmd)


# Make sure lantern can smoothly be updated from from_version to to_version.
# The function installs the DB at from_version, runs an upgrade via ALTER EXTENSION ... UPDATE,
# and runs the test suite on the resulting DB.
# Note: from_version must be a valid tag on the repo that has a corresponding release and SQL migration script;
# to_version must either be the value LATEST or follow the requirements above
def update_from_tag(from_version: str, to_version: str):
from_tag = "v" + from_version
repo = git.Repo(search_parent_directories=True)
sha_before = repo.head.object.hexsha
print(repo.remotes)
repo.remotes[0].fetch()
to_sha = repo.head.object.hexsha

if to_version != LATEST:
to_tag = "v" + to_version
tag_names = [tag.name for tag in repo.tags]
if to_tag in tag_names:
to_sha = to_tag
else:
print(f"WARNING: to_version=${to_version} has not corresponding tag. assuming current HEAD corresponds to that version")

try:
repo.remotes[0].fetch()
except Exception as e:
# fetching does not work in the dev dockerfile but it does not need to,
# since we are testing the updates on the local repo
if not "error: cannot run ssh" in str(e):
raise Exception(f"unknown fetch error: {e}")


repo.git.checkout(from_tag)
sha_after = repo.head.object.hexsha
print(f"Updating from tag {from_tag}(sha: {sha_after}) to {to_version}")

# run "mkdir build && cd build && cmake .. && make -j4 && make install"
res = shell(f"mkdir -p {args.builddir} ; cd {args.builddir} && git submodule update --recursive && cmake .. && make -j4 && make install")
res = shell(f"mkdir -p {args.builddir} ; cd {args.builddir} && git submodule update --init --recursive && cmake -DRELEASE_ID={from_version} .. && make -j install")

res = shell(f"psql postgres -U {args.user} -c 'DROP DATABASE IF EXISTS {args.db};'")
res = shell(f"psql postgres -U {args.user} -c 'CREATE DATABASE {args.db};'")
Expand All @@ -76,13 +115,17 @@ def update_from_tag(from_version: str, to_version: str):
# initialize misc tests to ensure that version mismatch results in an error
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-misc FILTER=begin")

repo.git.checkout(sha_before)
res = shell(f"cd {args.builddir} ; git submodule update --recursive && cmake .. && make -j4 && make install")
# res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test")
if Version(from_version) > Version('0.0.11'):
repo.git.checkout(to_sha)
res = shell(f"cd {args.builddir} ; git submodule update --init --recursive && cmake -DRELEASE_ID={to_version} .. && make -j install")

# todo:: currently version mismatch logic only prints a warning and not an error
# we need to teach the version matching function when an update script vs client script is running for proper error enforcement
if Version(from_version) > Version('0.1.1'):
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-misc FILTER=version_mismatch")

# run the actual parallel tests after the upgrade
res = shell('rm -f /tmp/ldb_update.lock')
res = shell('rm -f /tmp/ldb_update_finished')
res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={to_version} make test-parallel EXCLUDE=begin")

print(f"Update {from_version}->{to_version} Success!")
Expand All @@ -93,11 +136,6 @@ def incompatible_version(pg_version, version_tag):
return False
return version_tag in INCOMPATIBLE_VERSIONS[pg_version]

def sort_versions(v1, v2):
a = int(v1.replace('.', ''))
b = int(v2.replace('.', ''))

return a - b

if __name__ == "__main__":

Expand Down Expand Up @@ -127,25 +165,37 @@ def sort_versions(v1, v2):
exit(1)

# test updates from all tags
tag_pairs = [update_fname.split("--") for update_fname in os.listdir("sql/updates")]
tag_pairs = [(from_tag, to_tag.split('.sql')[0]) for from_tag, to_tag in tag_pairs]
version_pairs = [update_fname.split("--") for update_fname in os.listdir("sql/updates")]
version_pairs = [(from_version, to_version.split('.sql')[0]) for from_version, to_version in version_pairs]
repo = git.Repo(search_parent_directories=True)
tags_actual = [tag.name for tag in repo.tags]
tags_actual = [name[1:] if name[0] == 'v' else name for name in tags_actual]
tag_pairs = [(from_tag, to_tag) for from_tag, to_tag in tag_pairs if from_tag in tags_actual and to_tag in tags_actual]
from_tags = list(sorted([p[0] for p in tag_pairs], key=cmp_to_key(sort_versions)))
from_tags.reverse()
to_tags = list(sorted([p[1] for p in tag_pairs], key=cmp_to_key(sort_versions)))

if len(to_tags) > 0:
latest_version = to_tags[-1]
print("Updating from tags", from_tags, "to ", latest_version)

version_pairs = [(from_v, to_v) for from_v, to_v in version_pairs]
from_versions = list(sorted([Version(p[0]) for p in version_pairs]))
from_versions.reverse()
to_versions = list(sorted([Version(p[1]) for p in version_pairs]))
for from_v in from_versions:
assert(str(from_v) in tags_actual)

num_untagged = 0
for to_v in to_versions:
if num_untagged != 0:
print(f"${to_v}, ${tags_actual}")
# only the last to_v may be untagged (when the release has not happened yet)
assert(num_untagged == 0)
if str(to_v) not in tags_actual:
num_untagged += 1

if len(to_versions) > 0:
latest_version = to_versions[-1]
print("Updating from tags", from_versions, "to ", latest_version)

pg_version = None if not 'PG_VERSION' in os.environ else os.environ['PG_VERSION']
for from_tag in from_tags:
for from_tag in from_versions:
if incompatible_version(pg_version, from_tag):
continue
update_from_tag(from_tag, latest_version)
update_from_tag(str(from_tag), str(latest_version))



Expand Down
File renamed without changes.
Loading

0 comments on commit 02c653f

Please sign in to comment.