diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f6f61fd..b8b0df9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,12 +38,12 @@ jobs: strategy: matrix: version: ["14", "15", "16", "17"] - runner: ["ubicloud-standard-4", "ubicloud-standard-4-arm"] + runner: ["ubuntu-22.04", "ubuntu-22.04-arm"] env: PGRX_IMAGE: "ghcr.io/tensorchord/vectorchord-pgrx:0.12.9-nightly-2024-12-25" SEMVER: ${{ needs.semver.outputs.SEMVER }} - ARCH: ${{ matrix.runner == 'ubicloud-standard-4' && 'x86_64' || 'aarch64' }} - PLATFORM: ${{ matrix.runner == 'ubicloud-standard-4' && 'amd64' || 'arm64' }} + ARCH: ${{ matrix.runner == 'ubuntu-22.04' && 'x86_64' || 'aarch64' }} + PLATFORM: ${{ matrix.runner == 'ubuntu-22.04' && 'amd64' || 'arm64' }} steps: - uses: actions/checkout@v4 @@ -65,15 +65,17 @@ jobs: - name: Build env: - VERSION: ${{ matrix.version }} GH_TOKEN: ${{ github.token }} run: | - docker run --rm -v .:/workspace $CACHE_ENVS $PGRX_IMAGE cargo build --lib --features pg$VERSION --release - docker run --rm -v .:/workspace $CACHE_ENVS -e SEMVER=${SEMVER} $PGRX_IMAGE ./tools/schema.sh --features pg$VERSION --release - ./tools/package.sh + docker run --rm -v .:/workspace $CACHE_ENVS \ + -e SEMVER=$SEMVER \ + -e VERSION=${{ matrix.version }} \ + -e ARCH=$ARCH \ + -e PLATFORM=$PLATFORM \ + $PGRX_IMAGE ./tools/package.sh ls ./build - gh release upload --clobber $SEMVER ./build/vchord-bm25-pg${VERSION}_${SEMVER}_${PLATFORM}.deb - gh release upload --clobber $SEMVER ./build/vchord-bm25-pg${VERSION}_${ARCH}-unknown-linux-gnu_${SEMVER}.zip + gh release upload --clobber $SEMVER ./build/postgresql-${{ matrix.version }}-vchord-bm25_${SEMVER}-1_${PLATFORM}.deb + gh release upload --clobber $SEMVER ./build/postgresql-${{ matrix.version }}-vchord-bm25_${SEMVER}_${ARCH}-linux-gnu.zip docker: runs-on: ubuntu-latest @@ -94,7 +96,7 @@ jobs: run: | mkdir -p build for arch in amd64 arm64; do - gh release download $SEMVER --pattern "vchord-bm25-pg${{ matrix.version }}_${SEMVER}_${arch}.deb" --output ./build/vchord-bm25-pg${{ matrix.version }}_${SEMVER}_${arch}.deb + gh release download $SEMVER --pattern "postgresql-${{ matrix.version }}-vchord-bm25_${SEMVER}-1_${arch}.deb" --output ./build/postgresql-${{ matrix.version }}-vchord-bm25_${SEMVER}-1_${arch}.deb done - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -137,4 +139,4 @@ jobs: build-args: | PG_VERSION=${{ matrix.version }} SEMVER=${{ env.SEMVER }} - PGVECTOR=0.8.0 + VCHORD_VERSION=0.2.0 diff --git a/Cargo.toml b/Cargo.toml index e2cc114..5309408 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ bytemuck = "1.18" generator = "0.8.4" lazy_static = "1.5" lending-iterator = "0.1.7" -pgrx = { version = "=0.12.8", default-features = false, features = ["cshim"] } +pgrx = { version = "=0.12.9", default-features = false, features = ["cshim"] } regex = "1.11.1" rust-stemmers = { git = "https://github.com/tensorchord/rust-stemmers.git" } serde = { version = "1.0.217", features = ["derive"] } diff --git a/docker/Dockerfile b/docker/Dockerfile index fdbdd13..7dd589f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,14 +1,14 @@ ARG PG_VERSION=17 -ARG PGVECTOR=0.8.0 +ARG VCHORD_VERSION=0.2.0 -FROM pgvector/pgvector:${PGVECTOR}-pg${PG_VERSION} +FROM tensorchord/vchord-postgres:pg${PG_VERSION}-v${VCHORD_VERSION} ARG PG_VERSION ARG SEMVER=0.0.0 ARG TARGETARCH RUN echo ${PG_VERSION} -COPY ./build/vchord-bm25-pg${PG_VERSION}_${SEMVER}_${TARGETARCH}.deb /tmp/vchord-bm25.deb +COPY ./build/postgresql-${PG_VERSION}-vchord-bm25_${SEMVER}-1_${TARGETARCH}.deb /tmp/vchord-bm25.deb RUN apt-get install -y /tmp/vchord-bm25.deb && rm -f /tmp/vchord-bm25.deb -CMD ["postgres", "-c" ,"shared_preload_libraries=vchord_bm25.so"] \ No newline at end of file +CMD ["postgres", "-c" ,"shared_preload_libraries=vchord.so,vchord_bm25.so"] \ No newline at end of file diff --git a/docker/binary.Dockerfile b/docker/binary.Dockerfile index 0085154..3f1ae52 100644 --- a/docker/binary.Dockerfile +++ b/docker/binary.Dockerfile @@ -5,4 +5,4 @@ ARG PG_VERSION ARG TARGETARCH WORKDIR /workspace -COPY ./build/vchord-bm25-pg${PG_VERSION}_${SEMVER}_${TARGETARCH}.deb /workspace/ \ No newline at end of file +COPY ./build/postgresql-${PG_VERSION}-vchord-bm25_${SEMVER}-1_${TARGETARCH}.deb /workspace/ \ No newline at end of file diff --git a/tools/package.sh b/tools/package.sh index faf08e5..6500902 100644 --- a/tools/package.sh +++ b/tools/package.sh @@ -6,39 +6,40 @@ printf "VERSION = ${VERSION}\n" printf "ARCH = ${ARCH}\n" printf "PLATFORM = ${PLATFORM}\n" -rm -rf ./build/dir_zip -rm -rf ./build/vchord-bm25-pg${VERSION}_${ARCH}-unknown-linux-gnu_${SEMVER}.zip -rm -rf ./build/dir_deb -rm -rf ./build/vchord-bm25-pg${VERSION}_${SEMVER}_${PLATFORM}.deb +cargo build --lib --features pg$VERSION --release +cargo pgrx schema --features pg$VERSION --out ./target/schema.sql -mkdir -p ./build/dir_zip -cp ./target/release/schema.sql ./build/dir_zip/vchord_bm25--$SEMVER.sql -sed -e "s/@CARGO_VERSION@/$SEMVER/g" < ./vchord_bm25.control > ./build/dir_zip/vchord_bm25.control -cp ./target/release/libvchord_bm25.so ./build/dir_zip/vchord_bm25.so -zip ./build/vchord-bm25-pg${VERSION}_${ARCH}-unknown-linux-gnu_${SEMVER}.zip -j ./build/dir_zip/* +rm -rf ./build -mkdir -p ./build/dir_deb -mkdir -p ./build/dir_deb/DEBIAN -mkdir -p ./build/dir_deb/usr/share/postgresql/$VERSION/extension/ -mkdir -p ./build/dir_deb/usr/lib/postgresql/$VERSION/lib/ -for file in $(ls ./build/dir_zip/*.sql | xargs -n 1 basename); do - cp ./build/dir_zip/$file ./build/dir_deb/usr/share/postgresql/$VERSION/extension/$file +mkdir -p ./build/zip +[[ -d ./sql/upgrade ]] && cp -a ./sql/upgrade/. ./build/zip/ +cp ./target/schema.sql ./build/zip/vchord_bm25--$SEMVER.sql +sed -e "s/@CARGO_VERSION@/$SEMVER/g" < ./vchord_bm25.control > ./build/zip/vchord_bm25.control +cp ./target/release/libvchord_bm25.so ./build/zip/vchord_bm25.so +zip ./build/postgresql-${VERSION}-vchord-bm25_${SEMVER}_${ARCH}-linux-gnu.zip -j ./build/zip/* + +mkdir -p ./build/deb +mkdir -p ./build/deb/DEBIAN +mkdir -p ./build/deb/usr/share/postgresql/$VERSION/extension/ +mkdir -p ./build/deb/usr/lib/postgresql/$VERSION/lib/ +for file in $(ls ./build/zip/*.sql | xargs -n 1 basename); do + cp ./build/zip/$file ./build/deb/usr/share/postgresql/$VERSION/extension/$file done -for file in $(ls ./build/dir_zip/*.control | xargs -n 1 basename); do - cp ./build/dir_zip/$file ./build/dir_deb/usr/share/postgresql/$VERSION/extension/$file +for file in $(ls ./build/zip/*.control | xargs -n 1 basename); do + cp ./build/zip/$file ./build/deb/usr/share/postgresql/$VERSION/extension/$file done -for file in $(ls ./build/dir_zip/*.so | xargs -n 1 basename); do - cp ./build/dir_zip/$file ./build/dir_deb/usr/lib/postgresql/$VERSION/lib/$file +for file in $(ls ./build/zip/*.so | xargs -n 1 basename); do + cp ./build/zip/$file ./build/deb/usr/lib/postgresql/$VERSION/lib/$file done -echo "Package: vchord-bm25-pg${VERSION} +echo "Package: postgresql-${VERSION}-vchord-bm25 Version: ${SEMVER} Section: database Priority: optional Architecture: ${PLATFORM} Maintainer: Tensorchord -Description: Vector database plugin for Postgres, written in Rust, specifically designed for LLM +Description: Native BM25 Ranking Index in PostgreSQL Homepage: https://github.com/tensorchord/VectorChord-bm25/ -License: AGPLv3 or ELv2" \ -> ./build/dir_deb/DEBIAN/control -(cd ./build/dir_deb && md5sum usr/share/postgresql/$VERSION/extension/* usr/lib/postgresql/$VERSION/lib/*) > ./build/dir_deb/DEBIAN/md5sums -dpkg-deb -Zxz --build ./build/dir_deb/ ./build/vchord-bm25-pg${VERSION}_${SEMVER}_${PLATFORM}.deb \ No newline at end of file +License: AGPL-3.0-only or Elastic-2.0" \ +> ./build/deb/DEBIAN/control +(cd ./build/deb && md5sum usr/share/postgresql/$VERSION/extension/* usr/lib/postgresql/$VERSION/lib/*) > ./build/deb/DEBIAN/md5sums +dpkg-deb --root-owner-group -Zxz --build ./build/deb/ ./build/postgresql-${VERSION}-vchord-bm25_${SEMVER}-1_${PLATFORM}.deb diff --git a/tools/pg_config.sh b/tools/pg_config.sh deleted file mode 100755 index 169e1d8..0000000 --- a/tools/pg_config.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -set -e - -source=$(cat -) - -if [ -z "$source" ]; then - echo "pg_config: could't find configuration file" - exit 1 -fi - -for arg in "$@"; do - if [ "$arg" = "--help" ] || [ "$arg" = "-?" ]; then - cat <. -PostgreSQL home page: -EOF - exit 0 - fi -done - -if [ $# -eq 0 ]; then - echo "$source" - exit 0 -fi - -for arg in "$@"; do - res="" - - if [[ "$arg" == --* ]]; then - var=$(echo "$arg" | cut -c 3- | tr '[:lower:]' '[:upper:]') - res=$(printf "%s" "$source" | grep -E "^$var = " - | cut -d "=" -f "2-") - fi - - if [ -z "$res" ]; then - echo "pg_config: invalid argument: $arg" - echo "Try "pg_config --help" for more information." - exit 1 - fi - - echo $res -done \ No newline at end of file diff --git a/tools/schema-codegen.sh b/tools/schema-codegen.sh deleted file mode 100644 index 9fc5ee6..0000000 --- a/tools/schema-codegen.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -set -e - -# CONTROL_FILEPATH -# SO_FILEPATH - -printf "fn main() {\n" - -cat << EOF - vchord_bm25::__pgrx_marker(); - - let mut entities = Vec::new(); - let control_file_path = std::path::PathBuf::from("$CONTROL_FILEPATH"); - let control_file = ::pgrx::pgrx_sql_entity_graph::ControlFile::try_from(control_file_path).expect(".control file should properly formatted"); - let control_file_entity = ::pgrx::pgrx_sql_entity_graph::SqlGraphEntity::ExtensionRoot(control_file); - - entities.push(control_file_entity); -EOF - -while read -r name_ident; do -cat << EOF - extern "Rust" { - fn $name_ident() -> ::pgrx::pgrx_sql_entity_graph::SqlGraphEntity; - } - let entity = unsafe { $name_ident() }; - entities.push(entity); -EOF -done <<< $(nm -D -g $SO_FILEPATH | grep "T __pgrx_internals_" | awk '{print $3}') - -cat << EOF - let pgrx_sql = ::pgrx::pgrx_sql_entity_graph::PgrxSql::build( - entities.into_iter(), - "vchord_bm25".to_string(), - false, - ) - .expect("SQL generation error"); - eprintln!("SQL entities to {}", "/dev/stdout",); - pgrx_sql - .write(&mut std::io::stdout()) - .expect("Could not write SQL to stdout"); -EOF - -printf "}\n" \ No newline at end of file diff --git a/tools/schema.sh b/tools/schema.sh deleted file mode 100644 index dad6745..0000000 --- a/tools/schema.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -set -e -if [[ " $@ " =~ --target' '([^ ]+) ]]; then - TARGET="${BASH_REMATCH[1]}" - if [[ " $@ " =~ " --release " ]]; then - DIR="./target/$TARGET/release" - else - DIR="./target/$TARGET/debug" - fi -else - if [[ " $@ " =~ " --release " ]]; then - DIR="./target/release" - else - DIR="./target/debug" - fi -fi - -if [ "$TARGET" = "" ]; then - printf "Target: [not specified]\n" 1>&2 - RUNNER=() -elif [ "$TARGET" = $(rustc -vV | awk '/^host/ { print $2 }') ]; then - printf "Target: [host]\n" 1>&2 - RUNNER=() -elif [ "$TARGET" = "aarch64-unknown-linux-gnu" ]; then - printf "Target: $TARGET\n" 1>&2 - QEMU_LD_PREFIX="/usr/aarch64-linux-gnu" - RUNNER=("qemu-aarch64-static") -elif [ "$TARGET" = "riscv64gc-unknown-linux-gnu" ]; then - printf "Target: $TARGET\n" 1>&2 - QEMU_LD_PREFIX="/usr/riscv64-linux-gnu" - RUNNER=("qemu-riscv64-static") -else - printf "Unknown target: $TARGET\n" 1>&2 - exit 1 -fi - -code=$(mktemp) -chmod 700 $code -CONTROL_FILEPATH="./vchord_bm25.control" SO_FILEPATH="$DIR/libvchord_bm25.so" $(dirname "$0")/schema-codegen.sh >> $code - -PGRX_EMBED=$code cargo rustc --package vchord_bm25 --bin pgrx_embed_vchord_bm25 "$@" -- --cfg pgrx_embed - -CARGO_PKG_VERSION=${SEMVER} QEMU_LD_PREFIX=$QEMU_LD_PREFIX "${RUNNER[@]}" "$DIR/pgrx_embed_vchord_bm25" | expand -t 4 > $DIR/schema.sql \ No newline at end of file