Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a duckdb extension to vortex #2610

Merged
merged 12 commits into from
Mar 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[submodule "duckdb"]
path = duckdb/duckdb
url = https://github.com/duckdb/duckdb
[submodule "extension-ci-tools"]
path = duckdb/extension-ci-tools
url = https://github.com/duckdb/extension-ci-tools
1 change: 1 addition & 0 deletions duckdb-vortex/.clang-format
1 change: 1 addition & 0 deletions duckdb-vortex/.clang-tidy
1 change: 1 addition & 0 deletions duckdb-vortex/.editorconfig
29 changes: 29 additions & 0 deletions duckdb-vortex/.github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# This workflow calls the main distribution pipeline from DuckDB to build, test and (optionally) release the extension
#
name: Main Extension Distribution Pipeline
on:
push:
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
cancel-in-progress: true

jobs:
duckdb-next-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
duckdb_version: main
ci_tools_version: main
extension_name: vortex_duckdb

duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/[email protected]
with:
duckdb_version: v1.2.0
ci_tools_version: v1.2.0
extension_name: vortex_duckdb
12 changes: 12 additions & 0 deletions duckdb-vortex/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
build
.idea
cmake-build-debug
duckdb_unittest_tempdir/
.DS_Store
testext
test/python/__pycache__/
.Rhistory

# Keep CMAKE

!CMakeLists.txt
30 changes: 30 additions & 0 deletions duckdb-vortex/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
cmake_minimum_required(VERSION 3.5)

# Set extension name here
set(TARGET_NAME vortex_duckdb)

# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then
# used in cmake with find_package. Feel free to remove or replace with other dependencies.
# Note that it should also be removed from vcpkg.json to prevent needlessly installing it..
find_package(OpenSSL REQUIRED)

set(EXTENSION_NAME ${TARGET_NAME}_extension)
set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)

project(${TARGET_NAME})
include_directories(src/include)

set(EXTENSION_SOURCES src/vortex_duckdb_extension.cpp)

build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})

# Link OpenSSL in both the static library as the loadable extension
target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto)
target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto)

install(
TARGETS ${EXTENSION_NAME}
EXPORT "${DUCKDB_EXPORT_SET}"
LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
ARCHIVE DESTINATION "${INSTALL_LIB_DIR}")
7 changes: 7 additions & 0 deletions duckdb-vortex/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Copyright 2018-2025 Stichting DuckDB Foundation

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 changes: 8 additions & 0 deletions duckdb-vortex/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Configuration of extension
EXT_NAME=vortex_duckdb
EXT_CONFIG=${PROJ_DIR}extension_config.cmake

# Include the Makefile from extension-ci-tools
include extension-ci-tools/makefiles/duckdb_extension.Makefile
124 changes: 124 additions & 0 deletions duckdb-vortex/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# VortexDuckdb

This repository is based on https://github.com/duckdb/extension-template, check it out if you want to build and ship
your own DuckDB extension.

---

This extension, VortexDuckdb, allow you to ... <extension_goal>.

## Building

### Install required system dependencies

#### MacOS

```shell
brew install pkg-config
```

### Managing dependencies

DuckDB extensions uses VCPKG for dependency management. Enabling VCPKG is very simple: follow
the [installation instructions](https://vcpkg.io/en/getting-started) or just run the following:

```shell
git clone https://github.com/Microsoft/vcpkg.git
./vcpkg/bootstrap-vcpkg.sh
export VCPKG_TOOLCHAIN_PATH=`pwd`/vcpkg/scripts/buildsystems/vcpkg.cmake
```

Note: VCPKG is only required for extensions that want to rely on it for dependency management. If you want to develop an
extension without dependencies, or want to do your own dependency management, just skip this step. Note that the example
extension uses VCPKG to build with a dependency for instructive purposes, so when skipping this step the build may not
work without removing the dependency.

### Build steps

Now to build the extension, run:

```sh
make
```

The main binaries that will be built are:

```sh
./build/release/duckdb
./build/release/test/unittest
./build/release/extension/vortex_duckdb/vortex_duckdb.duckdb_extension
```

- `duckdb` is the binary for the duckdb shell with the extension code automatically loaded.
- `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary.
- `vortex_duckdb.duckdb_extension` is the loadable binary as it would be distributed.

## Running the extension

To run the extension code, simply start the shell with `./build/release/duckdb`.

Now we can use the features from the extension directly in DuckDB. The template contains a single scalar function
`vortex_duckdb()` that takes a string arguments and returns a string:

```
D select vortex_duckdb('Jane') as result;
┌───────────────┐
│ result │
│ varchar │
├───────────────┤
│ VortexDuckdb Jane 🐥 │
└───────────────┘
```

## Running the tests

Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL
tests in `./test/sql`. These SQL tests can be run using:

```sh
make test
```

### Installing the deployed binaries

To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the
`allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. Some examples:

CLI:

```shell
duckdb -unsigned
```

Python:

```python
con = duckdb.connect(':memory:', config={'allow_unsigned_extensions': 'true'})
```

NodeJS:

```js
db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"});
```

Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the
extension
you want to install. To do this run the following SQL query in DuckDB:

```sql
SET
custom_extension_repository='bucket.s3.eu-west-1.amazonaws.com/<your_extension_name>/latest';
```

Note that the `/latest` path will allow you to install the latest extension version available for your current version
of
DuckDB. To specify a specific version, you can pass the version instead.

After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB:

```sql
INSTALL
vortex_duckdb
LOAD vortex_duckdb
```
23 changes: 23 additions & 0 deletions duckdb-vortex/docs/UPDATING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Extension updating
When cloning this template, the target version of DuckDB should be the latest stable release of DuckDB. However, there
will inevitably come a time when a new DuckDB is released and the extension repository needs updating. This process goes
as follows:

- Bump submodules
- `./duckdb` should be set to latest tagged release
- `./extension-ci-tools` should be set to updated branch corresponding to latest DuckDB release. So if you're building for DuckDB `v1.1.0` there will be a branch in `extension-ci-tools` named `v1.1.0` to which you should check out.
- Bump versions in `./github/workflows`
- `duckdb_version` input in `duckdb-stable-build` job in `MainDistributionPipeline.yml` should be set to latest tagged release
- `duckdb_version` input in `duckdb-stable-deploy` job in `MainDistributionPipeline.yml` should be set to latest tagged release
- the reusable workflow `duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml` for the `duckdb-stable-build` job should be set to latest tagged release

# API changes
DuckDB extensions built with this extension template are built against the internal C++ API of DuckDB. This API is not guaranteed to be stable.
What this means for extension development is that when updating your extensions DuckDB target version using the above steps, you may run into the fact that your extension no longer builds properly.

Currently, DuckDB does not (yet) provide a specific change log for these API changes, but it is generally not too hard to figure out what has changed.

For figuring out how and why the C++ API changed, we recommend using the following resources:
- DuckDB's [Release Notes](https://github.com/duckdb/duckdb/releases)
- DuckDB's history of [Core extension patches](https://github.com/duckdb/duckdb/commits/main/.github/patches/extensions)
- The git history of the relevant C++ Header file of the API that has changed
1 change: 1 addition & 0 deletions duckdb-vortex/duckdb
Submodule duckdb added at 5f5512
1 change: 1 addition & 0 deletions duckdb-vortex/extension-ci-tools
Submodule extension-ci-tools added at 58970c
10 changes: 10 additions & 0 deletions duckdb-vortex/extension_config.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# This file is included by DuckDB's build system. It specifies which extension to load

# Extension from this repo
duckdb_extension_load(vortex_duckdb
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
LOAD_TESTS
)

# Any extra extensions that should be built
# e.g.: duckdb_extension_load(json)
90 changes: 90 additions & 0 deletions duckdb-vortex/scripts/extension-upload.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash

# Extension upload script

# Usage: ./extension-upload.sh <name> <extension_version> <duckdb_version> <architecture> <s3_bucket> <copy_to_latest> <copy_to_versioned>
# <name> : Name of the extension
# <extension_version> : Version (commit / version tag) of the extension
# <duckdb_version> : Version (commit / version tag) of DuckDB
# <architecture> : Architecture target of the extension binary
# <s3_bucket> : S3 bucket to upload to
# <copy_to_latest> : Set this as the latest version ("true" / "false", default: "false")
# <copy_to_versioned> : Set this as a versioned version that will prevent its deletion

set -e

if [[ $4 == wasm* ]]; then
ext="/tmp/extension/$1.duckdb_extension.wasm"
else
ext="/tmp/extension/$1.duckdb_extension"
fi

echo $ext

script_dir="$(dirname "$(readlink -f "$0")")"

# calculate SHA256 hash of extension binary
cat $ext > $ext.append

if [[ $4 == wasm* ]]; then
# 0 for custom section
# 113 in hex = 275 in decimal, total lenght of what follows (1 + 16 + 2 + 256)
# [1(continuation) + 0010011(payload) = \x93, 0(continuation) + 10(payload) = \x02]
echo -n -e '\x00' >> $ext.append
echo -n -e '\x93\x02' >> $ext.append
# 10 in hex = 16 in decimal, lenght of name, 1 byte
echo -n -e '\x10' >> $ext.append
echo -n -e 'duckdb_signature' >> $ext.append
# the name of the WebAssembly custom section, 16 bytes
# 100 in hex, 256 in decimal
# [1(continuation) + 0000000(payload) = ff, 0(continuation) + 10(payload)],
# for a grand total of 2 bytes
echo -n -e '\x80\x02' >> $ext.append
fi

# (Optionally) Sign binary
if [ "$DUCKDB_EXTENSION_SIGNING_PK" != "" ]; then
echo "$DUCKDB_EXTENSION_SIGNING_PK" > private.pem
$script_dir/../duckdb/scripts/compute-extension-hash.sh $ext.append > $ext.hash
openssl pkeyutl -sign -in $ext.hash -inkey private.pem -pkeyopt digest:sha256 -out $ext.sign
rm -f private.pem
fi

# Signature is always there, potentially defaulting to 256 zeros
truncate -s 256 $ext.sign

# append signature to extension binary
cat $ext.sign >> $ext.append

# compress extension binary
if [[ $4 == wasm_* ]]; then
brotli < $ext.append > "$ext.compressed"
else
gzip < $ext.append > "$ext.compressed"
fi

set -e

# Abort if AWS key is not set
if [ -z "$AWS_ACCESS_KEY_ID" ]; then
echo "No AWS key found, skipping.."
exit 0
fi

# upload versioned version
if [[ $7 = 'true' ]]; then
if [[ $4 == wasm* ]]; then
aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm"
else
aws s3 cp $ext.compressed s3://$5/$1/$2/$3/$4/$1.duckdb_extension.gz --acl public-read
fi
fi

# upload to latest version
if [[ $6 = 'true' ]]; then
if [[ $4 == wasm* ]]; then
aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.wasm --acl public-read --content-encoding br --content-type="application/wasm"
else
aws s3 cp $ext.compressed s3://$5/$3/$4/$1.duckdb_extension.gz --acl public-read
fi
fi
11 changes: 11 additions & 0 deletions duckdb-vortex/scripts/setup-custom-toolchain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# This is an example script that can be used to install additional toolchain dependencies. Feel free to remove this script
# if no additional toolchains are required

# To enable this script, set the `custom_toolchain_script` option to true when calling the reusable workflow
# `.github/workflows/_extension_distribution.yml` from `https://github.com/duckdb/extension-ci-tools`

# note that the $DUCKDB_PLATFORM environment variable can be used to discern between the platforms
echo "This is the sample custom toolchain script running for architecture '$DUCKDB_PLATFORM' for the vortex_duckdb extension."

14 changes: 14 additions & 0 deletions duckdb-vortex/src/include/vortex_duckdb_extension.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include "duckdb.hpp"

namespace duckdb {

class VortexDuckdbExtension : public Extension {
public:
void Load(DuckDB &db) override;
std::string Name() override;
std::string Version() const override;
};

} // namespace duckdb
Loading
Loading