diff --git a/README.md b/README.md index 2a2a8cb..0ef53ff 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,37 @@ You can check the indexing progress by querying the `pg_stat_progress_create_ind SELECT phase, round(100.0 * blocks_done / nullif(blocks_total, 0), 1) AS "%" FROM pg_stat_progress_create_index; ``` +### External Index Precomputation + +Unlike the pure SQL approach, external index precomputation performs the clustering outside PostgreSQL first and then inserts the resulting centroids into a PostgreSQL table. Although the workflow is more involved, an external build is much faster for larger datasets (>5M vectors). + +To get started, cluster your vectors with `faiss`, `scikit-learn`, or any other clustering library. + +The centroids must be stored in a table (any name works) with 3 columns: +- id (integer): id of each centroid; must be unique +- parent (integer, nullable): parent id of each centroid; NULL for ordinary single-level clustering +- vector (vector): the centroid itself, stored as the `pgvector` vector type + +An example looks like this: + +```sql +-- Create table of centroids +CREATE TABLE centroids (id integer NOT NULL UNIQUE, parent integer, vector vector(768)); +-- Insert centroids into it +INSERT INTO centroids (id, parent, vector) VALUES (1, NULL, '[0.1, 0.2, 0.3, ..., 0.768]'); +INSERT INTO centroids (id, parent, vector) VALUES (2, NULL, '[0.4, 0.5, 0.6, ..., 0.768]'); +INSERT INTO centroids (id, parent, vector) VALUES (3, NULL, '[0.7, 0.8, 0.9, ..., 0.768]'); +-- ... + +-- Create index using the centroid table +CREATE INDEX ON gist_train USING vchordrq (embedding vector_l2_ops) WITH (options = $$ +[build.external] +table = 'centroids' +$$); +``` + +To simplify the workflow, we provide end-to-end scripts for external index precomputation; see [scripts](./scripts/README.md#run-external-index-precomputation-toolkit). + ### Installing From Source Install pgrx according to [pgrx's instruction](https://github.com/pgcentralfoundation/pgrx?tab=readme-ov-file#getting-started). ```bash diff --git a/bench/README.md b/bench/README.md deleted file mode 100644 index 57368e6..0000000 --- a/bench/README.md +++ /dev/null @@ -1,62 +0,0 @@ -## Build Docker - -```shell -sudo apt install -y build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config clang -cargo install --locked cargo-pgrx -cargo pgrx init -cargo build --package vchord --lib --features pg16 --target x86_64-unknown-linux-gnu --profile opt -./tools/schema.sh --features pg16 --target x86_64-unknown-linux-gnu --profile opt - -export SEMVER="0.0.0" -export VERSION="16" -export ARCH="x86_64" -export PLATFORM="amd64" -export PROFILE="opt" -./tools/package.sh - -docker build -t vchord:pg16-latest --build-arg PG_VERSION=16 -f ./docker/Dockerfile . -``` - -Or you can use `starkind/vchord:pg16-latest` to run the bench. 
- -## Run Instance - -```shell -docker run --name vchord -e POSTGRES_PASSWORD=123 -p 5432:5432 -d vchord:pg16-latest - -PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "CREATE USER bench WITH PASSWORD '123';" -PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "ALTER ROLE bench SUPERUSER;" -``` - -## Run Bench - -Options for `-n`: -- sift -- glove -- gist -- openai -- cohere_1m_22 -- cohere_1m_23 -- cohere_10m_23 - -```shell -# pip install pgvector numpy faiss-cpu psycopg h5py tqdm - -# If using GPU for train.py: -# conda install pytorch::faiss-gpu - -# dump table embedding column to a local h5 file["train"] -python dump.py -n sift -o sift.h5 -c embedding -d 128 - -# external k-means -python train.py -i sift.hdf5 -o sift_centroids_4096 -m l2 - -# build index (w/wo external centroids) -## with external centroids -python index.py -n sift -c sift_centroids_4096.npy -i sift.hdf5 -m l2 -p 123 -k 4096 -d 768 -w 4 -## without external centroids -## python index.py -n sift -i sift.hdf5 -m l2 -p 123 -k 4096 -d 768 -w 4 - -# bench -python bench.py -n sift -i sift.hdf5 --nprob 100 -``` diff --git a/bench/bench.py b/bench/bench.py deleted file mode 100644 index 5acd7e4..0000000 --- a/bench/bench.py +++ /dev/null @@ -1,91 +0,0 @@ -import time -import argparse -from pathlib import Path -from tqdm import tqdm - -import psycopg -import h5py -from pgvector.psycopg import register_vector - - -def build_arg_parse(): - parser = argparse.ArgumentParser() - parser.add_argument( - "-m", - "--metric", - help="Metric to pick, in l2 or cos", - choices=["l2", "cos", "dot"], - default="l2", - ) - parser.add_argument("-n", "--name", help="Dataset name, like: sift", required=True) - parser.add_argument("-i", "--input", help="input filepath", required=True) - parser.add_argument( - "-p", "--password", help="Database password", default="password" - ) - parser.add_argument( - "--nprob", help="argument vchordrq.probes for query", default=300, type=int - ) - return parser - - -def create_connection(password): - keepalive_kwargs = { - "keepalives": 1, - "keepalives_idle": 30, - "keepalives_interval": 5, - "keepalives_count": 5, - } - conn = psycopg.connect( - conninfo=f"postgresql://postgres:{password}@localhost:5432/postgres", - dbname="postgres", - autocommit=True, - **keepalive_kwargs, - ) - conn.execute("CREATE EXTENSION IF NOT EXISTS vector") - conn.execute("CREATE EXTENSION IF NOT EXISTS vchord") - register_vector(conn) - return conn - - -def bench(name, test, answer, metric_ops, conn): - m = test.shape[0] - for k in [10, 100]: - hits = 0 - delta = 0 - pbar = tqdm(enumerate(test), total=m) - for i, query in pbar: - start = time.perf_counter() - result = conn.execute( - f"SELECT id FROM {name} ORDER BY embedding {metric_ops} %s LIMIT {k}", - (query,), - ).fetchall() - end = time.perf_counter() - hits += len(set([p[0] for p in result[:k]]) & set(answer[i][:k].tolist())) - delta += end - start - pbar.set_description(f"recall: {hits / k / (i+1)} QPS: {(i+1) / delta} ") - recall = hits / k / m - qps = m / delta - print(f"Top: {k} recall: {recall:.4f} QPS: {qps:.2f}") - - -if __name__ == "__main__": - parser = build_arg_parse() - args = parser.parse_args() - print(args) - - dataset = h5py.File(Path(args.input), "r") - test = dataset["test"][:] - answer = dataset["neighbors"][:] - - if args.metric == "l2": - metric_ops = "<->" - elif args.metric == "cos": - metric_ops = "<=>" - elif args.metric == "dot": - metric_ops = "<#>" - else: - raise ValueError - conn = create_connection(args.password) - conn.execute(f"SET 
vchordrq.probes={args.nprob}") - - bench(args.name, test, answer, metric_ops, conn) diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..550402c --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,87 @@ +## Build Docker + +```shell +sudo apt install -y build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config clang +cargo install --locked cargo-pgrx +cargo pgrx init +cargo build --package vchord --lib --features pg16 --target x86_64-unknown-linux-gnu --profile opt +./tools/schema.sh --features pg16 --target x86_64-unknown-linux-gnu --profile opt + +export SEMVER="0.0.0" +export VERSION="16" +export ARCH="x86_64" +export PLATFORM="amd64" +export PROFILE="opt" +./tools/package.sh + +docker build -t vchord:pg16-latest --build-arg PG_VERSION=16 -f ./docker/Dockerfile . +``` + +## Run Instance + +```shell +docker run --name vchord -e POSTGRES_PASSWORD=123 -p 5432:5432 -d vchord:pg16-latest + +PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "CREATE USER bench WITH PASSWORD '123';" +PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "ALTER ROLE bench SUPERUSER;" +``` + +## Run External Index Precomputation Toolkit + +1. Install requirements + +```shell +# PYTHON = 3.11 +# When using CPU to train k-means clustering +conda install conda-forge::pgvector-python numpy pytorch::faiss-cpu conda-forge::psycopg h5py tqdm +# or +pip install pgvector numpy faiss-cpu psycopg h5py tqdm + +# When using GPU to train k-means clustering +conda install conda-forge::pgvector-python numpy pytorch::faiss-gpu conda-forge::psycopg h5py tqdm +``` + +2. Prepare the dataset in `hdf5` format + +   - If you already have your vectors stored in `PostgreSQL` using `pgvector`, you can export them to a local file with: +   ```shell +   python scripts/dump.py -n [table name] -c [column name] -d [dim] -o export.hdf5 +   ``` + +   - If you don't have any data but would like to give it a try, you can pick one of these datasets: + +   ```shell +   wget http://ann-benchmarks.com/sift-128-euclidean.hdf5 # num=1M dim=128 metric=l2 +   wget http://ann-benchmarks.com/gist-960-euclidean.hdf5 # num=1M dim=960 metric=l2 +   wget https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-5m-test-ip.hdf5 # num=5M dim=768 metric=dot +   wget https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-20m-test-ip.hdf5 # num=20M dim=768 metric=dot +   wget https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-100m-test-ip.hdf5 # num=100M dim=768 metric=dot +   ``` + +3. Perform clustering to compute centroids from the vectors + +   ```shell +   # For small datasets (1M to 5M vectors) +   python scripts/train.py -i [dataset file(export.hdf5)] -o [centroid filename(centroid.npy)] --lists [lists] -m [metric(l2/cos/dot)] +   # For large datasets (5M to 100M vectors), use the GPU and chunked processing +   python scripts/train.py -i [dataset file(export.hdf5)] -o [centroid filename(centroid.npy)] --lists [lists] -m [metric(l2/cos/dot)] -g --in-memory +   ``` + +   `lists` is the number of centroids for clustering; a typical value lies in the range +   $$ +   4 \cdot \sqrt{\mathrm{len}(vectors)} \le lists \le 16 \cdot \sqrt{\mathrm{len}(vectors)} +   $$ +   For example, a 1M-vector dataset gives a range of roughly 4,000 to 16,000. + +4. Insert the vectors and centroids into the database, then create the index (the equivalent SQL is sketched below) + +   ```shell +   python scripts/index.py -n [table name] -i [dataset file(export.hdf5)] -c [centroid filename(centroid.npy)] -m [metric(l2/cos/dot)] -d [dim] +   ``` + 
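+   Under the hood, `index.py` loads the centroids into a `[table name]_centroids` table and then creates the index with the external build option. A rough SQL equivalent of what the script runs (a sketch only; it assumes the `l2` metric and a dataset loaded into a table named `sift` with an `embedding` column) is: + +   ```sql +   -- sketch of the index build issued by scripts/index.py for an external build +   CREATE INDEX sift_embedding_idx ON sift USING vchordrq (embedding vector_l2_ops) WITH (options = $$ +   residual_quantization = true +   [build.external] +   table = 'public.sift_centroids' +   $$); +   ``` + 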
+5. Run the benchmark to check the performance of VectorChord + +   ```shell +   python scripts/bench.py -n [table name] -i [dataset file(export.hdf5)] -m [metric(l2/cos/dot)] -p [database password] --nprob 100 --epsilon 1.0 +   ``` + +   Larger `nprob` and `epsilon` values give more accurate queries at the cost of speed. \ No newline at end of file diff --git a/scripts/bench.py b/scripts/bench.py new file mode 100644 index 0000000..9b38a6b --- /dev/null +++ b/scripts/bench.py @@ -0,0 +1,236 @@ +import time +import argparse +from pathlib import Path +from tqdm import tqdm +import multiprocessing as mp +import numpy as np + +import psycopg +import h5py +from pgvector.psycopg import register_vector + +TOP = [10] + + +def build_arg_parse(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", + "--metric", + help="Metric to pick, in l2 or cos", + choices=["l2", "cos", "dot"], + default="l2", + ) + parser.add_argument("-n", "--name", help="Dataset name, like: sift", required=True) + parser.add_argument("-i", "--input", help="input filepath", required=True) + parser.add_argument( + "-p", "--password", help="Database password", default="password" + ) + parser.add_argument( + "--nprob", help="argument probes for query", default=100, type=int + ) + parser.add_argument( + "--epsilon", help="argument epsilon for query", type=float, default=1.0 + ) + parser.add_argument( + "--processes", help="Number of parallel processes to use", type=int, default=1 + ) + return parser + + +def create_connection(password, nprob, epsilon): + keepalive_kwargs = { + "keepalives": 1, + "keepalives_idle": 30, + "keepalives_interval": 5, + "keepalives_count": 5, + } + conn = psycopg.connect( + conninfo=f"postgresql://postgres:{password}@localhost:5432/postgres", + dbname="postgres", + autocommit=True, + **keepalive_kwargs, + ) + conn.execute("CREATE EXTENSION IF NOT EXISTS vector") + conn.execute("CREATE EXTENSION IF NOT EXISTS vchord") + # Tuning + conn.execute(f"SET jit=false") + conn.execute(f"SET effective_io_concurrency=200") + + conn.execute(f"SET vchordrq.probes={nprob}") + conn.execute(f"SET vchordrq.epsilon={epsilon}") + conn.execute(f"SELECT vchordrq_prewarm('{args.name}_embedding_idx'::regclass)") + register_vector(conn) + return conn + + +def process_batch(args): + """Process a batch of queries in a single process""" + batch_queries, batch_answers, k, metric_ops, password, name, nprob, epsilon = args + + # Create a new connection for this process + conn = create_connection(password, nprob, epsilon) + + hits = 0 + latencies = [] + results = [] + + for query, ground_truth in zip(batch_queries, batch_answers): + start = time.perf_counter() + result = conn.execute( + f"SELECT id FROM {name} ORDER BY embedding {metric_ops} %s LIMIT {k}", + (query,), + ).fetchall() + end = time.perf_counter() + + query_time = end - start + latencies.append(query_time) + + result_ids = set([p[0] for p in result[:k]]) + ground_truth_ids = set(ground_truth[:k].tolist()) + hit = len(result_ids & ground_truth_ids) + hits += hit + + results.append((hit, query_time)) + + conn.close() + return results + + +def calculate_metrics(all_results, k, m): + """Calculate recall, QPS, and latency percentiles from results""" + hits, latencies = zip(*all_results) + + total_hits = sum(hits) + total_time = sum(latencies) + + recall = total_hits / (k * m) + qps = m / total_time + + # Calculate latency percentiles (in milliseconds) + latencies_ms = np.array(latencies) * 1000 + p50 = np.percentile(latencies_ms, 50) + p99 = 
np.percentile(latencies_ms, 99) + + return recall, qps, p50, p99 + + +def parallel_bench( + name, test, answer, metric_ops, num_processes, password, nprob, epsilon +): + """Run benchmark in parallel using multiple processes""" + m = test.shape[0] + + for k in TOP: + # Split data into batches for each process + batch_size = m // num_processes + batches = [] + + for i in range(num_processes): + start_idx = i * batch_size + end_idx = start_idx + batch_size if i < num_processes - 1 else m + + batch = ( + test[start_idx:end_idx], + answer[start_idx:end_idx], + k, + metric_ops, + password, + name, + nprob, + epsilon, + ) + batches.append(batch) + + # Create process pool and execute batches + with mp.Pool(processes=num_processes) as pool: + batch_results = list( + tqdm( + pool.imap(process_batch, batches), + total=len(batches), + desc=f"Processing k={k}", + ) + ) + + # Flatten results from all batches + all_results = [result for batch in batch_results for result in batch] + + # Calculate metrics + recall, qps, p50, p99 = calculate_metrics(all_results, k, m) + + print(f"Top: {k}") + print(f" Recall: {recall:.4f}") + print(f" QPS: {qps*num_processes:.2f}") + print(f" P50 latency: {p50:.2f}ms") + print(f" P99 latency: {p99:.2f}ms") + + +def sequential_bench(name, test, answer, metric_ops, conn): + """Original sequential benchmark implementation with latency tracking""" + m = test.shape[0] + for k in TOP: + results = [] + pbar = tqdm(enumerate(test), total=m) + for i, query in pbar: + start = time.perf_counter() + result = conn.execute( + f"SELECT id FROM {name} ORDER BY embedding {metric_ops} %s LIMIT {k}", + (query,), + ).fetchall() + end = time.perf_counter() + + query_time = end - start + hit = len(set([p[0] for p in result[:k]]) & set(answer[i][:k].tolist())) + results.append((hit, query_time)) + + # Update progress bar with running metrics + curr_results = results[: i + 1] + curr_recall, curr_qps, curr_p50, _ = calculate_metrics( + curr_results, k, i + 1 + ) + pbar.set_description( + f"recall: {curr_recall:.4f} QPS: {curr_qps:.2f} P50: {curr_p50:.2f}ms" + ) + + # Calculate final metrics + recall, qps, p50, p99 = calculate_metrics(results, k, m) + + print(f"Top: {k}") + print(f" Recall: {recall:.4f}") + print(f" QPS: {qps:.2f}") + print(f" P50 latency: {p50:.2f}ms") + print(f" P99 latency: {p99:.2f}ms") + + +if __name__ == "__main__": + parser = build_arg_parse() + args = parser.parse_args() + print(args) + + dataset = h5py.File(Path(args.input), "r") + test = dataset["test"][:] + answer = dataset["neighbors"][:] + + if args.metric == "l2": + metric_ops = "<->" + elif args.metric == "cos": + metric_ops = "<=>" + elif args.metric == "dot": + metric_ops = "<#>" + else: + raise ValueError + + if args.processes > 1: + parallel_bench( + args.name, + test, + answer, + metric_ops, + args.processes, + args.password, + args.nprob, + args.epsilon, + ) + else: + conn = create_connection(args.password, args.nprob, args.epsilon) + sequential_bench(args.name, test, answer, metric_ops, conn) diff --git a/bench/dump.py b/scripts/dump.py similarity index 100% rename from bench/dump.py rename to scripts/dump.py diff --git a/bench/index.py b/scripts/index.py similarity index 77% rename from bench/index.py rename to scripts/index.py index 1cfb54c..c982543 100644 --- a/bench/index.py +++ b/scripts/index.py @@ -1,5 +1,6 @@ import asyncio import math +import os from time import perf_counter import argparse from pathlib import Path @@ -30,14 +31,10 @@ def build_arg_parse(): choices=["l2", "cos", "dot"], ) 
parser.add_argument("-n", "--name", help="Dataset name, like: sift", required=True) - parser.add_argument( - "-c", "--centroids", help="K-means centroids file", required=True - ) parser.add_argument("-i", "--input", help="Input filepath", required=True) parser.add_argument( "-p", "--password", help="Database password", default="password" ) - parser.add_argument("-k", help="Number of centroids", type=int, required=True) parser.add_argument("-d", "--dim", help="Dimension", type=int, required=True) parser.add_argument( "-w", @@ -53,42 +50,56 @@ def build_arg_parse(): type=int, default=CHUNKS, ) + # External build + parser.add_argument( + "-c", "--centroids", help="K-means centroids file", required=False + ) + # Internal build + parser.add_argument("--lists", help="Number of centroids", type=int, required=False) + return parser -def get_ivf_ops_config(metric, k, name=None): - external_centroids = """ - [external_centroids] +def get_ivf_ops_config(metric, workers, k=None, name=None): + assert name is not None or k is not None + external_centroids_cfg = """ + [build.external] table = 'public.{name}_centroids' - h1_means_column = 'coordinate' """ if metric == "l2": metric_ops = "vector_l2_ops" - ivf_config = f""" + config = "residual_quantization = true" + internal_centroids_cfg = f""" + [build.internal] lists = {k} - residual_quantization = true + build_threads = {workers} spherical_centroids = false """ - elif metric == "cosine": + elif metric == "cos": metric_ops = "vector_cosine_ops" - ivf_config = f""" + config = "residual_quantization = false" + internal_centroids_cfg = f""" + [build.internal] lists = {k} - residual_quantization = false + build_threads = {workers} spherical_centroids = true """ - elif metric == "ip": + elif metric == "dot": metric_ops = "vector_ip_ops" - ivf_config = f""" + config = "residual_quantization = false" + internal_centroids_cfg = f""" + [build.internal] lists = {k} - residual_quantization = false + build_threads = {workers} spherical_centroids = true """ else: raise ValueError - if name: - ivf_config += external_centroids.format(name=name) - return metric_ops, ivf_config + build_config = ( + external_centroids_cfg.format(name=name) if name else internal_centroids_cfg + ) + return metric_ops, "\n".join([config, build_config]) async def create_connection(url): @@ -105,17 +116,19 @@ async def create_connection(url): async def add_centroids(conn, name, centroids): - dim = centroids.shape[1] + n, dim = centroids.shape + root = np.mean(centroids, axis=0) await conn.execute(f"DROP TABLE IF EXISTS public.{name}_centroids") await conn.execute( - f"CREATE TABLE public.{name}_centroids (coordinate vector({dim}))" + f"CREATE TABLE public.{name}_centroids (id integer, parent integer, vector vector({dim}))" ) async with conn.cursor().copy( - f"COPY public.{name}_centroids (coordinate) FROM STDIN WITH (FORMAT BINARY)" + f"COPY public.{name}_centroids (id, parent, vector) FROM STDIN WITH (FORMAT BINARY)" ) as copy: - copy.set_types(["vector"]) - for centroid in tqdm(centroids, desc="Adding centroids"): - await copy.write_row((centroid,)) + copy.set_types(["integer", "integer", "vector"]) + await copy.write_row((0, None, root)) + for i, centroid in tqdm(enumerate(centroids), desc="Adding centroids", total=n): + await copy.write_row((i+1, 0, centroid)) while conn.pgconn.flush() == 1: await asyncio.sleep(0) @@ -152,7 +165,7 @@ async def build_index( await conn.execute(f"SET max_parallel_maintenance_workers TO {workers}") await conn.execute(f"SET max_parallel_workers TO {workers}") 
await conn.execute( - f"CREATE INDEX ON {name} USING vchordrq (embedding {metric_ops}) WITH (options = $${ivf_config}$$)" + f"CREATE INDEX {name}_embedding_idx ON {name} USING vchordrq (embedding {metric_ops}) WITH (options = $${ivf_config}$$)" ) print(f"Index build time: {perf_counter() - start_time:.2f}s") finish.set() @@ -189,7 +202,7 @@ async def main(dataset): centroids = np.load(args.centroids, allow_pickle=False) await add_centroids(conn, args.name, centroids) metric_ops, ivf_config = get_ivf_ops_config( - args.metric, args.k, args.name if args.centroids else None + args.metric, args.workers, args.lists, args.name if args.centroids else None ) await add_embeddings(conn, args.name, args.dim, dataset["train"], args.chunks) diff --git a/bench/train.py b/scripts/train.py similarity index 93% rename from bench/train.py rename to scripts/train.py index de4c18e..f5bb46a 100644 --- a/bench/train.py +++ b/scripts/train.py @@ -15,7 +15,7 @@ import numpy as np from tqdm import tqdm -DEFAULT_K = 4096 +DEFAULT_LISTS = 4096 N_ITER = 25 CHUNKS = 10 SEED = 42 @@ -27,12 +27,14 @@ def build_arg_parse(): parser.add_argument("-i", "--input", help="input filepath", required=True) parser.add_argument("-o", "--output", help="output filepath", required=True) parser.add_argument( - "-k", - help="K-means centroids or lists", + "--lists", + "--lists-1", + help="Number of centroids", type=int, - default=DEFAULT_K, + required=False, + default=DEFAULT_LISTS, ) - parser.add_argument("--child-k", type=int, help="lower layer lists (if enabled)") + parser.add_argument("--lists-2", type=int, help="lower layer lists (if enabled)") parser.add_argument( "--niter", help="number of iterations", type=int, default=N_ITER ) @@ -125,9 +127,9 @@ def kmeans_cluster( ): n, dim = data.shape if n > MAX_POINTS_PER_CLUSTER * k and not in_memory: - train = reservoir_sampling(iter(data), MAX_POINTS_PER_CLUSTER * args.k) + train = reservoir_sampling(iter(data), MAX_POINTS_PER_CLUSTER * args.lists) elif n > MAX_POINTS_PER_CLUSTER * k and in_memory: - reservoir_sampling_np(data, file_path, MAX_POINTS_PER_CLUSTER * args.k, chunks) + reservoir_sampling_np(data, file_path, MAX_POINTS_PER_CLUSTER * args.lists, chunks) train = np.array( np.memmap( "index.mmap", @@ -186,14 +188,14 @@ def kmeans_cluster( centroids = kmeans_cluster( dataset["train"], args.input, - args.k, - args.child_k, + args.lists, + args.lists_2, args.niter, args.metric, args.gpu, args.in_memory, args.chunks, ) - print(f"K-means (k=({args.k}, {args.child_k})): {perf_counter() - start_time:.2f}s") + print(f"K-means (k=({args.lists}, {args.lists_2})): {perf_counter() - start_time:.2f}s") np.save(Path(args.output), centroids, allow_pickle=False) diff --git a/src/index/am.rs b/src/index/am.rs index 7a3cdd9..36a38b3 100644 --- a/src/index/am.rs +++ b/src/index/am.rs @@ -365,7 +365,7 @@ impl RabbitholeLeader { } let pcxt = unsafe { pgrx::pg_sys::CreateParallelContext( - c"vchordrq".as_ptr(), + c"vchord".as_ptr(), c"vchordrq_parallel_build_main".as_ptr(), request, )