diff --git a/README.md b/README.md index 2a2a8cb..0ef53ff 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,37 @@ You can check the indexing progress by querying the `pg_stat_progress_create_ind SELECT phase, round(100.0 * blocks_done / nullif(blocks_total, 0), 1) AS "%" FROM pg_stat_progress_create_index; ``` +### External Index Precomputation + +Unlike the pure SQL approach, external index precomputation performs the clustering outside PostgreSQL first and then inserts the resulting centroids into a PostgreSQL table. Although the workflow is more involved, an external build is much faster for larger datasets (>5M vectors). + +To get started, cluster your vectors with `faiss`, `scikit-learn`, or any other clustering library. + +The centroids must be stored in a table (any name works) with 3 columns: +- id (integer): id of each centroid; must be unique +- parent (integer, nullable): parent id of each centroid; NULL for ordinary single-level clustering +- vector (vector): the centroid itself, stored as the `pgvector` vector type + +An example looks like this: + +```sql +-- Create table of centroids +CREATE TABLE centroids (id integer NOT NULL UNIQUE, parent integer, vector vector(768)); +-- Insert centroids into it +INSERT INTO centroids (id, parent, vector) VALUES (1, NULL, '[0.1, 0.2, 0.3, ..., 0.768]'); +INSERT INTO centroids (id, parent, vector) VALUES (2, NULL, '[0.4, 0.5, 0.6, ..., 0.768]'); +INSERT INTO centroids (id, parent, vector) VALUES (3, NULL, '[0.7, 0.8, 0.9, ..., 0.768]'); +-- ... + +-- Create index using the centroid table +CREATE INDEX ON gist_train USING vchordrq (embedding vector_l2_ops) WITH (options = $$ +[build.external] +table = 'centroids' +$$); +``` + +To simplify the workflow, we provide end-to-end scripts for external index precomputation; see [scripts](./scripts/README.md#run-external-index-precomputation-toolkit). + ### Installing From Source Install pgrx according to [pgrx's instruction](https://github.com/pgcentralfoundation/pgrx?tab=readme-ov-file#getting-started). ```bash diff --git a/bench/README.md b/bench/README.md deleted file mode 100644 index 57368e6..0000000 --- a/bench/README.md +++ /dev/null @@ -1,62 +0,0 @@ -## Build Docker - -```shell -sudo apt install -y build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config clang -cargo install --locked cargo-pgrx -cargo pgrx init -cargo build --package vchord --lib --features pg16 --target x86_64-unknown-linux-gnu --profile opt -./tools/schema.sh --features pg16 --target x86_64-unknown-linux-gnu --profile opt - -export SEMVER="0.0.0" -export VERSION="16" -export ARCH="x86_64" -export PLATFORM="amd64" -export PROFILE="opt" -./tools/package.sh - -docker build -t vchord:pg16-latest --build-arg PG_VERSION=16 -f ./docker/Dockerfile . -``` - -Or you can use `starkind/vchord:pg16-latest` to run the bench. 
- -## Run Instance - -```shell -docker run --name vchord -e POSTGRES_PASSWORD=123 -p 5432:5432 -d vchord:pg16-latest - -PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "CREATE USER bench WITH PASSWORD '123';" -PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "ALTER ROLE bench SUPERUSER;" -``` - -## Run Bench - -Options for `-n`: -- sift -- glove -- gist -- openai -- cohere_1m_22 -- cohere_1m_23 -- cohere_10m_23 - -```shell -# pip install pgvector numpy faiss-cpu psycopg h5py tqdm - -# If using GPU for train.py: -# conda install pytorch::faiss-gpu - -# dump table embedding column to a local h5 file["train"] -python dump.py -n sift -o sift.h5 -c embedding -d 128 - -# external k-means -python train.py -i sift.hdf5 -o sift_centroids_4096 -m l2 - -# build index (w/wo external centroids) -## with external centroids -python index.py -n sift -c sift_centroids_4096.npy -i sift.hdf5 -m l2 -p 123 -k 4096 -d 768 -w 4 -## without external centroids -## python index.py -n sift -i sift.hdf5 -m l2 -p 123 -k 4096 -d 768 -w 4 - -# bench -python bench.py -n sift -i sift.hdf5 --nprob 100 -``` diff --git a/bench/bench.py b/bench/bench.py deleted file mode 100644 index 5acd7e4..0000000 --- a/bench/bench.py +++ /dev/null @@ -1,91 +0,0 @@ -import time -import argparse -from pathlib import Path -from tqdm import tqdm - -import psycopg -import h5py -from pgvector.psycopg import register_vector - - -def build_arg_parse(): - parser = argparse.ArgumentParser() - parser.add_argument( - "-m", - "--metric", - help="Metric to pick, in l2 or cos", - choices=["l2", "cos", "dot"], - default="l2", - ) - parser.add_argument("-n", "--name", help="Dataset name, like: sift", required=True) - parser.add_argument("-i", "--input", help="input filepath", required=True) - parser.add_argument( - "-p", "--password", help="Database password", default="password" - ) - parser.add_argument( - "--nprob", help="argument vchordrq.probes for query", default=300, type=int - ) - return parser - - -def create_connection(password): - keepalive_kwargs = { - "keepalives": 1, - "keepalives_idle": 30, - "keepalives_interval": 5, - "keepalives_count": 5, - } - conn = psycopg.connect( - conninfo=f"postgresql://postgres:{password}@localhost:5432/postgres", - dbname="postgres", - autocommit=True, - **keepalive_kwargs, - ) - conn.execute("CREATE EXTENSION IF NOT EXISTS vector") - conn.execute("CREATE EXTENSION IF NOT EXISTS vchord") - register_vector(conn) - return conn - - -def bench(name, test, answer, metric_ops, conn): - m = test.shape[0] - for k in [10, 100]: - hits = 0 - delta = 0 - pbar = tqdm(enumerate(test), total=m) - for i, query in pbar: - start = time.perf_counter() - result = conn.execute( - f"SELECT id FROM {name} ORDER BY embedding {metric_ops} %s LIMIT {k}", - (query,), - ).fetchall() - end = time.perf_counter() - hits += len(set([p[0] for p in result[:k]]) & set(answer[i][:k].tolist())) - delta += end - start - pbar.set_description(f"recall: {hits / k / (i+1)} QPS: {(i+1) / delta} ") - recall = hits / k / m - qps = m / delta - print(f"Top: {k} recall: {recall:.4f} QPS: {qps:.2f}") - - -if __name__ == "__main__": - parser = build_arg_parse() - args = parser.parse_args() - print(args) - - dataset = h5py.File(Path(args.input), "r") - test = dataset["test"][:] - answer = dataset["neighbors"][:] - - if args.metric == "l2": - metric_ops = "<->" - elif args.metric == "cos": - metric_ops = "<=>" - elif args.metric == "dot": - metric_ops = "<#>" - else: - raise ValueError - conn = create_connection(args.password) - conn.execute(f"SET 
vchordrq.probes={args.nprob}") - - bench(args.name, test, answer, metric_ops, conn) diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..550402c --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,87 @@ +## Build Docker + +```shell +sudo apt install -y build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config clang +cargo install --locked cargo-pgrx +cargo pgrx init +cargo build --package vchord --lib --features pg16 --target x86_64-unknown-linux-gnu --profile opt +./tools/schema.sh --features pg16 --target x86_64-unknown-linux-gnu --profile opt + +export SEMVER="0.0.0" +export VERSION="16" +export ARCH="x86_64" +export PLATFORM="amd64" +export PROFILE="opt" +./tools/package.sh + +docker build -t vchord:pg16-latest --build-arg PG_VERSION=16 -f ./docker/Dockerfile . +``` + +## Run Instance + +```shell +docker run --name vchord -e POSTGRES_PASSWORD=123 -p 5432:5432 -d vchord:pg16-latest + +PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "CREATE USER bench WITH PASSWORD '123';" +PGPASSWORD=123 psql -h 127.0.0.1 -U postgres -c "ALTER ROLE bench SUPERUSER;" +``` + +## Run External Index Precomputation Toolkit + +1. Install requirements + +```shell +# PYTHON = 3.11 +# When using CPU to train k-means clustering +conda install conda-forge::pgvector-python numpy pytorch::faiss-cpu conda-forge::psycopg h5py tqdm +# or +pip install pgvector numpy faiss-cpu psycopg h5py tqdm + +# When using GPU to train k-means clustering +conda install conda-forge::pgvector-python numpy pytorch::faiss-gpu conda-forge::psycopg h5py tqdm +``` + +2. Prepare the dataset in `hdf5` format + +   - If you already have your vectors stored in `PostgreSQL` using `pgvector`, you can export them to a local file with: +   ```shell +   python scripts/dump.py -n [table name] -c [column name] -d [dim] -o export.hdf5 +   ``` + +   - If you don't have any data but would like to give it a try, you can pick one of these datasets: + +   ```shell +   wget http://ann-benchmarks.com/sift-128-euclidean.hdf5 # num=1M dim=128 metric=l2 +   wget http://ann-benchmarks.com/gist-960-euclidean.hdf5 # num=1M dim=960 metric=l2 +   wget https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-5m-test-ip.hdf5 # num=5M dim=768 metric=dot +   wget https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-20m-test-ip.hdf5 # num=20M dim=768 metric=dot +   wget https://myscale-datasets.s3.ap-southeast-1.amazonaws.com/laion-100m-test-ip.hdf5 # num=100M dim=768 metric=dot +   ``` + +3. Perform clustering to compute centroids from the vectors + +   ```shell +   # For small datasets (1M to 5M vectors) +   python scripts/train.py -i [dataset file(export.hdf5)] -o [centroid filename(centroid.npy)] --lists [lists] -m [metric(l2/cos/dot)] +   # For large datasets (5M to 100M vectors), use the GPU and chunked processing +   python scripts/train.py -i [dataset file(export.hdf5)] -o [centroid filename(centroid.npy)] --lists [lists] -m [metric(l2/cos/dot)] -g --in-memory +   ``` + +   `lists` is the number of centroids for clustering; a typical value lies in the range +   $$ +   4 \cdot \sqrt{\mathrm{len}(vectors)} \le lists \le 16 \cdot \sqrt{\mathrm{len}(vectors)} +   $$ +   For example, a 1M-vector dataset gives a range of roughly 4,000 to 16,000. + +4. Insert the vectors and centroids into the database, then create the index (the equivalent SQL is sketched below) + +   ```shell +   python scripts/index.py -n [table name] -i [dataset file(export.hdf5)] -c [centroid filename(centroid.npy)] -m [metric(l2/cos/dot)] -d [dim] +   ``` + 
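+   Under the hood, `index.py` loads the centroids into a `[table name]_centroids` table and then creates the index with the external build option. A rough SQL equivalent of what the script runs (a sketch only; it assumes the `l2` metric and a dataset loaded into a table named `sift` with an `embedding` column) is: + +   ```sql +   -- sketch of the index build issued by scripts/index.py for an external build +   CREATE INDEX sift_embedding_idx ON sift USING vchordrq (embedding vector_l2_ops) WITH (options = $$ +   residual_quantization = true +   [build.external] +   table = 'public.sift_centroids' +   $$); +   ``` + 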
+5. Run the benchmark to check the performance of VectorChord + +   ```shell +   python scripts/bench.py -n [table name] -i [dataset file(export.hdf5)] -m [metric(l2/cos/dot)] -p [database password] --nprob 100 --epsilon 1.0 +   ``` + +   Larger `nprob` and `epsilon` values give more accurate queries at the cost of speed. \ No newline at end of file diff --git a/scripts/bench.py b/scripts/bench.py new file mode 100644 index 0000000..9b38a6b --- /dev/null +++ b/scripts/bench.py @@ -0,0 +1,236 @@ +import time +import argparse +from pathlib import Path +from tqdm import tqdm +import multiprocessing as mp +import numpy as np + +import psycopg +import h5py +from pgvector.psycopg import register_vector + +TOP = [10] + + +def build_arg_parse(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-m", + "--metric", + help="Metric to pick, in l2 or cos", + choices=["l2", "cos", "dot"], + default="l2", + ) + parser.add_argument("-n", "--name", help="Dataset name, like: sift", required=True) + parser.add_argument("-i", "--input", help="input filepath", required=True) + parser.add_argument( + "-p", "--password", help="Database password", default="password" + ) + parser.add_argument( + "--nprob", help="argument probes for query", default=100, type=int + ) + parser.add_argument( + "--epsilon", help="argument epsilon for query", type=float, default=1.0 + ) + parser.add_argument( + "--processes", help="Number of parallel processes to use", type=int, default=1 + ) + return parser + + +def create_connection(password, nprob, epsilon): + keepalive_kwargs = { + "keepalives": 1, + "keepalives_idle": 30, + "keepalives_interval": 5, + "keepalives_count": 5, + } + conn = psycopg.connect( + conninfo=f"postgresql://postgres:{password}@localhost:5432/postgres", + dbname="postgres", + autocommit=True, + **keepalive_kwargs, + ) + conn.execute("CREATE EXTENSION IF NOT EXISTS vector") + conn.execute("CREATE EXTENSION IF NOT EXISTS vchord") + # Tuning + conn.execute(f"SET jit=false") + conn.execute(f"SET effective_io_concurrency=200") + + conn.execute(f"SET vchordrq.probes={nprob}") + conn.execute(f"SET vchordrq.epsilon={epsilon}") + conn.execute(f"SELECT vchordrq_prewarm('{args.name}_embedding_idx'::regclass)") + register_vector(conn) + return conn + + +def process_batch(args): + """Process a batch of queries in a single process""" + batch_queries, batch_answers, k, metric_ops, password, name, nprob, epsilon = args + + # Create a new connection for this process + conn = create_connection(password, nprob, epsilon) + + hits = 0 + latencies = [] + results = [] + + for query, ground_truth in zip(batch_queries, batch_answers): + start = time.perf_counter() + result = conn.execute( + f"SELECT id FROM {name} ORDER BY embedding {metric_ops} %s LIMIT {k}", + (query,), + ).fetchall() + end = time.perf_counter() + + query_time = end - start + latencies.append(query_time) + + result_ids = set([p[0] for p in result[:k]]) + ground_truth_ids = set(ground_truth[:k].tolist()) + hit = len(result_ids & ground_truth_ids) + hits += hit + + results.append((hit, query_time)) + + conn.close() + return results + + +def calculate_metrics(all_results, k, m): + """Calculate recall, QPS, and latency percentiles from results""" + hits, latencies = zip(*all_results) + + total_hits = sum(hits) + total_time = sum(latencies) + + recall = total_hits / (k * m) + qps = m / total_time + + # Calculate latency percentiles (in milliseconds) + latencies_ms = np.array(latencies) * 1000 + p50 = np.percentile(latencies_ms, 50) + p99 = 
np.percentile(latencies_ms, 99) + + return recall, qps, p50, p99 + + +def parallel_bench( + name, test, answer, metric_ops, num_processes, password, nprob, epsilon +): + """Run benchmark in parallel using multiple processes""" + m = test.shape[0] + + for k in TOP: + # Split data into batches for each process + batch_size = m // num_processes + batches = [] + + for i in range(num_processes): + start_idx = i * batch_size + end_idx = start_idx + batch_size if i < num_processes - 1 else m + + batch = ( + test[start_idx:end_idx], + answer[start_idx:end_idx], + k, + metric_ops, + password, + name, + nprob, + epsilon, + ) + batches.append(batch) + + # Create process pool and execute batches + with mp.Pool(processes=num_processes) as pool: + batch_results = list( + tqdm( + pool.imap(process_batch, batches), + total=len(batches), + desc=f"Processing k={k}", + ) + ) + + # Flatten results from all batches + all_results = [result for batch in batch_results for result in batch] + + # Calculate metrics + recall, qps, p50, p99 = calculate_metrics(all_results, k, m) + + print(f"Top: {k}") + print(f" Recall: {recall:.4f}") + print(f" QPS: {qps*num_processes:.2f}") + print(f" P50 latency: {p50:.2f}ms") + print(f" P99 latency: {p99:.2f}ms") + + +def sequential_bench(name, test, answer, metric_ops, conn): + """Original sequential benchmark implementation with latency tracking""" + m = test.shape[0] + for k in TOP: + results = [] + pbar = tqdm(enumerate(test), total=m) + for i, query in pbar: + start = time.perf_counter() + result = conn.execute( + f"SELECT id FROM {name} ORDER BY embedding {metric_ops} %s LIMIT {k}", + (query,), + ).fetchall() + end = time.perf_counter() + + query_time = end - start + hit = len(set([p[0] for p in result[:k]]) & set(answer[i][:k].tolist())) + results.append((hit, query_time)) + + # Update progress bar with running metrics + curr_results = results[: i + 1] + curr_recall, curr_qps, curr_p50, _ = calculate_metrics( + curr_results, k, i + 1 + ) + pbar.set_description( + f"recall: {curr_recall:.4f} QPS: {curr_qps:.2f} P50: {curr_p50:.2f}ms" + ) + + # Calculate final metrics + recall, qps, p50, p99 = calculate_metrics(results, k, m) + + print(f"Top: {k}") + print(f" Recall: {recall:.4f}") + print(f" QPS: {qps:.2f}") + print(f" P50 latency: {p50:.2f}ms") + print(f" P99 latency: {p99:.2f}ms") + + +if __name__ == "__main__": + parser = build_arg_parse() + args = parser.parse_args() + print(args) + + dataset = h5py.File(Path(args.input), "r") + test = dataset["test"][:] + answer = dataset["neighbors"][:] + + if args.metric == "l2": + metric_ops = "<->" + elif args.metric == "cos": + metric_ops = "<=>" + elif args.metric == "dot": + metric_ops = "<#>" + else: + raise ValueError + + if args.processes > 1: + parallel_bench( + args.name, + test, + answer, + metric_ops, + args.processes, + args.password, + args.nprob, + args.epsilon, + ) + else: + conn = create_connection(args.password, args.nprob, args.epsilon) + sequential_bench(args.name, test, answer, metric_ops, conn) diff --git a/bench/dump.py b/scripts/dump.py similarity index 100% rename from bench/dump.py rename to scripts/dump.py diff --git a/bench/index.py b/scripts/index.py similarity index 77% rename from bench/index.py rename to scripts/index.py index 1cfb54c..c982543 100644 --- a/bench/index.py +++ b/scripts/index.py @@ -1,5 +1,6 @@ import asyncio import math +import os from time import perf_counter import argparse from pathlib import Path @@ -30,14 +31,10 @@ def build_arg_parse(): choices=["l2", "cos", "dot"], ) 
parser.add_argument("-n", "--name", help="Dataset name, like: sift", required=True) - parser.add_argument( - "-c", "--centroids", help="K-means centroids file", required=True - ) parser.add_argument("-i", "--input", help="Input filepath", required=True) parser.add_argument( "-p", "--password", help="Database password", default="password" ) - parser.add_argument("-k", help="Number of centroids", type=int, required=True) parser.add_argument("-d", "--dim", help="Dimension", type=int, required=True) parser.add_argument( "-w", @@ -53,42 +50,56 @@ def build_arg_parse(): type=int, default=CHUNKS, ) + # External build + parser.add_argument( + "-c", "--centroids", help="K-means centroids file", required=False + ) + # Internal build + parser.add_argument("--lists", help="Number of centroids", type=int, required=False) + return parser -def get_ivf_ops_config(metric, k, name=None): - external_centroids = """ - [external_centroids] +def get_ivf_ops_config(metric, workers, k=None, name=None): + assert name is not None or k is not None + external_centroids_cfg = """ + [build.external] table = 'public.{name}_centroids' - h1_means_column = 'coordinate' """ if metric == "l2": metric_ops = "vector_l2_ops" - ivf_config = f""" + config = "residual_quantization = true" + internal_centroids_cfg = f""" + [build.internal] lists = {k} - residual_quantization = true + build_threads = {workers} spherical_centroids = false """ - elif metric == "cosine": + elif metric == "cos": metric_ops = "vector_cosine_ops" - ivf_config = f""" + config = "residual_quantization = false" + internal_centroids_cfg = f""" + [build.internal] lists = {k} - residual_quantization = false + build_threads = {workers} spherical_centroids = true """ - elif metric == "ip": + elif metric == "dot": metric_ops = "vector_ip_ops" - ivf_config = f""" + config = "residual_quantization = false" + internal_centroids_cfg = f""" + [build.internal] lists = {k} - residual_quantization = false + build_threads = {workers} spherical_centroids = true """ else: raise ValueError - if name: - ivf_config += external_centroids.format(name=name) - return metric_ops, ivf_config + build_config = ( + external_centroids_cfg.format(name=name) if name else internal_centroids_cfg + ) + return metric_ops, "\n".join([config, build_config]) async def create_connection(url): @@ -105,17 +116,19 @@ async def create_connection(url): async def add_centroids(conn, name, centroids): - dim = centroids.shape[1] + n, dim = centroids.shape + root = np.mean(centroids, axis=0) await conn.execute(f"DROP TABLE IF EXISTS public.{name}_centroids") await conn.execute( - f"CREATE TABLE public.{name}_centroids (coordinate vector({dim}))" + f"CREATE TABLE public.{name}_centroids (id integer, parent integer, vector vector({dim}))" ) async with conn.cursor().copy( - f"COPY public.{name}_centroids (coordinate) FROM STDIN WITH (FORMAT BINARY)" + f"COPY public.{name}_centroids (id, parent, vector) FROM STDIN WITH (FORMAT BINARY)" ) as copy: - copy.set_types(["vector"]) - for centroid in tqdm(centroids, desc="Adding centroids"): - await copy.write_row((centroid,)) + copy.set_types(["integer", "integer", "vector"]) + await copy.write_row((0, None, root)) + for i, centroid in tqdm(enumerate(centroids), desc="Adding centroids", total=n): + await copy.write_row((i+1, 0, centroid)) while conn.pgconn.flush() == 1: await asyncio.sleep(0) @@ -152,7 +165,7 @@ async def build_index( await conn.execute(f"SET max_parallel_maintenance_workers TO {workers}") await conn.execute(f"SET max_parallel_workers TO {workers}") 
await conn.execute( - f"CREATE INDEX ON {name} USING vchordrq (embedding {metric_ops}) WITH (options = $${ivf_config}$$)" + f"CREATE INDEX {name}_embedding_idx ON {name} USING vchordrq (embedding {metric_ops}) WITH (options = $${ivf_config}$$)" ) print(f"Index build time: {perf_counter() - start_time:.2f}s") finish.set() @@ -189,7 +202,7 @@ async def main(dataset): centroids = np.load(args.centroids, allow_pickle=False) await add_centroids(conn, args.name, centroids) metric_ops, ivf_config = get_ivf_ops_config( - args.metric, args.k, args.name if args.centroids else None + args.metric, args.workers, args.lists, args.name if args.centroids else None ) await add_embeddings(conn, args.name, args.dim, dataset["train"], args.chunks) diff --git a/bench/train.py b/scripts/train.py similarity index 93% rename from bench/train.py rename to scripts/train.py index de4c18e..f5bb46a 100644 --- a/bench/train.py +++ b/scripts/train.py @@ -15,7 +15,7 @@ import numpy as np from tqdm import tqdm -DEFAULT_K = 4096 +DEFAULT_LISTS = 4096 N_ITER = 25 CHUNKS = 10 SEED = 42 @@ -27,12 +27,14 @@ def build_arg_parse(): parser.add_argument("-i", "--input", help="input filepath", required=True) parser.add_argument("-o", "--output", help="output filepath", required=True) parser.add_argument( - "-k", - help="K-means centroids or lists", + "--lists", + "--lists-1", + help="Number of centroids", type=int, - default=DEFAULT_K, + required=False, + default=DEFAULT_LISTS, ) - parser.add_argument("--child-k", type=int, help="lower layer lists (if enabled)") + parser.add_argument("--lists-2", type=int, help="lower layer lists (if enabled)") parser.add_argument( "--niter", help="number of iterations", type=int, default=N_ITER ) @@ -125,9 +127,9 @@ def kmeans_cluster( ): n, dim = data.shape if n > MAX_POINTS_PER_CLUSTER * k and not in_memory: - train = reservoir_sampling(iter(data), MAX_POINTS_PER_CLUSTER * args.k) + train = reservoir_sampling(iter(data), MAX_POINTS_PER_CLUSTER * args.lists) elif n > MAX_POINTS_PER_CLUSTER * k and in_memory: - reservoir_sampling_np(data, file_path, MAX_POINTS_PER_CLUSTER * args.k, chunks) + reservoir_sampling_np(data, file_path, MAX_POINTS_PER_CLUSTER * args.lists, chunks) train = np.array( np.memmap( "index.mmap", @@ -186,14 +188,14 @@ def kmeans_cluster( centroids = kmeans_cluster( dataset["train"], args.input, - args.k, - args.child_k, + args.lists, + args.lists_2, args.niter, args.metric, args.gpu, args.in_memory, args.chunks, ) - print(f"K-means (k=({args.k}, {args.child_k})): {perf_counter() - start_time:.2f}s") + print(f"K-means (k=({args.lists}, {args.lists_2})): {perf_counter() - start_time:.2f}s") np.save(Path(args.output), centroids, allow_pickle=False) diff --git a/src/index/am.rs b/src/index/am.rs index 7a3cdd9..36a38b3 100644 --- a/src/index/am.rs +++ b/src/index/am.rs @@ -365,7 +365,7 @@ impl RabbitholeLeader { } let pcxt = unsafe { pgrx::pg_sys::CreateParallelContext( - c"vchordrq".as_ptr(), + c"vchord".as_ptr(), c"vchordrq_parallel_build_main".as_ptr(), request, )