Skip to content

Commit

Permalink
Add hnsw.ef_search variable to allow changing ef at runtime (#199)
Browse files Browse the repository at this point in the history
  • Loading branch information
therealdarkknight authored Oct 25, 2023
1 parent e68e849 commit be86766
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 7 deletions.
16 changes: 15 additions & 1 deletion src/hnsw/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
static relopt_kind ldb_hnsw_index_withopts;

int ldb_hnsw_init_k;
int ldb_hnsw_ef_search;

// this variable is only set during testing and controls whether
// certain elog() calls are made
Expand Down Expand Up @@ -188,7 +189,7 @@ void _PG_init(void)

add_int_reloption(ldb_hnsw_index_withopts,
"ef",
"HNSW ef-construction hyperparameter",
"HNSW ef-search hyperparameter",
HNSW_DEFAULT_EF,
1,
HNSW_MAX_EF
Expand Down Expand Up @@ -220,6 +221,19 @@ void _PG_init(void)
NULL,
NULL);

DefineCustomIntVariable("lantern_hnsw.ef_search",
"Expansion factor to use during vector search in a scan",
"Valid values are in range [1, 400]",
&ldb_hnsw_ef_search,
USEARCH_SEARCH_EF_INVALID_VALUE,
1,
HNSW_MAX_EF,
PGC_USERSET,
0,
NULL,
NULL,
NULL);

DefineCustomBoolVariable("_lantern_internal.is_test",
"Whether or not the DB is in a regression test",
"set this to 1 to enable extra logging for use in lanterndb regression tests",
Expand Down
1 change: 1 addition & 0 deletions src/hnsw/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ usearch_metric_kind_t ldb_HnswGetMetricKind(Relation index);
bytea* ldb_amoptions(Datum reloptions, bool validate);

extern int ldb_hnsw_init_k;
extern int ldb_hnsw_ef_search;
extern bool ldb_is_test;

#endif // LDB_HNSW_OPTIONS_H
22 changes: 18 additions & 4 deletions src/hnsw/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
// about the furthest neighbors
Assert(ScanDirectionIsForward(dir));

int ef = ldb_hnsw_ef_search; // 0 if not set, but we pass it into usearch_custom_ef anyway since 0 is also a
// sentinel value there
if(scanstate->first) {
int num_returned;
Datum value;
Expand Down Expand Up @@ -199,8 +201,14 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
k,
"index size exceeded work_mem during scan, consider increasing work_mem");
ldb_dlog("LANTERN querying index for %d elements", k);
num_returned = usearch_search(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
num_returned = usearch_search(scanstate->usearch_index,
vec,
usearch_scalar_f32_k,
k,
ef,
scanstate->labels,
scanstate->distances,
&error);
ldb_wal_retriever_area_reset(scanstate->retriever_ctx, NULL);

scanstate->count = num_returned;
Expand Down Expand Up @@ -240,8 +248,14 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
"index size exceeded work_mem during scan, consider increasing work_mem");

ldb_dlog("LANTERN - querying index for %d elements", k);
num_returned = usearch_search(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
num_returned = usearch_search(scanstate->usearch_index,
vec,
usearch_scalar_f32_k,
k,
ef,
scanstate->labels,
scanstate->distances,
&error);
ldb_wal_retriever_area_reset(scanstate->retriever_ctx, NULL);

scanstate->count = num_returned;
Expand Down
197 changes: 197 additions & 0 deletions test/expected/hnsw_ef_search.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
------------------------------------------------------------------------------
-- Test changing lantern_hnsw.ef_search variable at runtime
------------------------------------------------------------------------------
\ir utils/sift1k_array.sql
CREATE TABLE IF NOT EXISTS sift_base1k (
id SERIAL,
v REAL[]
);
COPY sift_base1k (v) FROM '/tmp/lantern/vector_datasets/sift_base1k_arrays.csv' WITH csv;
CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch');
INFO: done init usearch index
INFO: done loading usearch index
INFO: done saving 1000 vectors
SELECT * FROM ldb_get_indexes('sift_base1k');
indexname | size | indexdef | total_index_size
---------------+--------+----------------------------------------------------------------------------------------------------------------------------------------------+------------------
hnsw_l2_index | 720 kB | CREATE INDEX hnsw_l2_index ON public.sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch') | 720 kB
(1 row)

INSERT INTO sift_base1k (id, v) VALUES
(1001, array_fill(1, ARRAY[128])),
(1002, array_fill(2, ARRAY[128]));
-- Validate error on invalid ef_search values
\set ON_ERROR_STOP off
SET lantern_hnsw.ef_search = -1;
ERROR: -1 is outside the valid range for parameter "lantern_hnsw.ef_search" (1 .. 400)
SET lantern_hnsw.ef_search = 0;
ERROR: 0 is outside the valid range for parameter "lantern_hnsw.ef_search" (1 .. 400)
SET lantern_hnsw.ef_search = 401;
ERROR: 401 is outside the valid range for parameter "lantern_hnsw.ef_search" (1 .. 400)
\set ON_ERROR_STOP on
-- Repeat the same query while varying ef parameter
-- NOTE: it is not entirely known if the results of these are deterministic
SET enable_seqscan = false;
SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset
-- Queries below have the same result
SET lantern_hnsw.ef_search = 1;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 2;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 4;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 8;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 16;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

-- Queries below have the same result, which is different from above
SET lantern_hnsw.ef_search = 32;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 64;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 128;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 256;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 400;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

2 changes: 1 addition & 1 deletion test/schedule.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
# - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel

test_pgvector: hnsw_vector
test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation
test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate ext_relocation hnsw_ef_search
56 changes: 56 additions & 0 deletions test/sql/hnsw_ef_search.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
------------------------------------------------------------------------------
-- Test changing lantern_hnsw.ef_search variable at runtime
--
-- Checks that out-of-range values are rejected, then re-runs one nearest-
-- neighbor query in the same session under a series of ef_search settings.
------------------------------------------------------------------------------

-- Load the shared fixture: creates sift_base1k (id SERIAL, v REAL[]) and fills v from a CSV file
\ir utils/sift1k_array.sql

-- Create the hnsw index, pointing it at an existing usearch index file, then list the table's indexes
CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch');
SELECT * FROM ldb_get_indexes('sift_base1k');

-- Add two constant 128-element vectors (all 1s and all 2s) with explicit ids;
-- the row with id 1001 is used as the query vector below
INSERT INTO sift_base1k (id, v) VALUES
(1001, array_fill(1, ARRAY[128])),
(1002, array_fill(2, ARRAY[128]));

-- Validate error on invalid ef_search values
-- All three SETs below are expected to fail (accepted range is 1 .. 400), so
-- ON_ERROR_STOP is switched off to let the script continue past the errors
\set ON_ERROR_STOP off
SET lantern_hnsw.ef_search = -1;
SET lantern_hnsw.ef_search = 0;
SET lantern_hnsw.ef_search = 401;
\set ON_ERROR_STOP on

-- Repeat the same query while varying ef parameter
-- NOTE: it is not entirely known if the results of these are deterministic
-- Sequential scans are disabled, presumably so that the ORDER BY ... LIMIT
-- queries below go through hnsw_l2_index -- TODO confirm with EXPLAIN
SET enable_seqscan = false;
-- Capture the vector of row 1001 into the psql variable v1001 (the query vector)
SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset

-- Each query prints the squared L2 distance, rounded to 2 decimals, between the
-- query vector and the 10 rows returned first when ordering by the <-> operator
-- Queries below have the same result
SET lantern_hnsw.ef_search = 1;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 2;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 4;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 8;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 16;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

-- Queries below have the same result, which is different from above
-- (from ef_search = 32 upward the recorded output includes a neighbor at
-- distance 249457.00 that the lower settings do not return)
SET lantern_hnsw.ef_search = 32;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 64;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 128;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 256;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

-- 400 is the upper bound of the accepted range
SET lantern_hnsw.ef_search = 400;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
2 changes: 1 addition & 1 deletion third_party/usearch

0 comments on commit be86766

Please sign in to comment.