Skip to content

Commit

Permalink
renamed to lantern_hnsw.ef_search, refactored usearch code, and added a test hnsw_ef_search
Browse files Browse the repository at this point in the history
  • Loading branch information
therealdarkknight committed Oct 16, 2023
1 parent b5b93ac commit 7c260fc
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 21 deletions.
5 changes: 2 additions & 3 deletions src/hnsw/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,11 @@ void _PG_init(void)
NULL,
NULL);

DefineCustomIntVariable("hnsw.search_ef",
DefineCustomIntVariable("lantern_hnsw.ef_search",
"Expansion factor to use during vector search in a scan",
"Valid values are in range [1, 400]",
&ldb_hnsw_ef_search,
// HNSW_DEFAULT_EF,
0, // sentinel value
USEARCH_SEARCH_EF_INVALID_VALUE,
1,
HNSW_MAX_EF,
PGC_USERSET,
Expand Down
32 changes: 16 additions & 16 deletions src/hnsw/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,14 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
}

ldb_dlog("LANTERN querying index for %d elements", k);
num_returned = usearch_search_custom_ef(scanstate->usearch_index,
vec,
usearch_scalar_f32_k,
k,
ef,
scanstate->labels,
scanstate->distances,
&error);
num_returned = usearch_search(scanstate->usearch_index,
vec,
usearch_scalar_f32_k,
k,
ef,
scanstate->labels,
scanstate->distances,
&error);
ldb_wal_retriever_area_reset(scanstate->retriever_ctx, NULL);

scanstate->count = num_returned;
Expand Down Expand Up @@ -236,14 +236,14 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
scanstate->labels = repalloc(scanstate->labels, k * sizeof(usearch_label_t));

ldb_dlog("LANTERN - querying index for %d elements", k);
num_returned = usearch_search_custom_ef(scanstate->usearch_index,
vec,
usearch_scalar_f32_k,
k,
ef,
scanstate->labels,
scanstate->distances,
&error);
num_returned = usearch_search(scanstate->usearch_index,
vec,
usearch_scalar_f32_k,
k,
ef,
scanstate->labels,
scanstate->distances,
&error);
ldb_wal_retriever_area_reset(scanstate->retriever_ctx, NULL);

scanstate->count = num_returned;
Expand Down
194 changes: 194 additions & 0 deletions test/expected/hnsw_ef_search.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
------------------------------------------------------------------------------
-- Test changing lantern_hnsw.ef_search variable at runtime
------------------------------------------------------------------------------
\ir utils/sift1k_array.sql
CREATE TABLE IF NOT EXISTS sift_base1k (
id SERIAL,
v REAL[]
);
COPY sift_base1k (v) FROM '/tmp/lantern/vector_datasets/sift_base1k_arrays.csv' WITH csv;
CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch');
INFO: done init usearch index
INFO: done loading usearch index
INFO: done saving 1000 vectors
SELECT * FROM ldb_get_indexes('sift_base1k');
indexname | size | indexdef | total_index_size
---------------+--------+----------------------------------------------------------------------------------------------------------------------------------------------+------------------
hnsw_l2_index | 720 kB | CREATE INDEX hnsw_l2_index ON public.sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch') | 720 kB
(1 row)

INSERT INTO sift_base1k (id, v) VALUES
(1001, array_fill(1, ARRAY[128])),
(1002, array_fill(2, ARRAY[128]));
-- Validate error on invalid ef_search values
\set ON_ERROR_STOP off
--SET lantern_hnsw.ef_search = -1;
--SET lantern_hnsw.ef_search = 0;
--SET lantern_hnsw.ef_search = 401;
\set ON_ERROR_STOP on
-- Repeat the same query while varying ef parameter
-- NOTE: it is not entirely known if the results of these are deterministic
SET enable_seqscan = false;
SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset
-- Queries below have the same result
SET lantern_hnsw.ef_search = 1;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 2;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 4;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 8;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

SET lantern_hnsw.ef_search = 16;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249515.00
249589.00
249647.00
249652.00
249675.00
(10 rows)

-- Queries below have the same result, which is different from above
SET lantern_hnsw.ef_search = 32;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 64;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 128;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 256;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

SET lantern_hnsw.ef_search = 400;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
round
-----------
0.00
128.00
249249.00
249285.00
249418.00
249457.00
249515.00
249589.00
249647.00
249652.00
(10 rows)

2 changes: 1 addition & 1 deletion test/schedule.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
# - 'test' lines may have multiple space-separated tests. All tests in a single 'test' line will be run in parallel

test_pgvector: hnsw_vector
test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate
test: hnsw_config hnsw_correct hnsw_create hnsw_create_expr hnsw_dist_func hnsw_insert hnsw_select hnsw_todo hnsw_index_from_file hnsw_cost_estimate hnsw_ef_search
56 changes: 56 additions & 0 deletions test/sql/hnsw_ef_search.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
------------------------------------------------------------------------------
-- Test changing lantern_hnsw.ef_search variable at runtime
--
-- Runs one fixed nearest-neighbour query repeatedly, changing only the
-- lantern_hnsw.ef_search setting between runs, so the effect of the setting
-- on the returned neighbours can be compared.
--
-- NOTE(review): test/expected/hnsw_ef_search.out appears to echo this script,
-- comments included; regenerate it whenever this file is edited (confirm).
------------------------------------------------------------------------------

-- Load the shared fixture. Per the expected output it creates sift_base1k
-- (id SERIAL, v REAL[]) and COPYs the vectors in from a CSV file.
\ir utils/sift1k_array.sql

-- Create the HNSW index from the usearch index file at the given path, then
-- list the table's indexes (name, size, definition, total size).
CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch');
SELECT * FROM ldb_get_indexes('sift_base1k');

-- Add two constant 128-element vectors (all 1s and all 2s) with explicit ids.
-- Row 1001 is the query vector for every search below; the 0.00 and 128.00
-- distances at the top of each expected result are consistent with these rows.
INSERT INTO sift_base1k (id, v) VALUES
(1001, array_fill(1, ARRAY[128])),
(1002, array_fill(2, ARRAY[128]));

-- Validate error on invalid ef_search values
-- The setting is documented as valid in the range [1, 400], so -1, 0 and 401
-- are all out of range. ON_ERROR_STOP is switched off around the statements so
-- that their expected failures would not abort the script.
-- TODO: the three SET statements are currently commented out, so this section
-- does not validate anything yet.
\set ON_ERROR_STOP off
--SET lantern_hnsw.ef_search = -1;
--SET lantern_hnsw.ef_search = 0;
--SET lantern_hnsw.ef_search = 401;
\set ON_ERROR_STOP on

-- Repeat the same query while varying ef parameter
-- NOTE: it has not been confirmed that the results of these queries are
-- deterministic
-- Sequential scans are disabled, presumably so that the ORDER BY ... <-> query
-- is served by the HNSW index and the setting under test is actually used.
SET enable_seqscan = false;
-- \gset stores the selected column in the psql variable v1001, which the
-- queries below interpolate as the quoted literal :'v1001'.
SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset

-- Queries below have the same result
-- (ef_search = 1, 2, 4, 8, 16)
SET lantern_hnsw.ef_search = 1;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 2;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 4;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 8;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 16;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

-- Queries below have the same result, which is different from above
-- (ef_search = 32, 64, 128, 256, 400). In the expected output the neighbour at
-- distance 249457.00 appears only in this group; 400 is the documented upper
-- bound of the setting.
SET lantern_hnsw.ef_search = 32;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 64;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 128;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 256;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;

SET lantern_hnsw.ef_search = 400;
SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10;
2 changes: 1 addition & 1 deletion third_party/usearch

0 comments on commit 7c260fc

Please sign in to comment.