diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f950b8bc..5b6e93e1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.3) -set(LANTERNDB_VERSION 0.0.9) +set(LANTERNDB_VERSION 0.0.10) project( LanternDB @@ -190,6 +190,7 @@ set (_update_files sql/updates/0.0.6--0.0.7.sql sql/updates/0.0.7--0.0.8.sql sql/updates/0.0.8--0.0.9.sql + sql/updates/0.0.9--0.0.10.sql ) add_custom_command( diff --git a/README.md b/README.md index b9d22d1e7..904c0eebe 100644 --- a/README.md +++ b/README.md @@ -89,9 +89,9 @@ Lantern supports several distance functions in the index and it has 2 modes for Note that in this mode, you need to use right operator in order to trigger an index scan. 2. `lantern.pgvector_compat=FALSE` - In this mode you only need to specify the distance function used for a column at index creation time. Lantern will automatically infer the distance function to use for search so you always use `<->` operator in search queries. + In this mode you only need to specify the distance function used for a column at index creation time. Lantern will automatically infer the distance function to use for search so you always use `<?>` operator in search queries. - Note that in this mode, the operator `<->` is intended exclusively for use with index lookups. If you expect to not use the index in a query, use the distance function directly (e.g. `l2sq_dist(v1, v2)`) + Note that in this mode, the operator `<?>` is intended exclusively for use with index lookups. If you expect to not use the index in a query, use the distance function directly (e.g. `l2sq_dist(v1, v2)`) > To switch between modes set `lantern.pgvector_compat` variable to `TRUE` or `FALSE`. 
@@ -102,7 +102,6 @@ There are four defined operator classes that can be employed during index creati - **`dist_cos_ops`**: Applicable to the type `real[]` - **`dist_vec_cos_ops`**: Applicable to the type `vector` - **`dist_hamming_ops`**: Applicable to the type `integer[]` -- **`dist_vec_hamming_ops`**: Applicable to the type `vector` ### Index Construction Parameters diff --git a/scripts/test_updates.py b/scripts/test_updates.py index 5a6f3709f..5a2918c24 100644 --- a/scripts/test_updates.py +++ b/scripts/test_updates.py @@ -3,6 +3,7 @@ import getpass import git import os +from functools import cmp_to_key INCOMPATIBLE_VERSIONS = { @@ -37,8 +38,7 @@ def update_from_tag(from_version: str, to_version: str): res = shell(f"psql postgres -U {args.user} -c 'CREATE DATABASE {args.db};'") res = shell(f"psql postgres -U {args.user} -c 'DROP EXTENSION IF EXISTS lantern CASCADE; CREATE EXTENSION lantern;' -d {args.db};") - # run begin of parallel tests. Run this while the from_tag version of the binary is installed and loaded - # run begin on {from_version} + # run begin of parallel tests. 
Run this while the from_tag version of the binary is installed and loaded run begin on {from_version} if from_tag != "v0.0.4": # the source code at 0.0.4 did not yet have parallel tests res = shell(f"cd {args.builddir} ; UPDATE_EXTENSION=1 UPDATE_FROM={from_version} UPDATE_TO={from_version} make test-parallel FILTER=begin") @@ -59,6 +59,14 @@ def incompatible_version(pg_version, version_tag): return False return version_tag in INCOMPATIBLE_VERSIONS[pg_version] +def sort_versions(v1, v2): + """Comparator for cmp_to_key: order dotted version strings numerically, component by component; returns -1, 0 or 1.""" + # Compare integer tuples: concatenating the digits would make 0.1.0 equal to 0.0.10 and sort 0.0.11 after 0.1.0. + a = tuple(int(part) for part in v1.split('.')) + b = tuple(int(part) for part in v2.split('.')) + + return (a > b) - (a < b) + if __name__ == "__main__": default_user = getpass.getuser() @@ -88,8 +96,9 @@ def incompatible_version(pg_version, version_tag): # test updates from all tags tag_pairs = [update_fname.split("--") for update_fname in os.listdir("sql/updates")] - from_tags = list(sorted([p[0] for p in tag_pairs], reverse=True)) - to_tags = list(sorted([p[1].split(".sql")[0] for p in tag_pairs])) + from_tags = list(sorted([p[0] for p in tag_pairs], key=cmp_to_key(sort_versions))) + from_tags.reverse() + to_tags = list(sorted([p[1].split(".sql")[0] for p in tag_pairs], key=cmp_to_key(sort_versions))) latest_version = to_tags[-1] print("Updating from tags", from_tags, "to ", latest_version) diff --git a/sql/lantern.sql b/sql/lantern.sql index 0f8e7cbb5..ab4093deb 100644 --- a/sql/lantern.sql +++ b/sql/lantern.sql @@ -6,48 +6,41 @@ CREATE FUNCTION hnsw_handler(internal) RETURNS index_am_handler CREATE FUNCTION ldb_generic_dist(real[], real[]) RETURNS real AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -CREATE FUNCTION l2sq_dist(real[], real[]) RETURNS real +CREATE FUNCTION ldb_generic_dist(integer[], integer[]) RETURNS real AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; - --- this function is needed, as we should also use <-> operator --- with integer[] type (to overwrite hamming dist function in our hooks) --- and if we do not create l2sq_dist for integer[] type it will fail to cast in 
pgvector_compat mode -CREATE FUNCTION l2sq_dist(integer[], integer[]) RETURNS real + +CREATE FUNCTION l2sq_dist(real[], real[]) RETURNS real AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; CREATE FUNCTION cos_dist(real[], real[]) RETURNS real AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; --- functions _with_guard suffix are used to forbid operator usage --- if operator hooks are enabled (lantern.pgvector_compat=FALSE) -CREATE FUNCTION cos_dist_with_guard(real[], real[]) RETURNS real - AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; - CREATE FUNCTION hamming_dist(integer[], integer[]) RETURNS integer AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; - - -CREATE FUNCTION hamming_dist_with_guard(integer[], integer[]) RETURNS integer - AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -- operators -CREATE OPERATOR <-> ( - LEFTARG = real[], RIGHTARG = real[], PROCEDURE = l2sq_dist, - COMMUTATOR = '<->' +CREATE OPERATOR ( + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = ldb_generic_dist, + COMMUTATOR = '' +); + +CREATE OPERATOR ( + LEFTARG = integer[], RIGHTARG = integer[], PROCEDURE = ldb_generic_dist, + COMMUTATOR = '' ); CREATE OPERATOR <-> ( - LEFTARG = integer[], RIGHTARG = integer[], PROCEDURE = l2sq_dist, + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = l2sq_dist, COMMUTATOR = '<->' ); CREATE OPERATOR <=> ( - LEFTARG = real[], RIGHTARG = real[], PROCEDURE = cos_dist_with_guard, + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = cos_dist, COMMUTATOR = '<=>' ); CREATE OPERATOR <+> ( - LEFTARG = integer[], RIGHTARG = integer[], PROCEDURE = hamming_dist_with_guard, + LEFTARG = integer[], RIGHTARG = integer[], PROCEDURE = hamming_dist, COMMUTATOR = '<+>' ); @@ -74,28 +67,28 @@ BEGIN dist_l2sq_ops := ' CREATE OPERATOR CLASS dist_l2sq_ops DEFAULT FOR TYPE real[] USING ' || access_method_name || ' AS - OPERATOR 1 <-> (real[], real[]) FOR ORDER BY float_ops, - FUNCTION 1 l2sq_dist(real[], 
real[]); + OPERATOR 1 (real[], real[]) FOR ORDER BY float_ops, + FUNCTION 1 l2sq_dist(real[], real[]), + OPERATOR 2 <-> (real[], real[]) FOR ORDER BY float_ops, + FUNCTION 2 l2sq_dist(real[], real[]); '; dist_cos_ops := ' CREATE OPERATOR CLASS dist_cos_ops FOR TYPE real[] USING ' || access_method_name || ' AS - OPERATOR 1 <-> (real[], real[]) FOR ORDER BY float_ops, + OPERATOR 1 (real[], real[]) FOR ORDER BY float_ops, FUNCTION 1 cos_dist(real[], real[]), - -- it is important to set the function with guard the second - -- as op rewriting hook takes the first function to use OPERATOR 2 <=> (real[], real[]) FOR ORDER BY float_ops, - FUNCTION 2 cos_dist_with_guard(real[], real[]); + FUNCTION 2 cos_dist(real[], real[]); '; dist_hamming_ops := ' CREATE OPERATOR CLASS dist_hamming_ops FOR TYPE integer[] USING ' || access_method_name || ' AS - OPERATOR 1 <-> (integer[], integer[]) FOR ORDER BY float_ops, + OPERATOR 1 (integer[], integer[]) FOR ORDER BY float_ops, FUNCTION 1 hamming_dist(integer[], integer[]), OPERATOR 2 <+> (integer[], integer[]) FOR ORDER BY integer_ops, - FUNCTION 2 hamming_dist_with_guard(integer[], integer[]); + FUNCTION 2 hamming_dist(integer[], integer[]); '; -- Execute the dynamic SQL statement. 
@@ -142,16 +135,25 @@ BEGIN CREATE FUNCTION cos_dist(vector, vector) RETURNS float8 AS 'MODULE_PATHNAME', 'vector_cos_dist' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; - -- pgvecor's vector type requires floats and we cannot define hamming distance for floats + CREATE FUNCTION ldb_generic_dist(vector, vector) RETURNS real + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + + CREATE OPERATOR ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = ldb_generic_dist, + COMMUTATOR = '' + ); + -- pgvecor's vector type requires floats and we cannot define hamming distance for floats CREATE OPERATOR CLASS dist_vec_l2sq_ops DEFAULT FOR TYPE vector USING lantern_hnsw AS - OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, - FUNCTION 1 l2sq_dist(vector, vector); + OPERATOR 1 (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 l2sq_dist(vector, vector), + OPERATOR 2 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 2 l2sq_dist(vector, vector); CREATE OPERATOR CLASS dist_vec_cos_ops FOR TYPE vector USING lantern_hnsw AS - OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, + OPERATOR 1 (vector, vector) FOR ORDER BY float_ops, FUNCTION 1 cos_dist(vector, vector), OPERATOR 2 <=> (vector, vector) FOR ORDER BY float_ops, FUNCTION 2 cos_dist(vector, vector); diff --git a/sql/updates/0.0.9--0.0.10.sql b/sql/updates/0.0.9--0.0.10.sql index 721bd3214..24208da9b 100644 --- a/sql/updates/0.0.9--0.0.10.sql +++ b/sql/updates/0.0.9--0.0.10.sql @@ -1,5 +1,177 @@ --- these go for good. 
+-- +DO $BODY$ +DECLARE + pgvector_exists boolean; + am_name TEXT; + r pg_indexes%ROWTYPE; + indexes_cursor REFCURSOR; + index_names TEXT[] := '{}'; + index_definitions TEXT[] := '{}'; +BEGIN + -- Function to recreate operator classes for specified access method + CREATE OR REPLACE FUNCTION _lantern_internal._recreate_ldb_operator_classes(access_method_name TEXT) RETURNS BOOLEAN AS $$ + DECLARE + dist_l2sq_ops TEXT; + dist_l2sq_ops_drop TEXT; + dist_cos_ops TEXT; + dist_cos_ops_drop TEXT; + dist_hamming_ops TEXT; + dist_hamming_ops_drop TEXT; + BEGIN + + -- Construct the SQL statement to create the operator classes dynamically. + dist_l2sq_ops_drop := 'DROP OPERATOR CLASS IF EXISTS dist_l2sq_ops USING ' || access_method_name || ' CASCADE;'; + dist_l2sq_ops := ' + CREATE OPERATOR CLASS dist_l2sq_ops + DEFAULT FOR TYPE real[] USING ' || access_method_name || ' AS + OPERATOR 1 (real[], real[]) FOR ORDER BY float_ops, + FUNCTION 1 l2sq_dist(real[], real[]), + OPERATOR 2 <-> (real[], real[]) FOR ORDER BY float_ops, + FUNCTION 2 l2sq_dist(real[], real[]); + '; + + dist_cos_ops_drop := 'DROP OPERATOR CLASS IF EXISTS dist_cos_ops USING ' || access_method_name || ' CASCADE;'; + dist_cos_ops := ' + CREATE OPERATOR CLASS dist_cos_ops + FOR TYPE real[] USING ' || access_method_name || ' AS + OPERATOR 1 (real[], real[]) FOR ORDER BY float_ops, + FUNCTION 1 cos_dist(real[], real[]), + OPERATOR 2 <=> (real[], real[]) FOR ORDER BY float_ops, + FUNCTION 2 cos_dist(real[], real[]); + '; + + + dist_hamming_ops_drop := 'DROP OPERATOR CLASS IF EXISTS dist_hamming_ops USING ' || access_method_name || ' CASCADE;'; + dist_hamming_ops := ' + CREATE OPERATOR CLASS dist_hamming_ops + FOR TYPE integer[] USING ' || access_method_name || ' AS + OPERATOR 1 (integer[], integer[]) FOR ORDER BY float_ops, + FUNCTION 1 hamming_dist(integer[], integer[]), + OPERATOR 2 <+> (integer[], integer[]) FOR ORDER BY integer_ops, + FUNCTION 2 hamming_dist(integer[], integer[]); + '; + + + -- Execute the 
dynamic SQL statement. + EXECUTE dist_l2sq_ops_drop; + EXECUTE dist_l2sq_ops; + EXECUTE dist_cos_ops_drop; + EXECUTE dist_cos_ops; + EXECUTE dist_hamming_ops_drop; + EXECUTE dist_hamming_ops; + + RETURN TRUE; + END; + $$ LANGUAGE plpgsql VOLATILE; + + -- Check if the vector type from pgvector exists + SELECT EXISTS ( + SELECT 1 + FROM pg_type + WHERE typname = 'vector' + ) INTO pgvector_exists; + + am_name := 'hnsw'; + + + IF pgvector_exists THEN + am_name := 'lantern_hnsw'; + -- these go for good. + DROP OPERATOR CLASS IF EXISTS dist_vec_hamming_ops USING lantern_hnsw CASCADE; + DROP FUNCTION IF EXISTS hamming_dist(vector, vector); + DROP OPERATOR <+> (vector, vector) CASCADE; + END IF; + + + -- keep existing indexes to reindex as we should drop indexes in order to change operator classes + OPEN indexes_cursor FOR SELECT * FROM pg_indexes WHERE indexdef ILIKE '%USING ' || am_name || '%'; + -- Fetch index names into the array + LOOP + FETCH indexes_cursor INTO r; + EXIT WHEN NOT FOUND; + + -- Append index name to the array + index_names := array_append(index_names, r.indexname); + index_definitions := array_append(index_definitions, r.indexdef); + END LOOP; + + CLOSE indexes_cursor; + + IF pgvector_exists THEN + CREATE FUNCTION ldb_generic_dist(vector, vector) RETURNS real + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + + + CREATE OPERATOR ( + LEFTARG = vector, RIGHTARG = vector, PROCEDURE = ldb_generic_dist, + COMMUTATOR = '' + ); + + -- pgvecor's vector type requires floats and we cannot define hamming distance for floats + CREATE OPERATOR CLASS dist_vec_l2sq_ops + DEFAULT FOR TYPE vector USING lantern_hnsw AS + OPERATOR 1 (vector, vector) FOR ORDER BY float_ops, + FUNCTION 1 l2sq_dist(vector, vector), + OPERATOR 2 <-> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 2 l2sq_dist(vector, vector); + + CREATE OPERATOR CLASS dist_vec_cos_ops + FOR TYPE vector USING lantern_hnsw AS + OPERATOR 1 (vector, vector) FOR ORDER BY float_ops, + FUNCTION 
1 cos_dist(vector, vector), + OPERATOR 2 <=> (vector, vector) FOR ORDER BY float_ops, + FUNCTION 2 cos_dist(vector, vector); + + am_name := 'lantern_hnsw'; + END IF; + + -- operators + DROP OPERATOR <->(integer[], integer[]) CASCADE; + DROP OPERATOR <->(real[], real[]) CASCADE; + DROP OPERATOR <=>(real[], real[]) CASCADE; + DROP OPERATOR <+>(integer[], integer[]) CASCADE; + + DROP FUNCTION IF EXISTS cos_dist_with_guard CASCADE; + DROP FUNCTION IF EXISTS hamming_dist_with_guard CASCADE; + + CREATE OR REPLACE FUNCTION ldb_generic_dist(integer[], integer[]) RETURNS real + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + + CREATE OPERATOR ( + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = ldb_generic_dist, + COMMUTATOR = '' + ); + + CREATE OPERATOR ( + LEFTARG = integer[], RIGHTARG = integer[], PROCEDURE = ldb_generic_dist, + COMMUTATOR = '' + ); + + CREATE OPERATOR <-> ( + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = l2sq_dist, + COMMUTATOR = '<->' + ); + + CREATE OPERATOR <=> ( + LEFTARG = real[], RIGHTARG = real[], PROCEDURE = cos_dist, + COMMUTATOR = '<=>' + ); + + CREATE OPERATOR <+> ( + LEFTARG = integer[], RIGHTARG = integer[], PROCEDURE = hamming_dist, + COMMUTATOR = '<+>' + ); + + PERFORM _lantern_internal._recreate_ldb_operator_classes(am_name); + + SET client_min_messages TO NOTICE; + -- reindex indexes + FOR i IN 1..coalesce(array_length(index_names, 1), 0) LOOP + RAISE NOTICE 'Reindexing index %', index_names[i]; + EXECUTE index_definitions[i]; + RAISE NOTICE 'Reindexed index: %', index_names[i]; + END LOOP; +END; +$BODY$ +LANGUAGE plpgsql; -DROP OPERATOR CLASS IF EXISTS dist_vec_hamming_ops USING hnsw CASCADE; -DROP FUNCTION IF EXISTS cos_dist(vector, vector); -DROP OPERATOR <+>(vector, vector) CASCADE \ No newline at end of file diff --git a/src/hnsw.c b/src/hnsw.c index ce913b9c3..d47656e41 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -327,16 +327,15 @@ static float8 vector_dist(Vector *a, Vector *b, usearch_metric_kind_t 
metric_kin return usearch_dist(a->x, b->x, metric_kind, a->dim, usearch_scalar_f32_k); } -static void pgvector_compat_guard() +PGDLLEXPORT PG_FUNCTION_INFO_V1(ldb_generic_dist); +Datum ldb_generic_dist(PG_FUNCTION_ARGS) { - if(!ldb_pgvector_compat) { - elog(ERROR, "Operator can only be used when lantern.pgvector_compat=TRUE"); + if(ldb_pgvector_compat) { + elog(ERROR, "Operator can only be used when lantern.pgvector_compat=FALSE"); } + PG_RETURN_NULL(); } -PGDLLEXPORT PG_FUNCTION_INFO_V1(ldb_generic_dist); -Datum ldb_generic_dist(PG_FUNCTION_ARGS) { PG_RETURN_NULL(); } - PGDLLEXPORT PG_FUNCTION_INFO_V1(l2sq_dist); Datum l2sq_dist(PG_FUNCTION_ARGS) { @@ -353,15 +352,6 @@ Datum cos_dist(PG_FUNCTION_ARGS) PG_RETURN_FLOAT4(array_dist(a, b, usearch_metric_cos_k)); } -PGDLLEXPORT PG_FUNCTION_INFO_V1(cos_dist_with_guard); -Datum cos_dist_with_guard(PG_FUNCTION_ARGS) -{ - pgvector_compat_guard(); - ArrayType *a = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *b = PG_GETARG_ARRAYTYPE_P(1); - PG_RETURN_FLOAT4(array_dist(a, b, usearch_metric_cos_k)); -} - PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_dist); Datum hamming_dist(PG_FUNCTION_ARGS) { @@ -370,14 +360,16 @@ Datum hamming_dist(PG_FUNCTION_ARGS) PG_RETURN_INT32((int32)array_dist(a, b, usearch_metric_hamming_k)); } +// The guard functions are not used anymore +// They are left for updates from <0.0.9 to >0.0.9 to work +// As in update 0.0.9 it will try to create _guard functions +// And will fail if the corresponding functions will not exist in C +// This can happen for example when updating from v0.0.8 to v0.0.10 PGDLLEXPORT PG_FUNCTION_INFO_V1(hamming_dist_with_guard); -Datum hamming_dist_with_guard(PG_FUNCTION_ARGS) -{ - pgvector_compat_guard(); - ArrayType *a = PG_GETARG_ARRAYTYPE_P(0); - ArrayType *b = PG_GETARG_ARRAYTYPE_P(1); - PG_RETURN_INT32((int32)array_dist(a, b, usearch_metric_hamming_k)); -} +Datum hamming_dist_with_guard(PG_FUNCTION_ARGS) { PG_RETURN_NULL(); } + +PGDLLEXPORT PG_FUNCTION_INFO_V1(cos_dist_with_guard); 
+Datum cos_dist_with_guard(PG_FUNCTION_ARGS) { PG_RETURN_NULL(); } PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_l2sq_dist); Datum vector_l2sq_dist(PG_FUNCTION_ARGS) @@ -397,15 +389,6 @@ Datum vector_cos_dist(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8((double)vector_dist(a, b, usearch_metric_cos_k)); } -PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_hamming_dist); -Datum vector_hamming_dist(PG_FUNCTION_ARGS) -{ - Vector *a = PG_GETARG_VECTOR_P(0); - Vector *b = PG_GETARG_VECTOR_P(1); - - PG_RETURN_FLOAT8((double)vector_dist(a, b, usearch_metric_hamming_k)); -} - PGDLLEXPORT PG_FUNCTION_INFO_V1(lantern_internal_validate_index); Datum lantern_internal_validate_index(PG_FUNCTION_ARGS) { diff --git a/src/hnsw.h b/src/hnsw.h index e6d9b58f2..c20911c9a 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -34,7 +34,6 @@ PGDLLEXPORT Datum hamming_dist_with_guard(PG_FUNCTION_ARGS); PGDLLEXPORT Datum cos_dist(PG_FUNCTION_ARGS); PGDLLEXPORT Datum cos_dist_with_guard(PG_FUNCTION_ARGS); PGDLLEXPORT Datum vector_cos_dist(PG_FUNCTION_ARGS); -PGDLLEXPORT Datum vector_hamming_dist(PG_FUNCTION_ARGS); HnswColumnType GetColumnTypeFromOid(Oid oid); HnswColumnType GetIndexColumnType(Relation index); diff --git a/src/hnsw/options.c b/src/hnsw/options.c index bbd99d823..2c8f2e7fb 100644 --- a/src/hnsw/options.c +++ b/src/hnsw/options.c @@ -95,7 +95,7 @@ usearch_metric_kind_t ldb_HnswGetMetricKind(Relation index) if(fnaddr == l2sq_dist || fnaddr == vector_l2sq_dist) { return usearch_metric_l2sq_k; - } else if(fnaddr == hamming_dist || fnaddr == vector_hamming_dist) { + } else if(fnaddr == hamming_dist) { return usearch_metric_hamming_k; } else if(fnaddr == cos_dist || fnaddr == vector_cos_dist) { return usearch_metric_cos_k; diff --git a/src/hooks/utils.c b/src/hooks/utils.c index bf263cecb..c46b98dc0 100644 --- a/src/hooks/utils.c +++ b/src/hooks/utils.c @@ -9,7 +9,7 @@ List *ldb_get_operator_oids() { List *oidList = NIL; - List *nameList = lappend(NIL, makeString("<->")); + List *nameList = lappend(NIL, 
makeString("")); Oid intOperator = LookupOperName(NULL, nameList, INT4ARRAYOID, INT4ARRAYOID, true, -1); Oid floatOperator = LookupOperName(NULL, nameList, FLOAT4ARRAYOID, FLOAT4ARRAYOID, true, -1); diff --git a/test/expected/ext_relocation.out b/test/expected/ext_relocation.out index 804973918..0275315b0 100644 --- a/test/expected/ext_relocation.out +++ b/test/expected/ext_relocation.out @@ -40,14 +40,12 @@ ORDER BY 1, 3, 2; schema1 | reindex_lantern_indexes | _lantern_internal schema1 | validate_index | _lantern_internal schema1 | cos_dist | schema1 - schema1 | cos_dist_with_guard | schema1 schema1 | hamming_dist | schema1 - schema1 | hamming_dist_with_guard | schema1 schema1 | hnsw_handler | schema1 schema1 | l2sq_dist | schema1 - schema1 | l2sq_dist | schema1 schema1 | ldb_generic_dist | schema1 -(13 rows) + schema1 | ldb_generic_dist | schema1 +(11 rows) -- show all the extension operators SELECT ne.nspname AS extschema, op.oprname, np.nspname AS proschema @@ -60,11 +58,12 @@ WHERE d.deptype = 'e' AND e.extname = 'lantern' ORDER BY 1, 3; extschema | oprname | proschema -----------+---------+----------- - schema1 | <-> | schema1 + schema1 | | schema1 + schema1 | | schema1 schema1 | <-> | schema1 schema1 | <=> | schema1 schema1 | <+> | schema1 -(4 rows) +(5 rows) SET search_path TO public, schema1; -- extension function is accessible @@ -105,11 +104,12 @@ WHERE d.deptype = 'e' AND e.extname = 'lantern' ORDER BY 1, 3; extschema | oprname | proschema -----------+---------+----------- - schema1 | <-> | schema1 + schema1 | | schema1 + schema1 | | schema1 schema1 | <-> | schema1 schema1 | <=> | schema1 schema1 | <+> | schema1 -(4 rows) +(5 rows) SET search_path TO public, schema2; --extension access method is still accessible since access methods are not schema-qualified diff --git a/test/expected/hnsw_correct.out b/test/expected/hnsw_correct.out index 05eb5483d..b0a4fad0b 100644 --- a/test/expected/hnsw_correct.out +++ b/test/expected/hnsw_correct.out @@ -12,7 +12,8 
@@ CREATE INDEX ON small_world USING hnsw (v dist_l2sq_ops) WITH (dim=2, M=4); INFO: done init usearch index INFO: inserted 4 elements INFO: done saving 4 vectors -SET enable_seqscan = false; +SET enable_seqscan=FALSE; +SET lantern.pgvector_compat=FALSE; -- Get the results without the index CREATE TEMP TABLE results_wo_index AS SELECT @@ -24,7 +25,7 @@ FROM -- Get the results with the index CREATE TEMP TABLE results_w_index AS SELECT - ROW_NUMBER() OVER (ORDER BY v <-> '{0,0}') AS row_num, + ROW_NUMBER() OVER (ORDER BY v '{0,0}') AS row_num, id, l2sq_dist(v, '{0,0}') AS dist FROM diff --git a/test/expected/hnsw_cost_estimate.out b/test/expected/hnsw_cost_estimate.out index 7f91ce2e9..30f51080f 100644 --- a/test/expected/hnsw_cost_estimate.out +++ b/test/expected/hnsw_cost_estimate.out @@ -44,10 +44,11 @@ BEGIN RETURN is_within_error(get_cost_estimate(explain_query), expected_cost, error_margin); END; $$ LANGUAGE plpgsql; +SET lantern.pgvector_compat=FALSE; -- Goal: make sure query cost estimate is accurate -- when index is created with varying costruction parameters. SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v <-> ''%s'' LIMIT 10' +\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v ''%s'' LIMIT 10' \set enable_seqscan = off; -- Case 0, sanity check. No data. 
CREATE TABLE empty_table(id SERIAL PRIMARY KEY, v REAL[2]); @@ -56,7 +57,7 @@ INFO: done init usearch index INFO: inserted 0 elements INFO: done saving 0 vectors SET _lantern_internal.is_test = true; -SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v <-> ''{1,2}'' LIMIT 10', 0.47); +SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v ''{1,2}'' LIMIT 10', 0.47); DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- DEBUG: LANTERN - Total cost: 4.225000 diff --git a/test/expected/hnsw_create.out b/test/expected/hnsw_create.out index 84b77c3fd..2d866705e 100644 --- a/test/expected/hnsw_create.out +++ b/test/expected/hnsw_create.out @@ -49,17 +49,18 @@ CREATE TABLE IF NOT EXISTS sift_base10k ( v REAL[128] ); \copy sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' with csv; +SET lantern.pgvector_compat=FALSE; CREATE INDEX hnsw_idx ON sift_base10k USING hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444' LIMIT 10; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_idx on sift_base10k - Order By: (v <-> 
'{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) + Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) (3 rows) SELECT _lantern_internal.validate_index('hnsw_idx', false); diff --git a/test/expected/hnsw_create_expr.out b/test/expected/hnsw_create_expr.out index 690beb7bd..ee2141821 100644 --- a/test/expected/hnsw_create_expr.out +++ b/test/expected/hnsw_create_expr.out @@ -63,6 +63,8 @@ $$ LANGUAGE plpgsql IMMUTABLE; CREATE TABLE test_table (id INTEGER); INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; +SET enable_seqscan = false; +SET lantern.pgvector_compat=FALSE; -- This should success CREATE INDEX ON test_table USING hnsw (int_to_fixed_binary_real_array(id)) WITH (M=2); INFO: done init usearch index @@ -87,7 +89,7 @@ ERROR: data type text has no default operator class for access method "hnsw" -- This should result in error about multicolumn expressions support CREATE INDEX ON test_table USING hnsw (int_to_fixed_binary_real_array(id), int_to_dynamic_binary_real_array(id)) WITH (M=2); ERROR: access method "hnsw" does not support multicolumn indexes -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <-> '{0,0,0}'::REAL[] LIMIT 2; +SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) '{0,0,0}'::REAL[] LIMIT 2; id ---- 0 diff --git a/test/expected/hnsw_dist_func.out 
b/test/expected/hnsw_dist_func.out index ee0d9be57..04b0578d4 100644 --- a/test/expected/hnsw_dist_func.out +++ b/test/expected/hnsw_dist_func.out @@ -37,7 +37,7 @@ INSERT INTO small_world_ham SELECT id, ARRAY[CAST(v[1] AS INTEGER), CAST(v[2] AS SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; -- Verify that the distance functions work (check distances) -SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; +SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v '{0,1,0}'; round ------- 0.00 @@ -50,7 +50,7 @@ SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v 3.00 (8 rows) -SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v <-> '{0,1,0}'; +SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}'; round ------- 0.00 @@ -63,7 +63,7 @@ SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v 1.00 (8 rows) -SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v <-> '{0,1,0}'; +SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}'; round ------- 0.00 @@ -105,34 +105,34 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) (4 rows) -- Verify that the indexes is being used -EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v '{0,1,0}'; QUERY PLAN --------------------------------------------------------- Index Scan using small_world_l2_v_idx on small_world_l2 - Order By: (v <-> '{0,1,0}'::real[]) + Order By: (v '{0,1,0}'::real[]) (2 rows) -EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v '{0,1,0}'; QUERY PLAN ----------------------------------------------------------- Index Scan using 
small_world_cos_v_idx on small_world_cos - Order By: (v <-> '{0,1,0}'::real[]) + Order By: (v '{0,1,0}'::real[]) (2 rows) -EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v '{0,1,0}'; QUERY PLAN ----------------------------------------------------------- Index Scan using small_world_ham_v_idx on small_world_ham - Order By: (v <-> '{0,1,0}'::integer[]) + Order By: (v '{0,1,0}'::integer[]) (2 rows) \set ON_ERROR_STOP off -- Expect errors due to mismatching vector dimensions -SELECT 1 FROM small_world_l2 ORDER BY v <-> '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_l2 ORDER BY v '{0,1,0,1}' LIMIT 1; ERROR: Expected real array with dimension 3, got 4 -SELECT 1 FROM small_world_cos ORDER BY v <-> '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_cos ORDER BY v '{0,1,0,1}' LIMIT 1; ERROR: Expected real array with dimension 3, got 4 -SELECT 1 FROM small_world_ham ORDER BY v <-> '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_ham ORDER BY v '{0,1,0,1}' LIMIT 1; ERROR: Expected int array with dimension 3, got 4 SELECT l2sq_dist('{1,1}'::REAL[], '{0,1,0}'::REAL[]); ERROR: expected equally sized arrays but got arrays with dimensions 2 and 3 @@ -140,12 +140,12 @@ SELECT cos_dist('{1,1}', '{0,1,0}'); ERROR: expected equally sized arrays but got arrays with dimensions 2 and 3 SELECT hamming_dist('{1,1}', '{0,1,0}'); ERROR: expected equally sized arrays but got arrays with dimensions 2 and 3 --- Expect errors due to improper use of the <-> operator outside of its supported context -SELECT ARRAY[1,2,3] <-> ARRAY[3,2,1]; +-- Expect errors due to improper use of the operator outside of its supported context +SELECT ARRAY[1,2,3] ARRAY[3,2,1]; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT ROUND((v <-> ARRAY[0,1,0])::numeric, 2) FROM small_world_cos ORDER BY v <-> '{0,1,0}' LIMIT 7; +SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}' LIMIT 
7; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT ROUND((v <-> ARRAY[0,1,0])::numeric, 2) FROM small_world_ham ORDER BY v <-> '{0,1,0}' LIMIT 7; +SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}' LIMIT 7; ERROR: Operator <-> is invalid outside of ORDER BY context \set ON_ERROR_STOP on -- More robust distance operator tests @@ -168,63 +168,63 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; \set ON_ERROR_STOP off -- Expect errors due to incorrect usage -INSERT INTO test1 (v) VALUES (ARRAY['{1,2}'::REAL[] <-> '{4,2}'::REAL[], 0]); +INSERT INTO test1 (v) VALUES (ARRAY['{1,2}'::REAL[] '{4,2}'::REAL[], 0]); ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT v <-> '{1,2}' FROM test1 ORDER BY v <-> '{1,3}'; +SELECT v '{1,2}' FROM test1 ORDER BY v '{1,3}'; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT v <-> '{1,2}' FROM test1; +SELECT v '{1,2}' FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -WITH temp AS (SELECT v <-> '{1,2}' FROM test1) SELECT 1 FROM temp; +WITH temp AS (SELECT v '{1,2}' FROM test1) SELECT 1 FROM temp; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT t.res FROM (SELECT v <-> '{1,2}' AS res FROM test1) t; +SELECT t.res FROM (SELECT v '{1,2}' AS res FROM test1) t; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT (SELECT v <-> '{1,2}' FROM test1 LIMIT 1) FROM test1; +SELECT (SELECT v '{1,2}' FROM test1 LIMIT 1) FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT COALESCE(v <-> '{1,2}', 0) FROM test1; +SELECT COALESCE(v '{1,2}', 0) FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT EXISTS (SELECT v <-> '{1,2}' FROM test1); +SELECT EXISTS (SELECT v '{1,2}' FROM test1); ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT test1.v <-> test2.v FROM test1 JOIN test2 USING (id); +SELECT test1.v test2.v FROM test1 JOIN test2 USING (id); ERROR: Operator <-> 
is invalid outside of ORDER BY context -SELECT v <-> '{1,2}' FROM test1 UNION SELECT v <-> '{1,3}' FROM test1; +SELECT v '{1,2}' FROM test1 UNION SELECT v '{1,3}' FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -(SELECT v <-> '{1,2}' FROM test1 WHERE id < 5) UNION (SELECT v <-> '{1,3}' FROM test1 WHERE id >= 5); +(SELECT v '{1,2}' FROM test1 WHERE id < 5) UNION (SELECT v '{1,3}' FROM test1 WHERE id >= 5); ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT MAX(v <-> '{1,2}') FROM test1; +SELECT MAX(v '{1,2}') FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT * FROM test1 JOIN test2 ON test1.v <-> test2.v < 0.5; +SELECT * FROM test1 JOIN test2 ON test1.v test2.v < 0.5; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT test1.v FROM test1 JOIN test2 ON test1.v <-> '{1,2}' = test2.v <-> '{1,3}'; +SELECT test1.v FROM test1 JOIN test2 ON test1.v '{1,2}' = test2.v '{1,3}'; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT (v <-> '{1,2}') + (v <-> '{1,3}') FROM test1; +SELECT (v '{1,2}') + (v '{1,3}') FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT CASE WHEN v <-> '{1,2}' > 1 THEN 'High' ELSE 'Low' END FROM test1; +SELECT CASE WHEN v '{1,2}' > 1 THEN 'High' ELSE 'Low' END FROM test1; ERROR: Operator <-> is invalid outside of ORDER BY context -INSERT INTO test1 (v) VALUES ('{2,3}') RETURNING v <-> '{1,2}'; +INSERT INTO test1 (v) VALUES ('{2,3}') RETURNING v '{1,2}'; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT 1 FROM test1 GROUP BY v <-> '{1,3}'; +SELECT 1 FROM test1 GROUP BY v '{1,3}'; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT 1 FROM test1 ORDER BY (('{1,2}'::real[] <-> '{3,4}'::real[]) - 0); +SELECT 1 FROM test1 ORDER BY (('{1,2}'::real[] '{3,4}'::real[]) - 0); ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT 1 FROM test1 ORDER BY '{1,2}'::REAL[] <-> '{3,4}'::REAL[]; +SELECT 1 
FROM test1 ORDER BY '{1,2}'::REAL[] '{3,4}'::REAL[]; ERROR: Operator <-> is invalid outside of ORDER BY context -SELECT 1 FROM test1 ORDER BY v <-> ARRAY[(SELECT '{1,4}'::REAL[] <-> '{4,2}'::REAL[]), 3]; +SELECT 1 FROM test1 ORDER BY v ARRAY[(SELECT '{1,4}'::REAL[] '{4,2}'::REAL[]), 3]; ERROR: Operator <-> is invalid outside of ORDER BY context -- Expect errors due to index not existing -SELECT id FROM test1 ORDER BY v <-> '{1,2}'; +SELECT id FROM test1 ORDER BY v '{1,2}'; ERROR: Operator <-> can only be used inside of an index -SELECT 1 FROM test1 ORDER BY v <-> (SELECT '{1,3}'::real[]); +SELECT 1 FROM test1 ORDER BY v (SELECT '{1,3}'::real[]); ERROR: Operator <-> can only be used inside of an index -SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDER BY t1.v <-> t2.v LIMIT 1) t2_results ON TRUE; +SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDER BY t1.v t2.v LIMIT 1) t2_results ON TRUE; ERROR: Operator <-> can only be used inside of an index -WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t; +WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t; ERROR: Operator <-> can only be used inside of an index -WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1; +WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1; ERROR: Operator <-> can only be used inside of an index -WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}') SELECT id FROM t UNION SELECT id FROM t; +WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}') SELECT id FROM t UNION SELECT id FROM t; ERROR: Operator <-> can only be used inside of an index -- issue #227 -SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v <-> '{1,2}') as forall) haha on TRUE; +SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v '{1,2}') as forall) 
haha on TRUE; ERROR: Operator <-> can only be used inside of an index -- more complex setup of the above SELECT forall.id, nearest_per_id.* FROM @@ -242,7 +242,7 @@ SELECT forall.id, nearest_per_id.* FROM FROM test2 ORDER BY - v <-> forall.v + v forall.v LIMIT 5 ) as __unused_name @@ -273,7 +273,7 @@ SELECT forall.id, nearest_per_id.* FROM FROM small_world_l2 ORDER BY - v <-> forall.v + v forall.v LIMIT 4 ) as __unused_name @@ -304,7 +304,7 @@ CREATE INDEX ON extra_small_world_ham USING hnsw (v dist_hamming_ops) WITH (dim= INFO: done init usearch index INFO: inserted 4 elements INFO: done saving 4 vectors -SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v <-> '{0,0}'; +SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v '{0,0}'; round ------- 0.00 diff --git a/test/expected/hnsw_ef_search.out b/test/expected/hnsw_ef_search.out index 18cc1fc81..665c58970 100644 --- a/test/expected/hnsw_ef_search.out +++ b/test/expected/hnsw_ef_search.out @@ -30,11 +30,12 @@ ERROR: 401 is outside the valid range for parameter "hnsw.ef" (0 .. 
400) \set ON_ERROR_STOP on -- Repeat the same query while varying ef parameter -- NOTE: it is not entirely known if the results of these are deterministic -SET enable_seqscan = false; +SET enable_seqscan=FALSE; +SET lantern.pgvector_compat=FALSE; SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -- Queries below have the same result SET hnsw.ef = 1; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -50,7 +51,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 2; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -66,7 +67,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 4; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -82,7 +83,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 8; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -98,7 +99,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 16; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -115,7 +116,7 @@ SELECT 
ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> -- Queries below have the same result, which is different from above SET hnsw.ef = 32; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -131,7 +132,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 64; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -147,7 +148,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 128; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -163,7 +164,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 256; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -179,7 +180,7 @@ SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> (10 rows) SET hnsw.ef = 400; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 diff --git a/test/expected/hnsw_index_from_file.out b/test/expected/hnsw_index_from_file.out index 918f49915..76ded26ee 100644 --- a/test/expected/hnsw_index_from_file.out +++ 
b/test/expected/hnsw_index_from_file.out @@ -44,17 +44,18 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); hnsw_l2_index | 720 kB | CREATE INDEX hnsw_l2_index ON public.sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch') | 720 kB (1 row) -SET enable_seqscan = false; +SET enable_seqscan=FALSE; +SET lantern.pgvector_compat=FALSE; SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_l2_index on sift_base1k - Order By: (v <-> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) + Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) (3 rows) -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v 
:'v777' LIMIT 10; round ----------- 0.00 @@ -74,7 +75,7 @@ INSERT INTO sift_base1k (id, v) VALUES (1001, array_fill(1, ARRAY[128])), (1002, array_fill(2, ARRAY[128])); SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; round ----------- 0.00 @@ -117,15 +118,15 @@ SELECT * FROM ldb_get_indexes('sift_base1k'); (1 row) SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_cos_index on sift_base1k - Order By: (v <-> '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) + Order By: (v '{97,67,0,0,0,0,0,14,49,107,23,0,0,0,5,24,4,25,48,5,0,1,8,3,0,5,17,3,1,1,3,3,126,126,0,0,0,0,0,27,49,126,49,8,1,4,11,14,0,6,37,39,10,22,25,0,0,0,12,27,7,23,35,3,126,9,1,0,0,0,19,126,28,11,8,7,1,39,126,126,0,1,28,27,3,126,126,0,1,3,7,9,0,52,126,5,13,5,8,0,0,0,33,72,78,19,18,3,0,3,21,126,42,13,64,83,1,9,8,23,1,4,22,68,3,1,4,0}'::real[]) (3 rows) -SELECT ROUND(cos_dist(v, 
:'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; round ------- 0.00 @@ -167,7 +168,7 @@ INFO: validate_index() done, no issues found. (1 row) -- This should not throw error, but the first result will not be 0 as vector 777 is deleted from the table -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; round ----------- 98486.00 diff --git a/test/expected/hnsw_insert.out b/test/expected/hnsw_insert.out index 52fe23510..c07f6d7f4 100644 --- a/test/expected/hnsw_insert.out +++ b/test/expected/hnsw_insert.out @@ -65,6 +65,7 @@ INFO: done init usearch index INFO: inserted 8 elements INFO: done saving 8 vectors SET enable_seqscan = false; +SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); INSERT INTO small_world (v) VALUES (NULL); @@ -79,7 +80,7 @@ SELECT FROM small_world ORDER BY - v <-> '{0,0,0}'; + v '{0,0,0}'; round ------- 0.00 @@ -107,13 +108,13 @@ SELECT FROM small_world ORDER BY - v <-> '{0,0,0}' + v '{0,0,0}' LIMIT 10; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v <-> '{0,0,0}'::real[]) + Order By: (v '{0,0,0}'::real[]) (3 rows) SELECT _lantern_internal.validate_index('small_world_v_idx', false); @@ -135,11 +136,11 @@ INFO: inserted 0 elements INFO: done saving 0 vectors \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' WITH CSV; SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444'; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; QUERY PLAN 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Index Scan using hnsw_idx on sift_base10k - Order By: (v <-> '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) + Order By: (v '{55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26}'::real[]) (2 rows) SELECT _lantern_internal.validate_index('hnsw_idx', false); diff --git a/test/expected/hnsw_operators.out b/test/expected/hnsw_operators.out index 7b0265935..f61ae0b20 100644 --- a/test/expected/hnsw_operators.out +++ b/test/expected/hnsw_operators.out @@ -7,25 +7,43 @@ INFO: inserted 2 elements INFO: done saving 2 vectors -- should rewrite operator SET lantern.pgvector_compat=FALSE; -SELECT * FROM op_test ORDER BY v <-> ARRAY[1,1,1]; +SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; v --------- {1,1,1} {0,0,0} (2 rows) --- should throw error \set ON_ERROR_STOP off -SET lantern.pgvector_compat=FALSE; -SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; -ERROR: Operator can only be used when lantern.pgvector_compat=TRUE +SET lantern.pgvector_compat=TRUE; -- should throw error +SELECT * FROM op_test ORDER BY v 
ARRAY[1,1,1]; +ERROR: Operator can only be used when lantern.pgvector_compat=FALSE +-- should not throw error +SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; + v +--------- + {1,1,1} + {0,0,0} +(2 rows) + +-- should not throw error SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; -ERROR: Operator can only be used when lantern.pgvector_compat=TRUE --- should throw error + v +--------- + {1,1,1} + {0,0,0} +(2 rows) + +-- should not throw error SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; -ERROR: Operator <-> is invalid outside of ORDER BY context -SET lantern.pgvector_compat=TRUE; + ?column? +---------- + 0 + 3 +(2 rows) + +SET lantern.pgvector_compat=FALSE; SET enable_seqscan=OFF; \set ON_ERROR_STOP on -- one-off vector distance calculations should work with relevant operator @@ -86,13 +104,13 @@ SELECT ARRAY[1,0,0] <+> ARRAY[0,1,0]; (1 row) -- NOW THIS IS TRIGGERING INDEX SCAN AS WELL --- BECAUSE WE ARE REGISTERING <-> FOR ALL OPERATOR CLASSES +-- BECAUSE WE ARE REGISTERING FOR ALL OPERATOR CLASSES -- IDEALLY THIS SHOULD NOT TRIGGER INDEX SCAN WHEN lantern.pgvector_compat=TRUE -EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <-> ARRAY[1,1,1]; +EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; QUERY PLAN --------------------------------------- Index Scan using cos_idx on op_test - Order By: (v <-> '{1,1,1}'::real[]) + Order By: (v '{1,1,1}'::real[]) (2 rows) -- should sort with index @@ -140,17 +158,15 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; RESET ALL; -- Set false twice to verify that no crash is happening SET lantern.pgvector_compat=FALSE; -SET lantern.pgvector_compat=FALSE; \set ON_ERROR_STOP off -- should rewrite operator -SELECT * FROM op_test ORDER BY v <-> ARRAY[1,1,1]; +SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; v --------- {1,1,1} {0,0,0} (2 rows) -SET lantern.pgvector_compat=TRUE; SET enable_seqscan=OFF; CREATE INDEX hamming_idx ON op_test USING hnsw(cast(v 
as INTEGER[]) dist_hamming_ops); INFO: done init usearch index diff --git a/test/expected/hnsw_select.out b/test/expected/hnsw_select.out index e8c5b3cc4..72b124358 100644 --- a/test/expected/hnsw_select.out +++ b/test/expected/hnsw_select.out @@ -56,7 +56,7 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; -- Verify that the index is being used SET _lantern_internal.is_test = true; -EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 1; +EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v '{1,0,0}' LIMIT 1; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- DEBUG: LANTERN - Total cost: 4.015000 @@ -68,7 +68,7 @@ DEBUG: LANTERN - --------------------- --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v <-> '{1,0,0}'::real[]) + Order By: (v '{1,0,0}'::real[]) (3 rows) -- Verify that this does not use the index @@ -81,7 +81,7 @@ EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; -- Ensure we can query an index for more elements than the value of init_k WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -97,7 +97,7 @@ DEBUG: LANTERN querying index for 10 elements (1 row) WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -115,7 +115,7 @@ DEBUG: LANTERN querying index for 10 elements -- Change default k and make sure the number of usearch_searchs makes sense SET hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 ) SELECT 
COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -131,7 +131,7 @@ DEBUG: LANTERN querying index for 4 elements (1 row) WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; DEBUG: LANTERN - Query cost estimator DEBUG: LANTERN - --------------------- @@ -150,7 +150,7 @@ DEBUG: LANTERN - querying index for 8 elements RESET client_min_messages; SET _lantern_internal.is_test = false; -- Verify where condition works properly and still uses index -SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v <-> ''{0,0,0}'''); +SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v ''{0,0,0}'''); has_index_scan ---------------- t @@ -164,51 +164,51 @@ SELECT NOT has_index_scan('EXPLAIN SELECT COUNT(*) FROM small_world'); (1 row) -- Verify swapping order doesn't change anything and still uses index -SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v'); +SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v'); has_index_scan ---------------- t (1 row) -- Verify group by works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); has_index_scan ---------------- t (1 row) -- Validate distinct works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); has_index_scan ---------------- t (1 row) -- Validate join lateral works and uses index 
-SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v <-> t1.v LIMIT 1) t1_results ON TRUE'); +SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v t1.v LIMIT 1) t1_results ON TRUE'); has_index_scan ---------------- t (1 row) -- Validate union works and uses index -SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); +SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); has_index_scan ---------------- t (1 row) -- Validate CTEs work and still use index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); has_index_scan ---------------- t (1 row) --- Validate <-> is replaced with the matching function when an index is present +-- Validate is replaced with the matching function when an index is present set enable_seqscan = true; set enable_indexscan = false; -EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v '{1,1,1}'; QUERY PLAN ----------------------------------------------- Sort @@ -216,7 +216,7 @@ EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v <-> '{1,1,1}'; -> Seq Scan on small_world (3 rows) -SELECT * from small_world ORDER BY v <-> '{1,1,1}'; +SELECT * from small_world ORDER BY v '{1,1,1}'; id | b | v -----+---+--------- 111 | t | {1,1,1} @@ -236,7 +236,7 @@ CREATE INDEX test2_cos ON test2 USING hnsw(v dist_cos_ops); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v <-> 
'{1,4}'; +EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v '{1,4}'; QUERY PLAN -------------------------------------------- Sort @@ -246,7 +246,7 @@ EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v <-> '{1,4}'; -- Some additional cases that trigger operator rewriting -- SampleScan -EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v <-> '{1,1,1}' ASC; +EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v '{1,1,1}' ASC; QUERY PLAN ----------------------------------------------- Sort @@ -256,7 +256,7 @@ EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER (4 rows) -- can't compare direct equality here because it's random -SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v <-> ''{1,1,1}'' ASC', +SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v ''{1,1,1}'' ASC', 'EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC'); results_match --------------- @@ -264,7 +264,7 @@ SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (2 (1 row) -- SetOpt/HashSetOp -EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v <-> '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v <-> '{1,1,1}' ASC LIMIT 5); +EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v '{1,1,1}' ASC LIMIT 5); QUERY PLAN ------------------------------------------------------------------------------------- HashSetOp Except @@ -280,7 +280,7 @@ EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v <-> '{1,0,1}' ASC ) -> Seq Scan on small_world small_world_1 (11 rows) -SELECT results_match('(SELECT * FROM small_world ORDER BY v <-> ''{1,0,1}'' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v <-> ''{1,1,1}'' ASC LIMIT 5)', +SELECT results_match('(SELECT * FROM small_world 
ORDER BY v ''{1,0,1}'' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v ''{1,1,1}'' ASC LIMIT 5)', '(SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,0,1}'') ASC ) EXCEPT (SELECT * FROM small_world ORDER by l2sq_dist(v, ''{1,1,1}'') ASC LIMIT 5)'); results_match --------------- @@ -288,7 +288,7 @@ SELECT results_match('(SELECT * FROM small_world ORDER BY v <-> ''{1,0,1}'' ASC (1 row) -- HashAggregate -EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v '{1,1,1}'; QUERY PLAN ----------------------------------------------- Sort @@ -298,7 +298,7 @@ EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v -> Seq Scan on small_world (5 rows) -SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v <-> ''{1,1,1}''', +SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v ''{1,1,1}''', 'SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- @@ -306,7 +306,7 @@ SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v (1 row) -- GroupBy this -EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v '{1,1,1}'; QUERY PLAN ----------------------------------------------- Sort @@ -316,7 +316,7 @@ EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <-> -> Seq Scan on small_world (5 rows) -SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <-> ''{1,1,1}''', +SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v ''{1,1,1}''', 'SELECT * FROM small_world GROUP BY id, v, b ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- @@ -325,7 +325,7 @@ SELECT results_match('SELECT * FROM small_world GROUP BY id, v, 
b ORDER BY v <-> -- HashJoin/Hash CREATE TABLE small_world_2 AS (SELECT * FROM small_world); -EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v '{1,1,1}'; QUERY PLAN ----------------------------------------------------------- Sort @@ -337,7 +337,7 @@ EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORD -> Seq Scan on small_world (7 rows) -SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v <-> ''{1,1,1}''', +SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v ''{1,1,1}''', 'SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- @@ -345,7 +345,7 @@ SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORD (1 row) -- MixedAggregate (this doesn't require additional logic, but I include it here as an example of generating the path) -EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v '{1,1,1}'; QUERY PLAN ----------------------------------------------- Sort @@ -356,7 +356,7 @@ EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <- -> Seq Scan on small_world (6 rows) -SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <-> ''{1,1,1}''', +SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v ''{1,1,1}''', 'SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- @@ -364,7 +364,7 @@ SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <- (1 row) -- WindowAgg -EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) 
SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v '{1,1,1}'; QUERY PLAN ----------------------------------------------- Sort @@ -373,7 +373,7 @@ EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <-> -> Seq Scan on small_world (4 rows) -SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <-> ''{1,1,1}''', +SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v ''{1,1,1}''', 'SELECT v, EVERY(b) OVER () FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'')'); results_match --------------- @@ -381,7 +381,7 @@ SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <-> (1 row) -- LockRows -EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v <-> '{1,1,1}' ASC FOR UPDATE; +EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v '{1,1,1}' ASC FOR UPDATE; QUERY PLAN ----------------------------------------------------- LockRows @@ -390,7 +390,7 @@ EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v <-> '{1,1,1}' ASC FOR -> Seq Scan on small_world (4 rows) -SELECT results_match('SELECT * FROM small_world ORDER BY v <-> ''{1,1,1}'' ASC FOR UPDATE', +SELECT results_match('SELECT * FROM small_world ORDER BY v ''{1,1,1}'' ASC FOR UPDATE', 'SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC FOR UPDATE'); results_match --------------- diff --git a/test/expected/hnsw_todo.out b/test/expected/hnsw_todo.out index c6cdf62c7..3fdba174f 100644 --- a/test/expected/hnsw_todo.out +++ b/test/expected/hnsw_todo.out @@ -38,14 +38,15 @@ ERROR: relation "small_world_l2_vector_int_idx" does not exist at character 41 EXPLAIN (COSTS FALSE) SELECT id, ROUND(l2sq_dist(vector_int, array[0,1,0])::numeric, 2) as dist FROM small_world_l2 -ORDER BY vector_int <-> array[0,1,0] LIMIT 7; - QUERY PLAN ------------------------------------------------------------------------- +ORDER BY vector_int array[0,1,0] LIMIT 7; + QUERY PLAN 
+----------------------------------------------------------------------- Limit - -> Sort - Sort Key: (public.l2sq_dist(vector_int, '{0,1,0}'::integer[])) - -> Seq Scan on small_world_l2 -(4 rows) + -> Result + -> Sort + Sort Key: (l2sq_dist(vector_int, '{0,1,0}'::integer[])) + -> Seq Scan on small_world_l2 +(5 rows) --- Test scenarious --- ----------------------------------------- @@ -79,7 +80,7 @@ INFO: validate_index() done, no issues found. (1 row) -- The 1001 and 1002 vectors will be ignored in search, so the first row will not be 0 in result -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <?> :'v1001' LIMIT 1; round ----------- 249285.00 @@ -112,13 +113,13 @@ INFO: validate_index() done, no issues found. -- The first row will not be 0 now as the vector under id=777 was updated to 1,1,1,1... but it was indexed with different vector -- So the usearch index can not find 1,1,1,1,1..
vector in the index and wrong results will be returned -- This is an expected behaviour for now -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <?> :'v1001' LIMIT 1; round ----------- 249285.00 (1 row) ----- Query on expression based index is failing to check correct <-> operator usage -------- +---- Query on expression based index is failing to check correct <?> operator usage -------- CREATE OR REPLACE FUNCTION int_to_fixed_binary_real_array(n INT) RETURNS REAL[] AS $$ DECLARE binary_string TEXT; @@ -138,5 +139,5 @@ INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; -- This currently results in an error about using the operator outside of index -- This case should be fixed -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <-> '{0,0,0}'::REAL[] LIMIT 2; +SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <?> '{0,0,0}'::REAL[] LIMIT 2; ERROR: Operator <-> can only be used inside of an index diff --git a/test/expected/hnsw_vector.out b/test/expected/hnsw_vector.out index 91aaf6ce8..83001adb7 100644 --- a/test/expected/hnsw_vector.out +++ b/test/expected/hnsw_vector.out @@ -31,7 +31,7 @@ INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors INSERT INTO items (trait_ai) VALUES ('[10,10,10]'), (NULL); -SELECT * FROM items ORDER BY trait_ai <-> '[0,0,0]' LIMIT 3; +SELECT * FROM items ORDER BY trait_ai <?> '[0,0,0]' LIMIT 3; id | trait_ai ----+---------- 1 | [1,2,3] @@ -78,7 +78,7 @@ INSERT INTO small_world (v) VALUES ('[99,99,2]'); INSERT INTO small_world (v) VALUES (NULL); -- Distance functions SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <?> '[0,1,0]'::VECTOR LIMIT 7; dist ------ 0.00 @@ -91,16 +91,16 @@ FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7;
(7 rows) EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v <-> '[0,1,0]'::vector) + Order By: (v '[0,1,0]'::vector) (3 rows) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; dist ------ 0.00 @@ -113,12 +113,12 @@ FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; (7 rows) EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v '[0,1,0]'::VECTOR LIMIT 7; QUERY PLAN --------------------------------------------------------- Limit -> Index Scan using small_world_v_idx on small_world - Order By: (v <-> '[0,1,0]'::vector) + Order By: (v '[0,1,0]'::vector) (3 rows) -- Verify that index creation on a large vector produces an error @@ -138,18 +138,18 @@ INFO: done init usearch index INFO: inserted 10000 elements INFO: done saving 10000 vectors SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v <-> :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v :'v4444' LIMIT 10; QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit -> Index Scan using hnsw_idx on sift_base10k - Order By: (v <-> 
'[55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26]'::vector) + Order By: (v '[55,61,11,4,5,2,13,24,65,49,13,9,23,37,94,38,54,11,14,14,40,31,50,44,53,4,0,0,27,17,8,34,12,10,4,4,22,52,68,53,9,2,0,0,2,116,119,64,119,2,0,0,2,30,119,119,116,5,0,8,47,9,5,60,7,7,10,23,56,50,23,5,28,68,6,18,24,65,50,9,119,75,3,0,1,8,12,85,119,11,4,6,8,9,5,74,25,11,8,20,18,12,2,21,11,90,25,32,33,15,2,9,84,67,8,4,22,31,11,33,119,30,3,6,0,0,0,26]'::vector) (3 rows) -- Ensure we can query an index for more elements than the value of init_k SET hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 3 + SELECT * FROM small_world order by v '[1,0,0]' LIMIT 3 ) SELECT COUNT(*) from neighbors; count ------- @@ -157,7 +157,7 @@ WITH neighbors AS ( (1 row) WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 15 + SELECT * FROM small_world order by v '[1,0,0]' LIMIT 15 ) SELECT COUNT(*) from neighbors; count ------- @@ -166,11 +166,11 @@ WITH neighbors AS ( RESET client_min_messages; \set ON_ERROR_STOP off --- Expect error due to improper use of the <-> operator outside of its supported context -SELECT ARRAY[1,2,3] <-> ARRAY[3,2,1]; +-- Expect error due to improper use of the operator outside of its supported context +SELECT ARRAY[1,2,3] ARRAY[3,2,1]; ERROR: Operator <-> is invalid outside of ORDER BY context -- Expect error due to mismatching vector dimensions -SELECT 1 FROM small_world ORDER BY v <-> '[0,1,0,1]' LIMIT 1; +SELECT 1 FROM small_world ORDER BY v '[0,1,0,1]' LIMIT 1; ERROR: Expected vector with dimension 3, got 4 SELECT l2sq_dist('[1,1]'::vector, '[0,1,0]'::vector); ERROR: expected equally sized vectors but got vectors with 
dimensions 2 and 3 @@ -202,14 +202,14 @@ CREATE INDEX l2_idx ON small_world_arr USING lantern_hnsw(v) WITH (dim=3, m=2); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; QUERY PLAN -------------------------------------------- Index Scan using l2_idx on small_world_arr - Order By: (v <-> '{0,0,0}'::real[]) + Order By: (v '{0,0,0}'::real[]) (2 rows) -SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; id ---- 1 @@ -222,7 +222,7 @@ CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; id ---- 1 @@ -235,7 +235,7 @@ CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); INFO: done init usearch index INFO: inserted 3 elements INFO: done saving 3 vectors -SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v ARRAY[0,0,0]; id ---- 1 diff --git a/test/sql/hnsw_correct.sql b/test/sql/hnsw_correct.sql index 14fed457a..ad761ad39 100644 --- a/test/sql/hnsw_correct.sql +++ b/test/sql/hnsw_correct.sql @@ -11,7 +11,9 @@ INSERT INTO small_world (v) VALUES ('{0,0}'), ('{1,1}'), ('{2,2}'), ('{3,3}'); -- Create index CREATE INDEX ON small_world USING hnsw (v dist_l2sq_ops) WITH (dim=2, M=4); -SET enable_seqscan = false; +SET enable_seqscan=FALSE; +SET lantern.pgvector_compat=FALSE; + -- Get the results without the index CREATE TEMP TABLE results_wo_index AS @@ -25,7 +27,7 @@ FROM -- Get the results with the index CREATE TEMP TABLE results_w_index AS SELECT - ROW_NUMBER() OVER (ORDER BY v <-> '{0,0}') AS row_num, + ROW_NUMBER() OVER (ORDER BY v 
<?> '{0,0}') AS row_num, id, l2sq_dist(v, '{0,0}') AS dist FROM diff --git a/test/sql/hnsw_cost_estimate.sql b/test/sql/hnsw_cost_estimate.sql index 59e08dac4..048c494c6 100644 --- a/test/sql/hnsw_cost_estimate.sql +++ b/test/sql/hnsw_cost_estimate.sql @@ -44,17 +44,19 @@ BEGIN END; $$ LANGUAGE plpgsql; +SET lantern.pgvector_compat=FALSE; + -- Goal: make sure query cost estimate is accurate -- when index is created with varying costruction parameters. SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v <-> ''%s'' LIMIT 10' +\set explain_query_template 'EXPLAIN SELECT * FROM sift_base10k ORDER BY v <?> ''%s'' LIMIT 10' \set enable_seqscan = off; -- Case 0, sanity check. No data. CREATE TABLE empty_table(id SERIAL PRIMARY KEY, v REAL[2]); CREATE INDEX empty_idx ON empty_table USING hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=2, dim=2); SET _lantern_internal.is_test = true; -SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v <-> ''{1,2}'' LIMIT 10', 0.47); +SELECT is_cost_estimate_within_error('EXPLAIN SELECT * FROM empty_table ORDER BY v <?> ''{1,2}'' LIMIT 10', 0.47); SELECT _lantern_internal.validate_index('empty_idx', false); DROP INDEX empty_idx; diff --git a/test/sql/hnsw_create.sql b/test/sql/hnsw_create.sql index f0082d8be..776ddb2d0 100644 --- a/test/sql/hnsw_create.sql +++ b/test/sql/hnsw_create.sql @@ -13,9 +13,11 @@ SELECT _lantern_internal.validate_index('sift_base1k_v_idx', false); -- Validate that index creation works with a larger number of vectors \ir utils/sift10k_array.sql +SET lantern.pgvector_compat=FALSE; + CREATE INDEX hnsw_idx ON sift_base10k USING hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <?> :'v4444' LIMIT
10; SELECT _lantern_internal.validate_index('hnsw_idx', false); --- Validate that M values inside the allowed range [2, 128] do not throw an error diff --git a/test/sql/hnsw_create_expr.sql b/test/sql/hnsw_create_expr.sql index 9ee5f4aac..3eec58dfd 100644 --- a/test/sql/hnsw_create_expr.sql +++ b/test/sql/hnsw_create_expr.sql @@ -68,6 +68,9 @@ CREATE TABLE test_table (id INTEGER); INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; +SET enable_seqscan = false; +SET lantern.pgvector_compat=FALSE; + -- This should success CREATE INDEX ON test_table USING hnsw (int_to_fixed_binary_real_array(id)) WITH (M=2); @@ -83,4 +86,4 @@ CREATE INDEX ON test_table USING hnsw (int_to_string(id)) WITH (M=2); -- This should result in error about multicolumn expressions support CREATE INDEX ON test_table USING hnsw (int_to_fixed_binary_real_array(id), int_to_dynamic_binary_real_array(id)) WITH (M=2); -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <-> '{0,0,0}'::REAL[] LIMIT 2; +SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) '{0,0,0}'::REAL[] LIMIT 2; diff --git a/test/sql/hnsw_dist_func.sql b/test/sql/hnsw_dist_func.sql index 757334070..298bc9b9f 100644 --- a/test/sql/hnsw_dist_func.sql +++ b/test/sql/hnsw_dist_func.sql @@ -20,9 +20,9 @@ SET enable_seqscan=FALSE; SET lantern.pgvector_compat=FALSE; -- Verify that the distance functions work (check distances) -SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; -SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v <-> '{0,1,0}'; -SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v <-> '{0,1,0}'; +SELECT ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 ORDER BY v '{0,1,0}'; +SELECT ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}'; +SELECT ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham ORDER BY v 
'{0,1,0}'; -- Verify that the distance functions work (check IDs) SELECT ARRAY_AGG(id ORDER BY id), ROUND(l2sq_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_l2 GROUP BY 2 ORDER BY 2; @@ -30,24 +30,24 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham GROUP BY 2 ORDER BY 2; -- Verify that the indexes is being used -EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; -EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v <-> '{0,1,0}'; -EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v '{0,1,0}'; \set ON_ERROR_STOP off -- Expect errors due to mismatching vector dimensions -SELECT 1 FROM small_world_l2 ORDER BY v <-> '{0,1,0,1}' LIMIT 1; -SELECT 1 FROM small_world_cos ORDER BY v <-> '{0,1,0,1}' LIMIT 1; -SELECT 1 FROM small_world_ham ORDER BY v <-> '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_l2 ORDER BY v '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_cos ORDER BY v '{0,1,0,1}' LIMIT 1; +SELECT 1 FROM small_world_ham ORDER BY v '{0,1,0,1}' LIMIT 1; SELECT l2sq_dist('{1,1}'::REAL[], '{0,1,0}'::REAL[]); SELECT cos_dist('{1,1}', '{0,1,0}'); SELECT hamming_dist('{1,1}', '{0,1,0}'); --- Expect errors due to improper use of the <-> operator outside of its supported context -SELECT ARRAY[1,2,3] <-> ARRAY[3,2,1]; -SELECT ROUND((v <-> ARRAY[0,1,0])::numeric, 2) FROM small_world_cos ORDER BY v <-> '{0,1,0}' LIMIT 7; -SELECT ROUND((v <-> ARRAY[0,1,0])::numeric, 2) FROM small_world_ham ORDER BY v <-> '{0,1,0}' LIMIT 7; +-- Expect errors due to improper use of the operator outside of its supported context +SELECT ARRAY[1,2,3] ARRAY[3,2,1]; +SELECT ROUND((v 
ARRAY[0,1,0])::numeric, 2) FROM small_world_cos ORDER BY v '{0,1,0}' LIMIT 7; +SELECT ROUND((v ARRAY[0,1,0])::numeric, 2) FROM small_world_ham ORDER BY v '{0,1,0}' LIMIT 7; \set ON_ERROR_STOP on @@ -64,38 +64,38 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; \set ON_ERROR_STOP off -- Expect errors due to incorrect usage -INSERT INTO test1 (v) VALUES (ARRAY['{1,2}'::REAL[] <-> '{4,2}'::REAL[], 0]); -SELECT v <-> '{1,2}' FROM test1 ORDER BY v <-> '{1,3}'; -SELECT v <-> '{1,2}' FROM test1; -WITH temp AS (SELECT v <-> '{1,2}' FROM test1) SELECT 1 FROM temp; -SELECT t.res FROM (SELECT v <-> '{1,2}' AS res FROM test1) t; -SELECT (SELECT v <-> '{1,2}' FROM test1 LIMIT 1) FROM test1; -SELECT COALESCE(v <-> '{1,2}', 0) FROM test1; -SELECT EXISTS (SELECT v <-> '{1,2}' FROM test1); -SELECT test1.v <-> test2.v FROM test1 JOIN test2 USING (id); -SELECT v <-> '{1,2}' FROM test1 UNION SELECT v <-> '{1,3}' FROM test1; -(SELECT v <-> '{1,2}' FROM test1 WHERE id < 5) UNION (SELECT v <-> '{1,3}' FROM test1 WHERE id >= 5); -SELECT MAX(v <-> '{1,2}') FROM test1; -SELECT * FROM test1 JOIN test2 ON test1.v <-> test2.v < 0.5; -SELECT test1.v FROM test1 JOIN test2 ON test1.v <-> '{1,2}' = test2.v <-> '{1,3}'; -SELECT (v <-> '{1,2}') + (v <-> '{1,3}') FROM test1; -SELECT CASE WHEN v <-> '{1,2}' > 1 THEN 'High' ELSE 'Low' END FROM test1; -INSERT INTO test1 (v) VALUES ('{2,3}') RETURNING v <-> '{1,2}'; -SELECT 1 FROM test1 GROUP BY v <-> '{1,3}'; -SELECT 1 FROM test1 ORDER BY (('{1,2}'::real[] <-> '{3,4}'::real[]) - 0); -SELECT 1 FROM test1 ORDER BY '{1,2}'::REAL[] <-> '{3,4}'::REAL[]; -SELECT 1 FROM test1 ORDER BY v <-> ARRAY[(SELECT '{1,4}'::REAL[] <-> '{4,2}'::REAL[]), 3]; +INSERT INTO test1 (v) VALUES (ARRAY['{1,2}'::REAL[] '{4,2}'::REAL[], 0]); +SELECT v '{1,2}' FROM test1 ORDER BY v '{1,3}'; +SELECT v '{1,2}' FROM test1; +WITH temp AS (SELECT v '{1,2}' FROM test1) SELECT 1 FROM temp; +SELECT t.res FROM (SELECT v '{1,2}' AS res FROM test1) t; +SELECT (SELECT v '{1,2}' FROM test1 LIMIT 1) FROM 
test1; +SELECT COALESCE(v '{1,2}', 0) FROM test1; +SELECT EXISTS (SELECT v '{1,2}' FROM test1); +SELECT test1.v test2.v FROM test1 JOIN test2 USING (id); +SELECT v '{1,2}' FROM test1 UNION SELECT v '{1,3}' FROM test1; +(SELECT v '{1,2}' FROM test1 WHERE id < 5) UNION (SELECT v '{1,3}' FROM test1 WHERE id >= 5); +SELECT MAX(v '{1,2}') FROM test1; +SELECT * FROM test1 JOIN test2 ON test1.v test2.v < 0.5; +SELECT test1.v FROM test1 JOIN test2 ON test1.v '{1,2}' = test2.v '{1,3}'; +SELECT (v '{1,2}') + (v '{1,3}') FROM test1; +SELECT CASE WHEN v '{1,2}' > 1 THEN 'High' ELSE 'Low' END FROM test1; +INSERT INTO test1 (v) VALUES ('{2,3}') RETURNING v '{1,2}'; +SELECT 1 FROM test1 GROUP BY v '{1,3}'; +SELECT 1 FROM test1 ORDER BY (('{1,2}'::real[] '{3,4}'::real[]) - 0); +SELECT 1 FROM test1 ORDER BY '{1,2}'::REAL[] '{3,4}'::REAL[]; +SELECT 1 FROM test1 ORDER BY v ARRAY[(SELECT '{1,4}'::REAL[] '{4,2}'::REAL[]), 3]; -- Expect errors due to index not existing -SELECT id FROM test1 ORDER BY v <-> '{1,2}'; -SELECT 1 FROM test1 ORDER BY v <-> (SELECT '{1,3}'::real[]); -SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDER BY t1.v <-> t2.v LIMIT 1) t2_results ON TRUE; -WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t; -WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1; -WITH t AS (SELECT id FROM test1 ORDER BY v <-> '{1,2}') SELECT id FROM t UNION SELECT id FROM t; +SELECT id FROM test1 ORDER BY v '{1,2}'; +SELECT 1 FROM test1 ORDER BY v (SELECT '{1,3}'::real[]); +SELECT t2_results.id FROM test1 t1 JOIN LATERAL (SELECT t2.id FROM test2 t2 ORDER BY t1.v t2.v LIMIT 1) t2_results ON TRUE; +WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT DISTINCT id FROM t; +WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}' LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1; +WITH t AS (SELECT id FROM test1 ORDER BY v '{1,2}') SELECT id FROM t UNION SELECT id FROM t; 
-- issue #227 -SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v <-> '{1,2}') as forall) haha on TRUE; +SELECT * from test2 JOIN LATERAL (SELECT * FROM (SELECT id FROM test2 ORDER BY v '{1,2}') as forall) haha on TRUE; -- more complex setup of the above SELECT forall.id, nearest_per_id.* FROM (SELECT * FROM @@ -112,7 +112,7 @@ SELECT forall.id, nearest_per_id.* FROM FROM test2 ORDER BY - v <-> forall.v + v forall.v LIMIT 5 ) as __unused_name @@ -143,7 +143,7 @@ SELECT forall.id, nearest_per_id.* FROM FROM small_world_l2 ORDER BY - v <-> forall.v + v forall.v LIMIT 4 ) as __unused_name @@ -160,7 +160,7 @@ CREATE TABLE extra_small_world_ham ( ); INSERT INTO extra_small_world_ham (v) VALUES ('{0,0}'), ('{1,1}'), ('{2,2}'), ('{3,3}'); CREATE INDEX ON extra_small_world_ham USING hnsw (v dist_hamming_ops) WITH (dim=2); -SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v <-> '{0,0}'; +SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v '{0,0}'; SELECT _lantern_internal.validate_index('small_world_l2_v_idx', false); SELECT _lantern_internal.validate_index('small_world_cos_v_idx', false); diff --git a/test/sql/hnsw_ef_search.sql b/test/sql/hnsw_ef_search.sql index f0314d3c5..e1d65c22a 100644 --- a/test/sql/hnsw_ef_search.sql +++ b/test/sql/hnsw_ef_search.sql @@ -20,37 +20,38 @@ SET hnsw.ef = 401; -- Repeat the same query while varying ef parameter -- NOTE: it is not entirely known if the results of these are deterministic -SET enable_seqscan = false; +SET enable_seqscan=FALSE; +SET lantern.pgvector_compat=FALSE; SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -- Queries below have the same result SET hnsw.ef = 1; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 2; -SELECT ROUND(l2sq_dist(v, 
:'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 4; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 8; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 16; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; -- Queries below have the same result, which is different from above SET hnsw.ef = 32; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 64; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 128; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 256; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; SET hnsw.ef = 400; -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; diff --git 
a/test/sql/hnsw_index_from_file.sql b/test/sql/hnsw_index_from_file.sql index 863e6d181..7978d1c02 100644 --- a/test/sql/hnsw_index_from_file.sql +++ b/test/sql/hnsw_index_from_file.sql @@ -21,17 +21,18 @@ CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH (_experimental_ind SELECT _lantern_internal.validate_index('hnsw_l2_index', false); SELECT * FROM ldb_get_indexes('sift_base1k'); -SET enable_seqscan = false; +SET enable_seqscan=FALSE; +SET lantern.pgvector_compat=FALSE; SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; -- Validate that inserting rows on index created from file works as expected INSERT INTO sift_base1k (id, v) VALUES (1001, array_fill(1, ARRAY[128])), (1002, array_fill(2, ARRAY[128])); SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v :'v1001' LIMIT 10; -- Drop and recreate table DROP TABLE sift_base1k CASCADE; @@ -43,8 +44,8 @@ SELECT _lantern_internal.validate_index('hnsw_cos_index', false); SELECT * FROM ldb_get_indexes('sift_base1k'); SELECT v AS v777 FROM sift_base1k WHERE id = 777 \gset -EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; -SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; 
+SELECT ROUND(cos_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; --- Test scenarious --- ----------------------------------------- @@ -59,4 +60,4 @@ DELETE FROM sift_base1k WHERE id=777; CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH (_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch'); SELECT _lantern_internal.validate_index('hnsw_l2_index', false); -- This should not throw error, but the first result will not be 0 as vector 777 is deleted from the table -SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v <-> :'v777' LIMIT 10; +SELECT ROUND(l2sq_dist(v, :'v777')::numeric, 2) FROM sift_base1k order by v :'v777' LIMIT 10; diff --git a/test/sql/hnsw_insert.sql b/test/sql/hnsw_insert.sql index 7a46e24ca..e9bd57ac1 100644 --- a/test/sql/hnsw_insert.sql +++ b/test/sql/hnsw_insert.sql @@ -47,6 +47,7 @@ set work_mem = '10MB'; CREATE INDEX ON small_world USING hnsw (v) WITH (dim=3); SET enable_seqscan = false; +SET lantern.pgvector_compat = false; -- Inserting vectors of the same dimension and nulls should work INSERT INTO small_world (v) VALUES ('{1,1,2}'); @@ -63,7 +64,7 @@ SELECT FROM small_world ORDER BY - v <-> '{0,0,0}'; + v '{0,0,0}'; -- Ensure the index size remains consistent after inserts SELECT * from ldb_get_indexes('small_world'); @@ -75,7 +76,7 @@ SELECT FROM small_world ORDER BY - v <-> '{0,0,0}' + v '{0,0,0}' LIMIT 10; SELECT _lantern_internal.validate_index('small_world_v_idx', false); @@ -88,6 +89,6 @@ CREATE TABLE sift_base10k ( CREATE INDEX hnsw_idx ON sift_base10k USING hnsw (v dist_l2sq_ops) WITH (M=2, ef_construction=10, ef=4, dim=128); \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base_arrays.csv' WITH CSV; SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v <-> :'v4444'; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k order by v :'v4444'; SELECT 
_lantern_internal.validate_index('hnsw_idx', false); diff --git a/test/sql/hnsw_operators.sql b/test/sql/hnsw_operators.sql index 9002c7ed5..2a7b4ec14 100644 --- a/test/sql/hnsw_operators.sql +++ b/test/sql/hnsw_operators.sql @@ -4,20 +4,22 @@ INSERT INTO op_test (v) VALUES (ARRAY[0,0,0]), (ARRAY[1,1,1]); CREATE INDEX cos_idx ON op_test USING hnsw(v dist_cos_ops); -- should rewrite operator SET lantern.pgvector_compat=FALSE; -SELECT * FROM op_test ORDER BY v <-> ARRAY[1,1,1]; +SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; --- should throw error \set ON_ERROR_STOP off -SET lantern.pgvector_compat=FALSE; +SET lantern.pgvector_compat=TRUE; +-- should throw error +SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; +-- should not throw error SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; --- should throw error +-- should not throw error SELECT * FROM op_test ORDER BY v::INTEGER[] <+> ARRAY[1,1,1]; --- should throw error +-- should not throw error SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; -SET lantern.pgvector_compat=TRUE; +SET lantern.pgvector_compat=FALSE; SET enable_seqscan=OFF; \set ON_ERROR_STOP on @@ -35,9 +37,9 @@ SELECT cos_dist(ARRAY[.1,0,0]::real[], ARRAY[0,.5,0]::real[]); SELECT ARRAY[1,0,0] <+> ARRAY[0,1,0]; -- NOW THIS IS TRIGGERING INDEX SCAN AS WELL --- BECAUSE WE ARE REGISTERING <-> FOR ALL OPERATOR CLASSES +-- BECAUSE WE ARE REGISTERING FOR ALL OPERATOR CLASSES -- IDEALLY THIS SHOULD NOT TRIGGER INDEX SCAN WHEN lantern.pgvector_compat=TRUE -EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <-> ARRAY[1,1,1]; +EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; -- should sort with index EXPLAIN (COSTS FALSE) SELECT * FROM op_test ORDER BY v <=> ARRAY[1,1,1]; @@ -59,12 +61,10 @@ SELECT v <-> ARRAY[1,1,1] FROM op_test ORDER BY v <-> ARRAY[1,1,1]; RESET ALL; -- Set false twice to verify that no crash is happening SET lantern.pgvector_compat=FALSE; -SET lantern.pgvector_compat=FALSE; \set ON_ERROR_STOP off -- 
should rewrite operator -SELECT * FROM op_test ORDER BY v <-> ARRAY[1,1,1]; +SELECT * FROM op_test ORDER BY v ARRAY[1,1,1]; -SET lantern.pgvector_compat=TRUE; SET enable_seqscan=OFF; CREATE INDEX hamming_idx ON op_test USING hnsw(cast(v as INTEGER[]) dist_hamming_ops); diff --git a/test/sql/hnsw_select.sql b/test/sql/hnsw_select.sql index 6f8132f66..7e1432224 100644 --- a/test/sql/hnsw_select.sql +++ b/test/sql/hnsw_select.sql @@ -24,98 +24,98 @@ SELECT 1 FROM test1 WHERE id = 0 + 1; -- Verify that the index is being used SET _lantern_internal.is_test = true; -EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 1; +EXPLAIN (COSTS FALSE) SELECT * FROM small_world order by v '{1,0,0}' LIMIT 1; -- Verify that this does not use the index EXPLAIN (COSTS FALSE) SELECT 1 FROM small_world WHERE v = '{0,0,0}'; -- Ensure we can query an index for more elements than the value of init_k WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; -- Change default k and make sure the number of usearch_searchs makes sense SET hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 3 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 3 ) SELECT COUNT(*) from neighbors; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '{1,0,0}' LIMIT 15 + SELECT * FROM small_world order by v '{1,0,0}' LIMIT 15 ) SELECT COUNT(*) from neighbors; RESET client_min_messages; SET _lantern_internal.is_test = false; -- Verify where condition works properly and still uses index -SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE b IS TRUE ORDER BY v <-> ''{0,0,0}'''); +SELECT has_index_scan('EXPLAIN SELECT * FROM small_world WHERE 
b IS TRUE ORDER BY v ''{0,0,0}'''); -- Verify that the index is not being used when there is no order by SELECT NOT has_index_scan('EXPLAIN SELECT COUNT(*) FROM small_world'); -- Verify swapping order doesn't change anything and still uses index -SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v'); +SELECT has_index_scan('EXPLAIN SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v'); -- Verify group by works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] <-> v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY ''{1,2}''::REAL[] v LIMIT 1) SELECT id, COUNT(*) FROM t GROUP BY 1'); -- Validate distinct works and uses index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,2}'' LIMIT 1) SELECT DISTINCT id FROM t'); -- Validate join lateral works and uses index -SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v <-> t1.v LIMIT 1) t1_results ON TRUE'); +SELECT has_index_scan('EXPLAIN SELECT t1_results.id FROM test2 t2 JOIN LATERAL (SELECT t1.id FROM test1 t1 ORDER BY t2.v t1.v LIMIT 1) t1_results ON TRUE'); -- Validate union works and uses index -SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); +SELECT has_index_scan('EXPLAIN (SELECT id FROM test1 ORDER BY v ''{1,4}'') UNION (SELECT id FROM test1 ORDER BY v IS NOT NULL LIMIT 1)'); -- Validate CTEs work and still use index -SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v <-> ''{1,4}'') SELECT id FROM t UNION SELECT id FROM t'); +SELECT has_index_scan('EXPLAIN WITH t AS (SELECT id FROM test1 ORDER BY v ''{1,4}'') SELECT id 
FROM t UNION SELECT id FROM t'); --- Validate <-> is replaced with the matching function when an index is present +-- Validate <?> is replaced with the matching function when an index is present set enable_seqscan = true; set enable_indexscan = false; -EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v <-> '{1,1,1}'; -SELECT * from small_world ORDER BY v <-> '{1,1,1}'; +EXPLAIN (COSTS false) SELECT * from small_world ORDER BY v <?> '{1,1,1}'; +SELECT * from small_world ORDER BY v <?> '{1,1,1}'; begin; INSERT INTO test2 (v) VALUES ('{1,4}'); INSERT INTO test2 (v) VALUES ('{2,4}'); CREATE INDEX test2_cos ON test2 USING hnsw(v dist_cos_ops); -EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v <-> '{1,4}'; +EXPLAIN (COSTS false) SELECT * from test2 ORDER BY v <?> '{1,4}'; -- Some additional cases that trigger operator rewriting -- SampleScan -EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v <-> '{1,1,1}' ASC; +EXPLAIN (COSTS false) SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v <?> '{1,1,1}' ASC; -- can't compare direct equality here because it's random -SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v <-> ''{1,1,1}'' ASC', +SELECT results_match('EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY v <?> ''{1,1,1}'' ASC', 'EXPLAIN SELECT * FROM small_world TABLESAMPLE BERNOULLI (20) ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC'); -- SetOpt/HashSetOp -EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v <-> '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v <-> '{1,1,1}' ASC LIMIT 5); -SELECT results_match('(SELECT * FROM small_world ORDER BY v <-> ''{1,0,1}'' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v <-> ''{1,1,1}'' ASC LIMIT 5)', +EXPLAIN (COSTS false) (SELECT * FROM small_world ORDER BY v <?> '{1,0,1}' ASC ) EXCEPT (SELECT * FROM small_world ORDER by v <?> '{1,1,1}' ASC LIMIT 5); +SELECT results_match('(SELECT * FROM small_world ORDER BY v <?> ''{1,0,1}'' ASC
) EXCEPT (SELECT * FROM small_world ORDER by v <?> ''{1,1,1}'' ASC LIMIT 5)', '(SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,0,1}'') ASC ) EXCEPT (SELECT * FROM small_world ORDER by l2sq_dist(v, ''{1,1,1}'') ASC LIMIT 5)'); -- HashAggregate -EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v <-> '{1,1,1}'; -SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v <-> ''{1,1,1}''', +EXPLAIN (COSTS false) SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v <?> '{1,1,1}'; +SELECT results_match('SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY v <?> ''{1,1,1}''', 'SELECT v, COUNT(*) FROM small_world GROUP BY v ORDER BY l2sq_dist(v, ''{1,1,1}'')'); -- GroupBy this -EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <-> '{1,1,1}'; -SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <-> ''{1,1,1}''', +EXPLAIN (COSTS false) SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <?> '{1,1,1}'; +SELECT results_match('SELECT * FROM small_world GROUP BY id, v, b ORDER BY v <?> ''{1,1,1}''', 'SELECT * FROM small_world GROUP BY id, v, b ORDER BY l2sq_dist(v, ''{1,1,1}'')'); -- HashJoin/Hash CREATE TABLE small_world_2 AS (SELECT * FROM small_world); -EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v <-> '{1,1,1}'; -SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v <-> ''{1,1,1}''', +EXPLAIN (COSTS false) SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v <?> '{1,1,1}'; +SELECT results_match('SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY v <?> ''{1,1,1}''', 'SELECT * FROM small_world JOIN small_world_2 using (v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); -- MixedAggregate (this doesn't require additional logic, but I include it here as an example of generating the path) -EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <-> '{1,1,1}'; -SELECT
results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <-> ''{1,1,1}''', +EXPLAIN (COSTS false) SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <?> '{1,1,1}'; +SELECT results_match('SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY v <?> ''{1,1,1}''', 'SELECT v FROM small_world GROUP BY ROLLUP(v) ORDER BY l2sq_dist(v, ''{1,1,1}'')'); -- WindowAgg -EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <-> '{1,1,1}'; -SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <-> ''{1,1,1}''', +EXPLAIN (COSTS false) SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <?> '{1,1,1}'; +SELECT results_match('SELECT v, EVERY(b) OVER () FROM small_world ORDER BY v <?> ''{1,1,1}''', 'SELECT v, EVERY(b) OVER () FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'')'); -- LockRows -EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v <-> '{1,1,1}' ASC FOR UPDATE; -SELECT results_match('SELECT * FROM small_world ORDER BY v <-> ''{1,1,1}'' ASC FOR UPDATE', +EXPLAIN (COSTS false) SELECT * FROM small_world ORDER BY v <?> '{1,1,1}' ASC FOR UPDATE; +SELECT results_match('SELECT * FROM small_world ORDER BY v <?> ''{1,1,1}'' ASC FOR UPDATE', 'SELECT * FROM small_world ORDER BY l2sq_dist(v, ''{1,1,1}'') ASC FOR UPDATE'); rollback; diff --git a/test/sql/hnsw_todo.sql b/test/sql/hnsw_todo.sql index 8f5113254..fa421d32b 100644 --- a/test/sql/hnsw_todo.sql +++ b/test/sql/hnsw_todo.sql @@ -33,7 +33,7 @@ SELECT _lantern_internal.validate_index('small_world_l2_vector_int_idx', false); EXPLAIN (COSTS FALSE) SELECT id, ROUND(l2sq_dist(vector_int, array[0,1,0])::numeric, 2) as dist FROM small_world_l2 -ORDER BY vector_int <-> array[0,1,0] LIMIT 7; +ORDER BY vector_int <?> array[0,1,0] LIMIT 7; --- Test scenarious --- ----------------------------------------- @@ -53,7 +53,7 @@ SELECT v AS v1001 FROM sift_base1k WHERE id = 1001 \gset CREATE INDEX hnsw_l2_index ON sift_base1k USING hnsw (v) WITH
(_experimental_index_path='/tmp/lantern/files/index-sift1k-l2.usearch'); SELECT _lantern_internal.validate_index('hnsw_l2_index', false); -- The 1001 and 1002 vectors will be ignored in search, so the first row will not be 0 in result -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <?> :'v1001' LIMIT 1; -- Case: -- Index is created externally @@ -67,9 +67,9 @@ SELECT _lantern_internal.validate_index('hnsw_l2_index', false); -- The first row will not be 0 now as the vector under id=777 was updated to 1,1,1,1... but it was indexed with different vector -- So the usearch index can not find 1,1,1,1,1.. vector in the index and wrong results will be returned -- This is an expected behaviour for now -SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <-> :'v1001' LIMIT 1; +SELECT ROUND(l2sq_dist(v, :'v1001')::numeric, 2) FROM sift_base1k order by v <?> :'v1001' LIMIT 1; ----- Query on expression based index is failing to check correct <-> operator usage -------- +---- Query on expression based index is failing to check correct <?> operator usage -------- CREATE OR REPLACE FUNCTION int_to_fixed_binary_real_array(n INT) RETURNS REAL[] AS $$ DECLARE binary_string TEXT; @@ -91,5 +91,5 @@ INSERT INTO test_table VALUES (0), (1), (7); \set enable_seqscan = off; -- This currently results in an error about using the operator outside of index -- This case should be fixed -SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <-> '{0,0,0}'::REAL[] LIMIT 2; +SELECT id FROM test_table ORDER BY int_to_fixed_binary_real_array(id) <?> '{0,0,0}'::REAL[] LIMIT 2; diff --git a/test/sql/hnsw_vector.sql b/test/sql/hnsw_vector.sql index 8c7b25726..282ce7ba8 100644 --- a/test/sql/hnsw_vector.sql +++ b/test/sql/hnsw_vector.sql @@ -23,7 +23,7 @@ CREATE INDEX ON items USING lantern_hnsw (trait_ai dist_vec_l2sq_ops) WITH (dim= INSERT INTO items
(trait_ai) VALUES ('[6,7,8]'); CREATE INDEX ON items USING lantern_hnsw (trait_ai dist_vec_l2sq_ops) WITH (dim=3, M=4); INSERT INTO items (trait_ai) VALUES ('[10,10,10]'), (NULL); -SELECT * FROM items ORDER BY trait_ai <-> '[0,0,0]' LIMIT 3; +SELECT * FROM items ORDER BY trait_ai <?> '[0,0,0]' LIMIT 3; SELECT * FROM ldb_get_indexes('items'); -- Test index creation on table with existing data @@ -36,14 +36,14 @@ INSERT INTO small_world (v) VALUES (NULL); -- Distance functions SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <?> '[0,1,0]'::VECTOR LIMIT 7; EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <?> '[0,1,0]'::VECTOR LIMIT 7; SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <?> '[0,1,0]'::VECTOR LIMIT 7; EXPLAIN (COSTS FALSE) SELECT ROUND(l2sq_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <-> '[0,1,0]'::VECTOR LIMIT 7; +FROM small_world ORDER BY v <?> '[0,1,0]'::VECTOR LIMIT 7; -- Verify that index creation on a large vector produces an error CREATE TABLE large_vector (v VECTOR(2001)); @@ -59,25 +59,25 @@ CREATE TABLE sift_base10k ( \COPY sift_base10k (v) FROM '/tmp/lantern/vector_datasets/siftsmall_base.csv' WITH CSV; CREATE INDEX hnsw_idx ON sift_base10k USING lantern_hnsw (v); SELECT v AS v4444 FROM sift_base10k WHERE id = 4444 \gset -EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v <-> :'v4444' LIMIT 10; +EXPLAIN (COSTS FALSE) SELECT * FROM sift_base10k ORDER BY v <?> :'v4444' LIMIT 10; -- Ensure we can query an index for more elements than the value of init_k SET hnsw.init_k = 4; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 3 + SELECT * FROM small_world order by v <?>
'[1,0,0]' LIMIT 3 ) SELECT COUNT(*) from neighbors; WITH neighbors AS ( - SELECT * FROM small_world order by v <-> '[1,0,0]' LIMIT 15 + SELECT * FROM small_world order by v <?> '[1,0,0]' LIMIT 15 ) SELECT COUNT(*) from neighbors; RESET client_min_messages; \set ON_ERROR_STOP off --- Expect error due to improper use of the <-> operator outside of its supported context -SELECT ARRAY[1,2,3] <-> ARRAY[3,2,1]; +-- Expect error due to improper use of the <?> operator outside of its supported context +SELECT ARRAY[1,2,3] <?> ARRAY[3,2,1]; -- Expect error due to mismatching vector dimensions -SELECT 1 FROM small_world ORDER BY v <-> '[0,1,0,1]' LIMIT 1; +SELECT 1 FROM small_world ORDER BY v <?> '[0,1,0,1]' LIMIT 1; SELECT l2sq_dist('[1,1]'::vector, '[0,1,0]'::vector); -- Test creating index with expression @@ -105,14 +105,14 @@ CREATE INDEX ON test_table USING lantern_hnsw (int_to_fixed_binary_vector(id)) W CREATE TABLE small_world_arr (id SERIAL PRIMARY KEY, v REAL[]); INSERT INTO small_world_arr (v) VALUES ('{0,0,0}'), ('{0,0,1}'), ('{0,0,2}'); CREATE INDEX l2_idx ON small_world_arr USING lantern_hnsw(v) WITH (dim=3, m=2); -EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; -SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +EXPLAIN (COSTS FALSE) SELECT id FROM small_world_arr ORDER BY v <?> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <?> ARRAY[0,0,0]; DROP INDEX l2_idx; CREATE INDEX cos_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=2); -SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <?> ARRAY[0,0,0]; DROP INDEX cos_idx; CREATE INDEX ham_idx ON small_world_arr USING lantern_hnsw(v) WITH (m=3); -SELECT id FROM small_world_arr ORDER BY v <-> ARRAY[0,0,0]; +SELECT id FROM small_world_arr ORDER BY v <?> ARRAY[0,0,0]; -- Test pgvector in lantern.pgvector_compat=TRUE mode DROP TABLE small_world; @@ -143,4 +143,4 @@ SELECT ROUND(cos_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist FROM
small_world ORDER BY v <=> '[0,1,0]'::VECTOR LIMIT 7; EXPLAIN (COSTS FALSE) SELECT ROUND(cos_dist(v, '[0,1,0]'::VECTOR)::numeric, 2) as dist -FROM small_world ORDER BY v <=> '[0,1,0]'::VECTOR LIMIT 7; \ No newline at end of file +FROM small_world ORDER BY v <=> '[0,1,0]'::VECTOR LIMIT 7;