From 36022dea4f11671e8aa52612c0fde5ee31b054d7 Mon Sep 17 00:00:00 2001 From: Narek Galstyan Date: Thu, 8 Feb 2024 01:50:02 +0000 Subject: [PATCH] Remove hnswlib remnants since we no longer support hnswlib as hnsw provider --- CMakeLists.txt | 2 -- src/hnsw/build.c | 8 -------- src/hnsw/build.h | 2 -- src/hnsw/lib_interface.h | 24 ------------------------ src/hnsw/options.c | 10 ---------- src/hnsw/options.h | 9 +++------ src/hnsw/scan.c | 8 -------- src/hnsw/scan.h | 2 -- 8 files changed, 3 insertions(+), 62 deletions(-) delete mode 100644 src/hnsw/lib_interface.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 30da37dea..23a8e8b8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,8 +214,6 @@ endif () target_link_libraries(lantern PRIVATE "libstdc++.a") set_target_properties(lantern PROPERTIES LINKER_LANGUAGE C) - -target_compile_definitions(lantern PRIVATE LANTERN_USE_USEARCH) # the flag instructs usearch/lib.c to builds with lantern-postgres compatible storage, which # assumes storage is handled inside postgres and so usearch allocates no memory for it target_compile_definitions(lantern PRIVATE LANTERN_INSIDE_POSTGRES) diff --git a/src/hnsw/build.c b/src/hnsw/build.c index 330bcd870..990902c68 100644 --- a/src/hnsw/build.c +++ b/src/hnsw/build.c @@ -97,10 +97,6 @@ static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildStat // casting tid structure to a number to be used as value in vector search // tid has info about disk location of this item and is 6 bytes long usearch_label_t label = GetUsearchLabel(tid); -#ifdef LANTERN_USE_LIBHNSW - if(buildstate->hnsw != NULL) hnsw_add(buildstate->hnsw, vector, label); -#endif -#ifdef LANTERN_USE_USEARCH if(buildstate->usearch_index != NULL) { size_t capacity = usearch_capacity(buildstate->usearch_index, &error); if(capacity == usearch_size(buildstate->usearch_index, &error)) { @@ -115,7 +111,6 @@ static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildStat } 
usearch_add(buildstate->usearch_index, label, vector, usearch_scalar, &error); } -#endif assert(error == NULL); buildstate->tuples_indexed++; buildstate->reltuples++; @@ -450,7 +445,6 @@ static void BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo, Hnsw elog(INFO, "done init usearch index"); assert(error == NULL); - buildstate->hnsw = NULL; if(buildstate->index_file_path) { if(access(buildstate->index_file_path, F_OK) != 0) { ereport(ERROR, @@ -582,8 +576,6 @@ static void BuildEmptyIndex(Relation index, IndexInfo *indexInfo, HnswBuildState buildstate->usearch_index = usearch_init(&opts, &error); assert(error == NULL); - buildstate->hnsw = NULL; - char *result_buf = palloc(USEARCH_EMPTY_INDEX_SIZE); usearch_save_buffer(buildstate->usearch_index, result_buf, USEARCH_EMPTY_INDEX_SIZE, &error); assert(error == NULL && result_buf != NULL); diff --git a/src/hnsw/build.h b/src/hnsw/build.h index 91ef0947f..61137f31f 100644 --- a/src/hnsw/build.h +++ b/src/hnsw/build.h @@ -6,7 +6,6 @@ #include #include "hnsw.h" -#include "lib_interface.h" #include "usearch.h" typedef struct HnswBuildState @@ -26,7 +25,6 @@ typedef struct HnswBuildState double reltuples; /* hnsw */ - hnsw_t hnsw; usearch_index_t usearch_index; /* Memory */ diff --git a/src/hnsw/lib_interface.h b/src/hnsw/lib_interface.h deleted file mode 100644 index 0772c2d6b..000000000 --- a/src/hnsw/lib_interface.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef LDB_HNSW_LIB_INTERFACE_H -#define LDB_HNSW_LIB_INTERFACE_H -#ifdef __cplusplus -extern "C" { -#endif -// these are implemented by hnswlib third party dependency -// the goal is to make the interface generic over other vector index providers -typedef void* hnsw_t; -hnsw_t hnsw_new(int dimension, int max_elements, int M, int ef_construction); -hnsw_t hnsw_new_brute(int dimension, int max_elements); -void hnsw_add(hnsw_t hnsw, float* vector, long unsigned int label); -void hnsw_search( - hnsw_t hnsw, float* vector, int k, int* out_num_returned, float* 
out_distances, long unsigned int* out_labels); -int hnsw_size(hnsw_t hnsw); -// temporary, before I understand WAL and buffer stuff of postgres -void hnsw_save(hnsw_t hnsw, const char* filename); -hnsw_t hnsw_load(const char* filename, int dimension, int max_elements); - -void hnsw_destroy(hnsw_t hnsw); - -#ifdef __cplusplus -} -#endif -#endif // LDB_HNSW_LIB_INTERFACE_H diff --git a/src/hnsw/options.c b/src/hnsw/options.c index f839b4baa..fc9215c1d 100644 --- a/src/hnsw/options.c +++ b/src/hnsw/options.c @@ -157,16 +157,6 @@ void _PG_init(void) // todo:: cross-check with this` // https://github.com/zombodb/zombodb/blob/34c732a0b143b5e424ced64c96e8c4d567a14177/src/access_method/options.rs#L895 ldb_hnsw_index_withopts = add_reloption_kind(); -#if 0 - add_int_reloption(ldb_hnsw_index_withopts, "element_limit", - "Maximum table size (needed for hnswlib)", - HNSW_DEFAULT_ELEMENT_LIMIT, 1, HNSW_MAX_ELEMENT_LIMIT -#if PG_VERSION_NUM >= 130000 - , - AccessExclusiveLock -#endif - ); -#endif add_int_reloption(ldb_hnsw_index_withopts, "dim", "Number of dimensions of the vector", diff --git a/src/hnsw/options.h b/src/hnsw/options.h index f1b57e9a3..05bd83e75 100644 --- a/src/hnsw/options.h +++ b/src/hnsw/options.h @@ -20,11 +20,9 @@ #define HNSW_DEFAULT_EF_CONSTRUCTION 128 #define HNSW_MAX_EF_CONSTRUCTION 400 /* 10 in faiss*/ -#define HNSW_DEFAULT_EF 64 -#define HNSW_MAX_EF 400 -#define HNSW_DEFAULT_PROVIDER "usearch" -#define HNSW_MAX_ELEMENT_LIMIT 200000000 -#define HNSWLIB_DEFAULT_ELEMENT_LIMIT 2000000 +#define HNSW_DEFAULT_EF 64 +#define HNSW_MAX_EF 400 +#define HNSW_MAX_ELEMENT_LIMIT 200000000 #define LDB_HNSW_DEFAULT_K 10 #define LDB_HNSW_MAX_K 1000 @@ -32,7 +30,6 @@ /* HNSW index options */ typedef struct ldb_HnswOptions { - // max elements the table will ever have. required for hnswlib int32 vl_len_; /* varlena header (do not touch directly!) 
*/ int dim; int element_limit; diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c index dba0977bc..46797f5ba 100644 --- a/src/hnsw/scan.c +++ b/src/hnsw/scan.c @@ -109,20 +109,12 @@ void ldb_amendscan(IndexScanDesc scan) // todo:: once VACUUM/DELETE are implemented, during scan we need to hold a pin // on the buffer we have last returned. // make sure to release that pin here - -#ifdef LANTERN_USE_LIBHNSW - if(scanstate->hnsw) hnsw_destroy(scanstate->hnsw); -#endif -#ifdef LANTERN_USE_USEARCH if(scanstate->usearch_index) { usearch_error_t error = NULL; usearch_free(scanstate->usearch_index, &error); ldb_wal_retriever_area_fini(scanstate->retriever_ctx); assert(error == NULL); } -#else - elog(ERROR, "no index implementation specified"); -#endif if(scanstate->distances) pfree(scanstate->distances); diff --git a/src/hnsw/scan.h b/src/hnsw/scan.h index fa31cb3db..dccc7944e 100644 --- a/src/hnsw/scan.h +++ b/src/hnsw/scan.h @@ -6,7 +6,6 @@ #include #include "hnsw.h" -#include "lib_interface.h" #include "retriever.h" #include "usearch.h" @@ -25,7 +24,6 @@ typedef struct HnswScanState int current; // set when the distances and labels are populated int count; - hnsw_t hnsw; usearch_index_t usearch_index; RetrieverCtx *retriever_ctx;