From dbdb2540954a33e49f8d3e871926162ede998124 Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Tue, 10 Oct 2023 12:57:29 -1000
Subject: [PATCH 01/10] respect maintenance_work_mem during index construction

---
 src/hnsw/build.c          | 23 +++++++++++++++++++++--
 src/hnsw/external_index.c |  2 +-
 src/hnsw/external_index.h | 13 +++++++------
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/src/hnsw/build.c b/src/hnsw/build.c
index 13e53d217..349904639 100644
--- a/src/hnsw/build.c
+++ b/src/hnsw/build.c
@@ -9,6 +9,8 @@
 #include <catalog/pg_type.h>
 #include <executor/executor.h>
 #include <funcapi.h>
+#include <math.h>
+#include <miscadmin.h>
 #include <nodes/execnodes.h>
 #include <storage/bufmgr.h>
 #include <utils/array.h>
@@ -59,7 +61,7 @@
 #define UpdateProgress(index, val) ((void)val)
 #endif
 
-static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildState *buildstate)
+static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildState *buildstate, Relation index)
 {
     /* Detoast once for all calls */
     usearch_error_t       error = NULL;
@@ -92,6 +94,15 @@ static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildStat
     if(buildstate->usearch_index != NULL) {
         size_t capacity = usearch_capacity(buildstate->usearch_index, &error);
         if(capacity == usearch_size(buildstate->usearch_index, &error)) {
+            double             M = ldb_HnswGetM(index);
+            double             mL = 1 / log(M);
+            usearch_metadata_t meta = usearch_metadata(buildstate->usearch_index, &error);
+            uint32             node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)(mL + .5));
+            if(2 * usearch_size(buildstate->usearch_index, &error) * node_size
+               >= (size_t)maintenance_work_mem * 1024L) {
+                usearch_free(buildstate->usearch_index, &error);
+                elog(ERROR, "index size exceeded maintenance_work_mem during index construction");
+            }
             usearch_reserve(buildstate->usearch_index, 2 * capacity, &error);
             assert(error == NULL);
         }
@@ -130,7 +141,7 @@ static void BuildCallback(
 
     // todo:: the argument values is assumed to be a real[] or vector (they have the same layout)
     // do proper type checking instead of this assumption and test int int arrays and others
-    AddTupleToUsearchIndex(tid, values, buildstate);
+    AddTupleToUsearchIndex(tid, values, buildstate, index);
 
     /* Reset memory context */
     MemoryContextSwitchTo(oldCtx);
@@ -453,6 +464,14 @@ static void BuildIndex(
             // Unlock and release buffer
             UnlockReleaseBuffer(buffer);
         }
+        double             M = ldb_HnswGetM(index);
+        double             mL = 1 / log(M);
+        usearch_metadata_t meta = usearch_metadata(buildstate->usearch_index, &error);
+        uint32             node_size = UsearchNodeBytes(&meta, opts.dimensions * sizeof(float), (int)(mL + .5));
+        // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
+        if(node_size * estimated_row_count > maintenance_work_mem * 1024L) {
+            elog(ERROR, "index size exceeded maintenance_work_mem during index construction");
+        }
         usearch_reserve(buildstate->usearch_index, estimated_row_count, &error);
         if(error != NULL) {
             // There's not much we can do if free throws an error, but we want to preserve the contents of the first one
diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index e6cc73063..67a9b2685 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -22,7 +22,7 @@
 
 static BlockNumber getBlockMapPageBlockNumber(uint32 *blockmap_page_group_index, int id);
 
-static uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level)
+uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level)
 {
     const int NODE_HEAD_BYTES = sizeof(usearch_label_t) + 4 /*sizeof dim */ + 4 /*sizeof level*/;
     uint32    node_bytes = 0;
diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h
index 6ba94182f..b653719e2 100644
--- a/src/hnsw/external_index.h
+++ b/src/hnsw/external_index.h
@@ -112,12 +112,13 @@ typedef struct
     HnswColumnType  columnType;
 } HnswInsertState;
 
-void StoreExternalIndex(Relation                index,
-                        usearch_index_t         external_index,
-                        ForkNumber              forkNum,
-                        char                   *data,
-                        usearch_init_options_t *opts,
-                        size_t                  num_added_vectors);
+uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level);
+void   StoreExternalIndex(Relation                index,
+                          usearch_index_t         external_index,
+                          ForkNumber              forkNum,
+                          char                   *data,
+                          usearch_init_options_t *opts,
+                          size_t                  num_added_vectors);
 
 // add the fully constructed index tuple to the index via wal
 // hdr is passed in so num_vectors, first_block_no, last_block_no can be updated

From 42b125409cd901a3abfe22f4ecead5ed7d37e90d Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Tue, 10 Oct 2023 13:51:45 -1000
Subject: [PATCH 02/10] add checks for work_mem in scan and insert

---
 src/hnsw/external_index.c     |  6 ++++++
 src/hnsw/external_index.h     |  2 ++
 src/hnsw/insert.c             | 10 ++++++++++
 src/hnsw/retriever.c          |  4 ++++
 src/hnsw/scan.c               | 23 +++++++++++++++++++++++
 test/expected/hnsw_insert.out |  1 +
 test/sql/hnsw_insert.sql      |  1 +
 7 files changed, 47 insertions(+)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index 67a9b2685..4f6ed7a95 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -7,6 +7,7 @@
 #include <assert.h>
 #include <common/relpath.h>
 #include <hnsw/fa_cache.h>
+#include <miscadmin.h>
 #include <pg_config.h>       // BLCKSZ
 #include <storage/bufmgr.h>  // Buffer
 #include <utils/hsearch.h>
@@ -615,6 +616,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
             if(!idx_page_prelocked) {
                 UnlockReleaseBuffer(buf);
             }
+            ctx->memory += sizeof(BufferNode) + nodepage->size;
             dlist_push_tail(&ctx->takenbuffers, &buffNode->node);
             return buffNode->buf;
 #else
@@ -625,6 +627,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 buffNode->buf = buf;
 
                 // Add buffNode to list of pinned buffers
+                ctx->memory += sizeof(BufferNode) + offsetof(HnswIndexTuple, node) + nodepage->size;
                 dlist_push_tail(&ctx->takenbuffers, &buffNode->node);
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
@@ -634,6 +637,9 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
             return nodepage->node;
 #endif
         }
+        if(ctx->memory >= work_mem * 1024L) {
+            elog(ERROR, "pinned more buffers during query than will fit in work_mem, consider increasing work_mem");
+        }
     }
     if(!idx_page_prelocked) {
         assert(BufferIsValid(buf));
diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h
index b653719e2..384bff34f 100644
--- a/src/hnsw/external_index.h
+++ b/src/hnsw/external_index.h
@@ -93,6 +93,8 @@ typedef struct
     FullyAssociativeCache fa_cache;
 
     dlist_head takenbuffers;
+
+    int memory;
 } RetrieverCtx;
 
 typedef struct
diff --git a/src/hnsw/insert.c b/src/hnsw/insert.c
index a669fdcb6..f8d7abe63 100644
--- a/src/hnsw/insert.c
+++ b/src/hnsw/insert.c
@@ -9,6 +9,7 @@
 #endif
 #include <float.h>
 #include <math.h>
+#include <miscadmin.h>
 #include <storage/bufmgr.h>
 #include <utils/array.h>
 #include <utils/rel.h>
@@ -144,6 +145,15 @@ bool ldb_aminsert(Relation         index,
     assert(hdr->magicNumber == LDB_WAL_MAGIC_NUMBER);
     ldb_dlog("Insert: at start num vectors is %d", hdr->num_vectors);
 
+    double M = ldb_HnswGetM(index);
+    double mL = 1 / log(M);
+    uint32 node_size = UsearchNodeBytes(&meta, opts.dimensions * sizeof(float), (int)(mL + .5));
+    // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
+    if (node_size * (hdr->num_vectors + 1) > work_mem * 1024L) {
+        usearch_free(uidx, &error);
+        elog(ERROR, "index size exceeded work_mem during insert");
+    }
+
     usearch_reserve(uidx, hdr->num_vectors + 1, &error);
     uint32 level = hnsw_generate_new_level(meta.connectivity);
     if(error != NULL) {
diff --git a/src/hnsw/retriever.c b/src/hnsw/retriever.c
index 1b6a5965d..fcdab63c1 100644
--- a/src/hnsw/retriever.c
+++ b/src/hnsw/retriever.c
@@ -27,6 +27,8 @@ RetrieverCtx *ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPag
     /* fill in a buffer with blockno index information, before spilling it to disk */
     ctx->block_numbers_cache = cache_create("BlockNumberCache");
 
+    ctx->memory = 0;
+
     return ctx;
 }
 
@@ -48,6 +50,8 @@ void ldb_wal_retriever_area_reset(RetrieverCtx *ctx, HnswIndexHeaderPage *header
     }
     dlist_init(&ctx->takenbuffers);
 
+    ctx->memory = 0;
+
     assert(ctx->header_page_under_wal == header_page_under_wal);
     ctx->header_page_under_wal = header_page_under_wal;
 }
diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c
index 175b0063c..3f97e9256 100644
--- a/src/hnsw/scan.c
+++ b/src/hnsw/scan.c
@@ -3,6 +3,8 @@
 #include "scan.h"
 
 #include <access/relscan.h>
+#include <math.h>
+#include <miscadmin.h>
 #include <pgstat.h>
 #include <utils/rel.h>
 
@@ -192,6 +194,18 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
             scanstate->labels = palloc(k * sizeof(usearch_label_t));
         }
 
+        double M = ldb_HnswGetM(scan->indexRelation);
+        double mL = 1 / log(M);
+        usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
+        uint32 node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
+        // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
+        // I think because of mem_view_lazy a max of k nodes will be held in memory by usearch
+        // there are separate checks on the memory held by takenbuffers
+        if (node_size * k > work_mem * 1024L) {
+            usearch_free(scanstate->usearch_index, &error);
+            elog(ERROR, "index size exceeded work_mem during insert");
+        }
+
         ldb_dlog("LANTERN querying index for %d elements", k);
         num_returned = usearch_search(
             scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
@@ -227,6 +241,15 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
         scanstate->distances = repalloc(scanstate->distances, k * sizeof(float));
         scanstate->labels = repalloc(scanstate->labels, k * sizeof(usearch_label_t));
 
+        double M = ldb_HnswGetM(scan->indexRelation);
+        double mL = 1 / log(M);
+        usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
+        uint32 node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
+        if (node_size * k > work_mem * 1024L) {
+            usearch_free(scanstate->usearch_index, &error);
+            elog(ERROR, "index size exceeded work_mem during insert");
+        }
+
         ldb_dlog("LANTERN - querying index for %d elements", k);
         num_returned = usearch_search(
             scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
diff --git a/test/expected/hnsw_insert.out b/test/expected/hnsw_insert.out
index eda9782ee..d99dc75b6 100644
--- a/test/expected/hnsw_insert.out
+++ b/test/expected/hnsw_insert.out
@@ -1,6 +1,7 @@
 ---------------------------------------------------------------------
 -- Test HNSW index inserts on empty table
 ---------------------------------------------------------------------
+set work_mem = '10MB';
 CREATE TABLE small_world (
     id SERIAL PRIMARY KEY,
     v REAL[2]
diff --git a/test/sql/hnsw_insert.sql b/test/sql/hnsw_insert.sql
index 67e3cd8b3..d55e6184e 100644
--- a/test/sql/hnsw_insert.sql
+++ b/test/sql/hnsw_insert.sql
@@ -1,6 +1,7 @@
 ---------------------------------------------------------------------
 -- Test HNSW index inserts on empty table
 ---------------------------------------------------------------------
+set work_mem = '10MB';
 
 CREATE TABLE small_world (
     id SERIAL PRIMARY KEY,

From 3e3c996ae89aa5b6e80c296276e821ef79e30ab4 Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Wed, 11 Oct 2023 09:23:23 -1000
Subject: [PATCH 03/10] make exceeding work_mem a warning, change tests so they
 don't trigger it

---
 src/hnsw/external_index.c          | 2 +-
 src/hnsw/insert.c                  | 3 +--
 src/hnsw/scan.c                    | 6 ++----
 test/parallel/expected/insert.out  | 1 +
 test/parallel/expected/insert2.out | 1 +
 test/parallel/expected/insert3.out | 1 +
 test/parallel/sql/insert.sql       | 1 +
 test/parallel/sql/insert2.sql      | 1 +
 test/parallel/sql/insert3.sql      | 1 +
 9 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index 4f6ed7a95..bf7d28c5f 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -638,7 +638,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
 #endif
         }
         if(ctx->memory >= work_mem * 1024L) {
-            elog(ERROR, "pinned more buffers during query than will fit in work_mem, consider increasing work_mem");
+            elog(WARNING, "pinned more buffers during query than will fit in work_mem, consider increasing work_mem");
         }
     }
     if(!idx_page_prelocked) {
diff --git a/src/hnsw/insert.c b/src/hnsw/insert.c
index f8d7abe63..2039d2538 100644
--- a/src/hnsw/insert.c
+++ b/src/hnsw/insert.c
@@ -150,8 +150,7 @@ bool ldb_aminsert(Relation         index,
     uint32 node_size = UsearchNodeBytes(&meta, opts.dimensions * sizeof(float), (int)(mL + .5));
     // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
     if (node_size * (hdr->num_vectors + 1) > work_mem * 1024L) {
-        usearch_free(uidx, &error);
-        elog(ERROR, "index size exceeded work_mem during insert");
+        elog(WARNING, "index size exceeded work_mem during insert");
     }
 
     usearch_reserve(uidx, hdr->num_vectors + 1, &error);
diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c
index 3f97e9256..a32e1a9b1 100644
--- a/src/hnsw/scan.c
+++ b/src/hnsw/scan.c
@@ -202,8 +202,7 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
         // I think because of mem_view_lazy a max of k nodes will be held in memory by usearch
         // there are separate checks on the memory held by takenbuffers
         if (node_size * k > work_mem * 1024L) {
-            usearch_free(scanstate->usearch_index, &error);
-            elog(ERROR, "index size exceeded work_mem during insert");
+            elog(WARNING, "index size exceeded work_mem during scan");
         }
 
         ldb_dlog("LANTERN querying index for %d elements", k);
@@ -246,8 +245,7 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
         usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
         uint32 node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
         if (node_size * k > work_mem * 1024L) {
-            usearch_free(scanstate->usearch_index, &error);
-            elog(ERROR, "index size exceeded work_mem during insert");
+            elog(WARNING, "index size exceeded work_mem during scan");
         }
 
         ldb_dlog("LANTERN - querying index for %d elements", k);
diff --git a/test/parallel/expected/insert.out b/test/parallel/expected/insert.out
index 604cf2ccd..420c7253b 100644
--- a/test/parallel/expected/insert.out
+++ b/test/parallel/expected/insert.out
@@ -1,3 +1,4 @@
+SET work_mem='10MB';
 BEGIN;
 INSERT INTO sift_base10k (id, v) VALUES 
     (nextval('serial'), random_array(128, 0, 128)),
diff --git a/test/parallel/expected/insert2.out b/test/parallel/expected/insert2.out
index 604cf2ccd..420c7253b 100644
--- a/test/parallel/expected/insert2.out
+++ b/test/parallel/expected/insert2.out
@@ -1,3 +1,4 @@
+SET work_mem='10MB';
 BEGIN;
 INSERT INTO sift_base10k (id, v) VALUES 
     (nextval('serial'), random_array(128, 0, 128)),
diff --git a/test/parallel/expected/insert3.out b/test/parallel/expected/insert3.out
index 604cf2ccd..420c7253b 100644
--- a/test/parallel/expected/insert3.out
+++ b/test/parallel/expected/insert3.out
@@ -1,3 +1,4 @@
+SET work_mem='10MB';
 BEGIN;
 INSERT INTO sift_base10k (id, v) VALUES 
     (nextval('serial'), random_array(128, 0, 128)),
diff --git a/test/parallel/sql/insert.sql b/test/parallel/sql/insert.sql
index 604cf2ccd..420c7253b 100644
--- a/test/parallel/sql/insert.sql
+++ b/test/parallel/sql/insert.sql
@@ -1,3 +1,4 @@
+SET work_mem='10MB';
 BEGIN;
 INSERT INTO sift_base10k (id, v) VALUES 
     (nextval('serial'), random_array(128, 0, 128)),
diff --git a/test/parallel/sql/insert2.sql b/test/parallel/sql/insert2.sql
index 604cf2ccd..420c7253b 100644
--- a/test/parallel/sql/insert2.sql
+++ b/test/parallel/sql/insert2.sql
@@ -1,3 +1,4 @@
+SET work_mem='10MB';
 BEGIN;
 INSERT INTO sift_base10k (id, v) VALUES 
     (nextval('serial'), random_array(128, 0, 128)),
diff --git a/test/parallel/sql/insert3.sql b/test/parallel/sql/insert3.sql
index 604cf2ccd..420c7253b 100644
--- a/test/parallel/sql/insert3.sql
+++ b/test/parallel/sql/insert3.sql
@@ -1,3 +1,4 @@
+SET work_mem='10MB';
 BEGIN;
 INSERT INTO sift_base10k (id, v) VALUES 
     (nextval('serial'), random_array(128, 0, 128)),

From 16de47287282f8a5c0f7f4650e160b678f06b384 Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Wed, 11 Oct 2023 10:36:47 -1000
Subject: [PATCH 04/10] clang-format

---
 src/hnsw/insert.c |  2 +-
 src/hnsw/scan.c   | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/hnsw/insert.c b/src/hnsw/insert.c
index 2039d2538..24a686fec 100644
--- a/src/hnsw/insert.c
+++ b/src/hnsw/insert.c
@@ -149,7 +149,7 @@ bool ldb_aminsert(Relation         index,
     double mL = 1 / log(M);
     uint32 node_size = UsearchNodeBytes(&meta, opts.dimensions * sizeof(float), (int)(mL + .5));
     // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
-    if (node_size * (hdr->num_vectors + 1) > work_mem * 1024L) {
+    if(node_size * (hdr->num_vectors + 1) > work_mem * 1024L) {
         elog(WARNING, "index size exceeded work_mem during insert");
     }
 
diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c
index a32e1a9b1..54d212722 100644
--- a/src/hnsw/scan.c
+++ b/src/hnsw/scan.c
@@ -194,14 +194,14 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
             scanstate->labels = palloc(k * sizeof(usearch_label_t));
         }
 
-        double M = ldb_HnswGetM(scan->indexRelation);
-        double mL = 1 / log(M);
+        double             M = ldb_HnswGetM(scan->indexRelation);
+        double             mL = 1 / log(M);
         usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
-        uint32 node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
+        uint32             node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
         // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
         // I think because of mem_view_lazy a max of k nodes will be held in memory by usearch
         // there are separate checks on the memory held by takenbuffers
-        if (node_size * k > work_mem * 1024L) {
+        if(node_size * k > work_mem * 1024L) {
             elog(WARNING, "index size exceeded work_mem during scan");
         }
 
@@ -240,11 +240,11 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
         scanstate->distances = repalloc(scanstate->distances, k * sizeof(float));
         scanstate->labels = repalloc(scanstate->labels, k * sizeof(usearch_label_t));
 
-        double M = ldb_HnswGetM(scan->indexRelation);
-        double mL = 1 / log(M);
+        double             M = ldb_HnswGetM(scan->indexRelation);
+        double             mL = 1 / log(M);
         usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
-        uint32 node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
-        if (node_size * k > work_mem * 1024L) {
+        uint32             node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
+        if(node_size * k > work_mem * 1024L) {
             elog(WARNING, "index size exceeded work_mem during scan");
         }
 

From 9d45ffc2261c0042f54926000caafacde4f134a4 Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Sat, 14 Oct 2023 11:40:46 -1000
Subject: [PATCH 05/10] reduce code duplication

---
 src/hnsw/build.c          | 30 ++++++++++++------------------
 src/hnsw/external_index.c |  5 -----
 src/hnsw/external_index.h |  2 --
 src/hnsw/insert.c         | 12 +++++-------
 src/hnsw/retriever.c      |  4 ----
 src/hnsw/scan.c           | 29 ++++++++++-------------------
 src/hnsw/utils.c          | 22 ++++++++++++++++++++++
 src/hnsw/utils.h          |  1 +
 8 files changed, 50 insertions(+), 55 deletions(-)

diff --git a/src/hnsw/build.c b/src/hnsw/build.c
index 349904639..7e0e6e1fb 100644
--- a/src/hnsw/build.c
+++ b/src/hnsw/build.c
@@ -9,7 +9,6 @@
 #include <catalog/pg_type.h>
 #include <executor/executor.h>
 #include <funcapi.h>
-#include <math.h>
 #include <miscadmin.h>
 #include <nodes/execnodes.h>
 #include <storage/bufmgr.h>
@@ -94,15 +93,12 @@ static void AddTupleToUsearchIndex(ItemPointer tid, Datum *values, HnswBuildStat
     if(buildstate->usearch_index != NULL) {
         size_t capacity = usearch_capacity(buildstate->usearch_index, &error);
         if(capacity == usearch_size(buildstate->usearch_index, &error)) {
-            double             M = ldb_HnswGetM(index);
-            double             mL = 1 / log(M);
-            usearch_metadata_t meta = usearch_metadata(buildstate->usearch_index, &error);
-            uint32             node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)(mL + .5));
-            if(2 * usearch_size(buildstate->usearch_index, &error) * node_size
-               >= (size_t)maintenance_work_mem * 1024L) {
-                usearch_free(buildstate->usearch_index, &error);
-                elog(ERROR, "index size exceeded maintenance_work_mem during index construction");
-            }
+            CheckMem(maintenance_work_mem,
+                     index,
+                     buildstate->usearch_index,
+                     2 * usearch_size(buildstate->usearch_index, &error),
+                     "index size exceeded maintenance_work_mem during index construction, consider increasing "
+                     "maintenance_work_mem");
             usearch_reserve(buildstate->usearch_index, 2 * capacity, &error);
             assert(error == NULL);
         }
@@ -464,14 +460,12 @@ static void BuildIndex(
             // Unlock and release buffer
             UnlockReleaseBuffer(buffer);
         }
-        double             M = ldb_HnswGetM(index);
-        double             mL = 1 / log(M);
-        usearch_metadata_t meta = usearch_metadata(buildstate->usearch_index, &error);
-        uint32             node_size = UsearchNodeBytes(&meta, opts.dimensions * sizeof(float), (int)(mL + .5));
-        // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
-        if(node_size * estimated_row_count > maintenance_work_mem * 1024L) {
-            elog(ERROR, "index size exceeded maintenance_work_mem during index construction");
-        }
+        CheckMem(maintenance_work_mem,
+                 index,
+                 buildstate->usearch_index,
+                 estimated_row_count,
+                 "index size exceeded maintenance_work_mem during index construction, consider increasing "
+                 "maintenance_work_mem");
         usearch_reserve(buildstate->usearch_index, estimated_row_count, &error);
         if(error != NULL) {
             // There's not much we can do if free throws an error, but we want to preserve the contents of the first one
diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index bf7d28c5f..40638f712 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -616,7 +616,6 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
             if(!idx_page_prelocked) {
                 UnlockReleaseBuffer(buf);
             }
-            ctx->memory += sizeof(BufferNode) + nodepage->size;
             dlist_push_tail(&ctx->takenbuffers, &buffNode->node);
             return buffNode->buf;
 #else
@@ -627,7 +626,6 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 buffNode->buf = buf;
 
                 // Add buffNode to list of pinned buffers
-                ctx->memory += sizeof(BufferNode) + offsetof(HnswIndexTuple, node) + nodepage->size;
                 dlist_push_tail(&ctx->takenbuffers, &buffNode->node);
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
@@ -637,9 +635,6 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
             return nodepage->node;
 #endif
         }
-        if(ctx->memory >= work_mem * 1024L) {
-            elog(WARNING, "pinned more buffers during query than will fit in work_mem, consider increasing work_mem");
-        }
     }
     if(!idx_page_prelocked) {
         assert(BufferIsValid(buf));
diff --git a/src/hnsw/external_index.h b/src/hnsw/external_index.h
index 384bff34f..b653719e2 100644
--- a/src/hnsw/external_index.h
+++ b/src/hnsw/external_index.h
@@ -93,8 +93,6 @@ typedef struct
     FullyAssociativeCache fa_cache;
 
     dlist_head takenbuffers;
-
-    int memory;
 } RetrieverCtx;
 
 typedef struct
diff --git a/src/hnsw/insert.c b/src/hnsw/insert.c
index 24a686fec..aaff26900 100644
--- a/src/hnsw/insert.c
+++ b/src/hnsw/insert.c
@@ -145,13 +145,11 @@ bool ldb_aminsert(Relation         index,
     assert(hdr->magicNumber == LDB_WAL_MAGIC_NUMBER);
     ldb_dlog("Insert: at start num vectors is %d", hdr->num_vectors);
 
-    double M = ldb_HnswGetM(index);
-    double mL = 1 / log(M);
-    uint32 node_size = UsearchNodeBytes(&meta, opts.dimensions * sizeof(float), (int)(mL + .5));
-    // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
-    if(node_size * (hdr->num_vectors + 1) > work_mem * 1024L) {
-        elog(WARNING, "index size exceeded work_mem during insert");
-    }
+    CheckMem(work_mem,
+             index,
+             uidx,
+             hdr->num_vectors,
+             "index size exceeded work_mem during insert, consider increasing work_mem");
 
     usearch_reserve(uidx, hdr->num_vectors + 1, &error);
     uint32 level = hnsw_generate_new_level(meta.connectivity);
diff --git a/src/hnsw/retriever.c b/src/hnsw/retriever.c
index fcdab63c1..1b6a5965d 100644
--- a/src/hnsw/retriever.c
+++ b/src/hnsw/retriever.c
@@ -27,8 +27,6 @@ RetrieverCtx *ldb_wal_retriever_area_init(Relation index_rel, HnswIndexHeaderPag
     /* fill in a buffer with blockno index information, before spilling it to disk */
     ctx->block_numbers_cache = cache_create("BlockNumberCache");
 
-    ctx->memory = 0;
-
     return ctx;
 }
 
@@ -50,8 +48,6 @@ void ldb_wal_retriever_area_reset(RetrieverCtx *ctx, HnswIndexHeaderPage *header
     }
     dlist_init(&ctx->takenbuffers);
 
-    ctx->memory = 0;
-
     assert(ctx->header_page_under_wal == header_page_under_wal);
     ctx->header_page_under_wal = header_page_under_wal;
 }
diff --git a/src/hnsw/scan.c b/src/hnsw/scan.c
index 54d212722..a8f70f63d 100644
--- a/src/hnsw/scan.c
+++ b/src/hnsw/scan.c
@@ -3,7 +3,6 @@
 #include "scan.h"
 
 #include <access/relscan.h>
-#include <math.h>
 #include <miscadmin.h>
 #include <pgstat.h>
 #include <utils/rel.h>
@@ -194,17 +193,11 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
             scanstate->labels = palloc(k * sizeof(usearch_label_t));
         }
 
-        double             M = ldb_HnswGetM(scan->indexRelation);
-        double             mL = 1 / log(M);
-        usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
-        uint32             node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
-        // accuracy could be improved by not rounding mL, but otherwise this will never be fully accurate
-        // I think because of mem_view_lazy a max of k nodes will be held in memory by usearch
-        // there are separate checks on the memory held by takenbuffers
-        if(node_size * k > work_mem * 1024L) {
-            elog(WARNING, "index size exceeded work_mem during scan");
-        }
-
+        CheckMem(work_mem,
+                 scan->indexRelation,
+                 scanstate->usearch_index,
+                 k,
+                 "index size exceeded work_mem during scan, consider increasing work_mem");
         ldb_dlog("LANTERN querying index for %d elements", k);
         num_returned = usearch_search(
             scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
@@ -240,13 +233,11 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
         scanstate->distances = repalloc(scanstate->distances, k * sizeof(float));
         scanstate->labels = repalloc(scanstate->labels, k * sizeof(usearch_label_t));
 
-        double             M = ldb_HnswGetM(scan->indexRelation);
-        double             mL = 1 / log(M);
-        usearch_metadata_t meta = usearch_metadata(scanstate->usearch_index, &error);
-        uint32             node_size = UsearchNodeBytes(&meta, scanstate->dimensions * sizeof(float), (int)(mL + .5));
-        if(node_size * k > work_mem * 1024L) {
-            elog(WARNING, "index size exceeded work_mem during scan");
-        }
+        CheckMem(work_mem,
+                 scan->indexRelation,
+                 scanstate->usearch_index,
+                 k,
+                 "index size exceeded work_mem during scan, consider increasing work_mem");
 
         ldb_dlog("LANTERN - querying index for %d elements", k);
         num_returned = usearch_search(
diff --git a/src/hnsw/utils.c b/src/hnsw/utils.c
index 1c79894ad..8c15aefbc 100644
--- a/src/hnsw/utils.c
+++ b/src/hnsw/utils.c
@@ -3,9 +3,13 @@
 #include "utils.h"
 
 #include <assert.h>
+#include <math.h>
+#include <miscadmin.h>
 #include <regex.h>
 #include <string.h>
+#include <utils/memutils.h>
 
+#include "external_index.h"
 #include "hnsw.h"
 #include "options.h"
 #include "usearch.h"
@@ -48,3 +52,21 @@ usearch_label_t GetUsearchLabel(ItemPointer itemPtr)
     memcpy((unsigned long *)&label, itemPtr, 6);
     return label;
 }
+
+void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, char *msg)
+{
+    usearch_error_t    error;
+    double             M = ldb_HnswGetM(index);
+    double             mL = 1 / log(M);
+    usearch_metadata_t meta = usearch_metadata(uidx, &error);
+    // todo:: update sizeof(float) to correct vector size once #19 is merged
+    uint32 node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
+    Size   pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
+
+    // The average number of layers for an element to be added in is mL+1 per section 4.2.2
+    // Accuracy could maybe be improved by not rounding
+    // This is a guess, but it's a reasonably good one
+    if(pg_mem + node_size * n_nodes > (uint32)limit * 1024UL) {
+        elog(WARNING, msg);
+    }
+}
diff --git a/src/hnsw/utils.h b/src/hnsw/utils.h
index 024ac21f7..106d296ea 100644
--- a/src/hnsw/utils.h
+++ b/src/hnsw/utils.h
@@ -5,6 +5,7 @@
 #include "options.h"
 #include "usearch.h"
 
+void            CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, char *msg);
 void            LogUsearchOptions(usearch_init_options_t *opts);
 void            PopulateUsearchOpts(Relation index, usearch_init_options_t *opts);
 usearch_label_t GetUsearchLabel(ItemPointer itemPtr);

From 76dd81d7eb7356fdb566b9130c6163f051deb99e Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Sat, 14 Oct 2023 12:06:24 -1000
Subject: [PATCH 06/10] add test back in to external_index, fix elog in
 CheckMem

---
 src/hnsw/external_index.c | 10 ++++++++++
 src/hnsw/utils.c          | 19 +++++++++++--------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index 40638f712..c7c1099df 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -630,7 +630,17 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
 
+<<<<<<< HEAD
             fa_cache_insert(&ctx->fa_cache, id, nodepage->node);
+=======
+            CheckMem(work_mem,
+                     NULL,
+                     NULL,
+                     0,
+                     "Pinned more tuples during node retrieval than will fir in work_mem, cosider increasing work_mem");
+
+            cache_set_item(&ctx->node_cache, &id, nodepage->node);
+>>>>>>> 1175c02 (add test back in to external_index, fix elog in CheckMem)
 
             return nodepage->node;
 #endif
diff --git a/src/hnsw/utils.c b/src/hnsw/utils.c
index 8c15aefbc..ce316d23a 100644
--- a/src/hnsw/utils.c
+++ b/src/hnsw/utils.c
@@ -55,18 +55,21 @@ usearch_label_t GetUsearchLabel(ItemPointer itemPtr)
 
 void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, char *msg)
 {
-    usearch_error_t    error;
-    double             M = ldb_HnswGetM(index);
-    double             mL = 1 / log(M);
-    usearch_metadata_t meta = usearch_metadata(uidx, &error);
-    // todo:: update sizeof(float) to correct vector size once #19 is merged
-    uint32 node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
-    Size   pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
+    uint32 node_size = 0;
+    if(index != NULL) {
+        usearch_error_t    error;
+        double             M = ldb_HnswGetM(index);
+        double             mL = 1 / log(M);
+        usearch_metadata_t meta = usearch_metadata(uidx, &error);
+        // todo:: update sizeof(float) to correct vector size once #19 is merged
+        node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
+    }
+    Size pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
 
     // The average number of layers for an element to be added in is mL+1 per section 4.2.2
     // Accuracy could maybe be improved by not rounding
     // This is a guess, but it's a reasonably good one
     if(pg_mem + node_size * n_nodes > (uint32)limit * 1024UL) {
-        elog(WARNING, msg);
+        elog(WARNING, "%s", msg);
     }
 }

From 22c6d6b2e1a5ba3773d50c22ccacc74e7cc01e3c Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Sat, 14 Oct 2023 12:14:56 -1000
Subject: [PATCH 07/10] fix merge error I missed

---
 src/hnsw/external_index.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index c7c1099df..85a9b729e 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -630,17 +630,13 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
 
-<<<<<<< HEAD
-            fa_cache_insert(&ctx->fa_cache, id, nodepage->node);
-=======
             CheckMem(work_mem,
                      NULL,
                      NULL,
                      0,
                      "Pinned more tuples during node retrieval than will fir in work_mem, cosider increasing work_mem");
+            fa_cache_insert(&ctx->fa_cache, id, nodepage->node);
 
-            cache_set_item(&ctx->node_cache, &id, nodepage->node);
->>>>>>> 1175c02 (add test back in to external_index, fix elog in CheckMem)
 
             return nodepage->node;
 #endif

From 379546769ca9993ee50ba59147ccfccb978503cf Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Sat, 14 Oct 2023 12:39:39 -1000
Subject: [PATCH 08/10] add guards for pg>12. There isn't an obvious way to do
 this before pg13

---
 src/hnsw/external_index.c | 8 ++++++--
 src/hnsw/utils.c          | 8 ++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index 85a9b729e..7049a507a 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -7,7 +7,6 @@
 #include <assert.h>
 #include <common/relpath.h>
 #include <hnsw/fa_cache.h>
-#include <miscadmin.h>
 #include <pg_config.h>       // BLCKSZ
 #include <storage/bufmgr.h>  // Buffer
 #include <utils/hsearch.h>
@@ -21,6 +20,10 @@
 #include "usearch.h"
 #include "utils.h"
 
+#if PG_VERSION_NUM >= 120000
+#include <miscadmin.h>
+#endif
+
 static BlockNumber getBlockMapPageBlockNumber(uint32 *blockmap_page_group_index, int id);
 
 uint32 UsearchNodeBytes(usearch_metadata_t *metadata, int vector_bytes, int level)
@@ -630,14 +633,15 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
 
+#if PG_VERSION_NUM >= 120000
             CheckMem(work_mem,
                      NULL,
                      NULL,
                      0,
                      "Pinned more tuples during node retrieval than will fir in work_mem, cosider increasing work_mem");
+#endif
             fa_cache_insert(&ctx->fa_cache, id, nodepage->node);
 
-
             return nodepage->node;
 #endif
         }
diff --git a/src/hnsw/utils.c b/src/hnsw/utils.c
index ce316d23a..ccb22648e 100644
--- a/src/hnsw/utils.c
+++ b/src/hnsw/utils.c
@@ -7,7 +7,10 @@
 #include <miscadmin.h>
 #include <regex.h>
 #include <string.h>
+
+#if PG_VERSION_NUM >= 120000
 #include <utils/memutils.h>
+#endif
 
 #include "external_index.h"
 #include "hnsw.h"
@@ -64,7 +67,12 @@ void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, c
         // todo:: update sizeof(float) to correct vector size once #19 is merged
         node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
     }
+    // todo:: there's figure out a way to check this in pg <= 12
+#if PG_VERSION_NUM >= 120000
     Size pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
+#else
+    Size pg_mem = 0;
+#endif
 
     // The average number of layers for an element to be added in is mL+1 per section 4.2.2
     // Accuracy could maybe be improved by not rounding

From eed89799308ba50c3188b45e65e139076bb4b341 Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Sat, 14 Oct 2023 12:42:00 -1000
Subject: [PATCH 09/10] fix guards

---
 src/hnsw/external_index.c | 4 ++--
 src/hnsw/utils.c          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index 7049a507a..7d7ab7849 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -20,7 +20,7 @@
 #include "usearch.h"
 #include "utils.h"
 
-#if PG_VERSION_NUM >= 120000
+#if PG_VERSION_NUM > 120000
 #include <miscadmin.h>
 #endif
 
@@ -633,7 +633,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
 
-#if PG_VERSION_NUM >= 120000
+#if PG_VERSION_NUM > 120000
             CheckMem(work_mem,
                      NULL,
                      NULL,
diff --git a/src/hnsw/utils.c b/src/hnsw/utils.c
index ccb22648e..be0ae49f7 100644
--- a/src/hnsw/utils.c
+++ b/src/hnsw/utils.c
@@ -8,7 +8,7 @@
 #include <regex.h>
 #include <string.h>
 
-#if PG_VERSION_NUM >= 120000
+#if PG_VERSION_NUM > 120000
 #include <utils/memutils.h>
 #endif
 
@@ -68,7 +68,7 @@ void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, c
         node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
     }
     // todo:: there's figure out a way to check this in pg <= 12
-#if PG_VERSION_NUM >= 120000
+#if PG_VERSION_NUM > 120000
     Size pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
 #else
     Size pg_mem = 0;

From e5fe05fab6740f4aacff663e705b80a5242f3a76 Mon Sep 17 00:00:00 2001
From: Ezra Varady <ezraavarady@gmail.com>
Date: Sat, 14 Oct 2023 12:46:43 -1000
Subject: [PATCH 10/10] actually fix guards

---
 src/hnsw/external_index.c | 4 ++--
 src/hnsw/utils.c          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hnsw/external_index.c b/src/hnsw/external_index.c
index 7d7ab7849..aee3aa9de 100644
--- a/src/hnsw/external_index.c
+++ b/src/hnsw/external_index.c
@@ -20,7 +20,7 @@
 #include "usearch.h"
 #include "utils.h"
 
-#if PG_VERSION_NUM > 120000
+#if PG_VERSION_NUM >= 130000
 #include <miscadmin.h>
 #endif
 
@@ -633,7 +633,7 @@ void *ldb_wal_index_node_retriever(void *ctxp, int id)
                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
             }
 
-#if PG_VERSION_NUM > 120000
+#if PG_VERSION_NUM >= 130000
             CheckMem(work_mem,
                      NULL,
                      NULL,
diff --git a/src/hnsw/utils.c b/src/hnsw/utils.c
index be0ae49f7..00be528d3 100644
--- a/src/hnsw/utils.c
+++ b/src/hnsw/utils.c
@@ -8,7 +8,7 @@
 #include <regex.h>
 #include <string.h>
 
-#if PG_VERSION_NUM > 120000
+#if PG_VERSION_NUM >= 130000
 #include <utils/memutils.h>
 #endif
 
@@ -68,7 +68,7 @@ void CheckMem(int limit, Relation index, usearch_index_t uidx, uint32 n_nodes, c
         node_size = UsearchNodeBytes(&meta, meta.dimensions * sizeof(float), (int)round(mL + 1));
     }
     // todo:: there's figure out a way to check this in pg <= 12
-#if PG_VERSION_NUM > 120000
+#if PG_VERSION_NUM >= 130000
     Size pg_mem = MemoryContextMemAllocated(CurrentMemoryContext, true);
 #else
     Size pg_mem = 0;