From 5e0836b5835063a91c286662ef0a16521d4c8567 Mon Sep 17 00:00:00 2001 From: usamoi Date: Mon, 20 Jan 2025 16:27:30 +0800 Subject: [PATCH] refactor: move algorithm to a crate Signed-off-by: usamoi --- .taplo.toml | 19 +- Cargo.lock | 47 +- Cargo.toml | 9 +- crates/algorithm/Cargo.toml | 25 + crates/algorithm/src/build.rs | 101 ++ crates/algorithm/src/bulkdelete.rs | 167 +++ .../algorithm/src}/freepages.rs | 16 +- .../algorithm/src}/insert.rs | 50 +- .../mod.rs => crates/algorithm/src/lib.rs | 36 +- crates/algorithm/src/maintain.rs | 147 +++ .../algorithm/src}/operator.rs | 35 +- {src/utils => crates/algorithm/src}/pipe.rs | 0 .../algorithm/src}/prewarm.rs | 27 +- .../scan.rs => crates/algorithm/src/search.rs | 36 +- .../algorithm/src}/tape.rs | 41 +- .../algorithm/src}/tuples.rs | 17 +- {src => crates/algorithm/src}/types.rs | 13 +- .../algorithm/src}/vectors.rs | 32 +- crates/k_means/Cargo.toml | 15 + .../k_means.rs => crates/k_means/src/lib.rs | 73 +- crates/rabitq/src/block.rs | 9 +- crates/simd/Cargo.toml | 1 - crates/simd/src/f16.rs | 2 - crates/simd/src/f32.rs | 8 +- crates/simd/src/lib.rs | 15 +- crates/vector/Cargo.toml | 1 - crates/vector/src/bvect.rs | 15 +- crates/vector/src/lib.rs | 2 +- crates/vector/src/scalar8.rs | 7 +- crates/vector/src/svect.rs | 15 +- crates/vector/src/vect.rs | 7 +- rustfmt.toml | 1 + src/algorithm/build.rs | 373 ------ src/algorithm/vacuum.rs | 311 ----- src/bin/pgrx_embed.rs | 1 + src/datatype/memory_halfvec.rs | 20 +- src/datatype/memory_scalar8.rs | 20 +- src/datatype/memory_vector.rs | 20 +- src/gucs/mod.rs | 14 - src/gucs/prewarm.rs | 32 - src/index/am.rs | 1103 ----------------- src/index/am/am_build.rs | 950 ++++++++++++++ src/index/am/am_scan.rs | 285 +++++ src/index/am/mod.rs | 324 +++++ src/index/am_options.rs | 235 ---- src/index/am_scan.rs | 186 --- src/index/functions.rs | 27 +- src/{gucs/executing.rs => index/gucs.rs} | 39 +- src/index/mod.rs | 14 +- src/index/opclass.rs | 146 +++ src/{ => index}/projection.rs | 24 + src/{postgres.rs => index/storage.rs} | 37 +- src/index/utils.rs | 34 - src/lib.rs | 16 +- src/{upgrade/symbols.rs => upgrade.rs} | 0 src/upgrade/mod.rs | 1 - src/utils/mod.rs | 3 - src/utils/parallelism.rs | 62 - 58 files changed, 2583 insertions(+), 2683 deletions(-) create mode 100644 crates/algorithm/Cargo.toml create mode 100644 crates/algorithm/src/build.rs create mode 100644 crates/algorithm/src/bulkdelete.rs rename {src/algorithm => crates/algorithm/src}/freepages.rs (80%) rename {src/algorithm => crates/algorithm/src}/insert.rs (83%) rename src/algorithm/mod.rs => crates/algorithm/src/lib.rs (70%) create mode 100644 crates/algorithm/src/maintain.rs rename {src/algorithm => crates/algorithm/src}/operator.rs (94%) rename {src/utils => crates/algorithm/src}/pipe.rs (100%) rename {src/algorithm => crates/algorithm/src}/prewarm.rs (82%) rename src/algorithm/scan.rs => crates/algorithm/src/search.rs (88%) rename {src/algorithm => crates/algorithm/src}/tape.rs (94%) rename {src/algorithm => crates/algorithm/src}/tuples.rs (98%) rename {src => crates/algorithm/src}/types.rs (95%) rename {src/algorithm => crates/algorithm/src}/vectors.rs (83%) create mode 100644 crates/k_means/Cargo.toml rename src/utils/k_means.rs => crates/k_means/src/lib.rs (81%) delete mode 100644 src/algorithm/build.rs delete mode 100644 src/algorithm/vacuum.rs delete mode 100644 src/gucs/mod.rs delete mode 100644 src/gucs/prewarm.rs delete mode 100644 src/index/am.rs create mode 100644 src/index/am/am_build.rs create mode 100644 src/index/am/am_scan.rs create mode 100644 src/index/am/mod.rs delete mode 100644 src/index/am_options.rs delete mode 100644 src/index/am_scan.rs rename src/{gucs/executing.rs => index/gucs.rs} (61%) rename src/{ => index}/projection.rs (51%) rename src/{postgres.rs => index/storage.rs} (91%) delete mode 100644 src/index/utils.rs rename src/{upgrade/symbols.rs => upgrade.rs} (100%) delete mode 100644 src/upgrade/mod.rs delete mode 100644 src/utils/mod.rs delete mode 100644 src/utils/parallelism.rs diff --git a/.taplo.toml b/.taplo.toml index d9a9fda..41af6e1 100644 --- a/.taplo.toml +++ b/.taplo.toml @@ -2,9 +2,16 @@ indent_string = " " [[rule]] -keys = ["dependencies", "*-denpendencies", "lints", "patch.*", "profile.*"] - -[rule.formatting] -reorder_keys = true -reorder_arrays = true -align_comments = true +include = ["**/Cargo.toml"] +keys = [ + "dependencies", + "dev-dependencies", + "build-dependencies", + "target.*.dependencies", + "lints", + "patch.*", + "profile.*", + "workspace.dependencies", + "lints.dependencies", +] +formatting = { reorder_keys = true, reorder_arrays = true, align_comments = true } diff --git a/Cargo.lock b/Cargo.lock index f5214d8..0150bd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,27 @@ dependencies = [ "memchr", ] +[[package]] +name = "algorithm" +version = "0.0.0" +dependencies = [ + "always_equal", + "distance", + "half 2.4.1", + "k_means", + "paste", + "rabitq", + "rand", + "random_orthogonal_matrix", + "serde", + "simd", + "toml", + "validator", + "vector", + "zerocopy 0.8.14", + "zerocopy-derive 0.8.14", +] + [[package]] name = "always_equal" version = "0.0.0" @@ -587,6 +608,18 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +[[package]] +name = "k_means" +version = "0.0.0" +dependencies = [ + "half 2.4.1", + "log", + "rabitq", + "rand", + "rayon", + "simd", +] + [[package]] name = "libc" version = "0.2.169" @@ -1209,7 +1242,6 @@ dependencies = [ "cc", "half 2.4.1", "rand", - "serde", "simd_macros", ] @@ -1447,9 +1479,9 @@ dependencies = [ [[package]] name = "validator" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0b4a29d8709210980a09379f27ee31549b73292c87ab9899beee1c0d3be6303" +checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa" dependencies = [ "idna", "once_cell", @@ -1463,9 +1495,9 @@ dependencies = [ [[package]] name = "validator_derive" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bac855a2ce6f843beb229757e6e570a42e837bcb15e5f449dd48d5747d41bf77" +checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca" dependencies = [ "darling", "once_cell", @@ -1479,17 +1511,17 @@ dependencies = [ name = "vchord" version = "0.0.0" dependencies = [ + "algorithm", "always_equal", "distance", "half 2.4.1", - "log", + "k_means", "paste", "pgrx", "pgrx-catalog", "rabitq", "rand", "random_orthogonal_matrix", - "rayon", "serde", "simd", "toml", @@ -1505,7 +1537,6 @@ version = "0.0.0" dependencies = [ "distance", "half 2.4.1", - "serde", "simd", ] diff --git a/Cargo.toml b/Cargo.toml index 3583ae2..0c03eb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,23 +20,23 @@ pg16 = ["pgrx/pg16", "pgrx-catalog/pg16"] pg17 = ["pgrx/pg17", "pgrx-catalog/pg17"] [dependencies] +algorithm = { path = "./crates/algorithm" } always_equal = { path = "./crates/always_equal" } distance = { path = "./crates/distance" } +k_means = { path = "./crates/k_means" } rabitq = { path = "./crates/rabitq" } random_orthogonal_matrix = { path = "./crates/random_orthogonal_matrix" } simd = { path = "./crates/simd" } vector = { path = "./crates/vector" } half.workspace = true -log = "0.4.25" paste = "1" pgrx = { version = "=0.12.9", default-features = false, features = ["cshim"] } pgrx-catalog = "0.1.0" rand.workspace = true -rayon = "1.10.0" serde.workspace = true toml = "0.8.19" -validator = { version = "0.19.0", features = ["derive"] } +validator.workspace = true zerocopy = "0.8.14" zerocopy-derive = "0.8.14" @@ -56,14 +56,17 @@ edition = "2021" [workspace.dependencies] half = { version = "2.4.1", features = ["serde", "zerocopy"] } +log = "0.4.25" rand = "0.8.5" serde = "1" +validator = { version = "0.20.0", features = ["derive"] } [workspace.lints] clippy.identity_op = "allow" clippy.int_plus_one = "allow" clippy.needless_range_loop = "allow" clippy.nonminimal_bool = "allow" +rust.unsafe_code = "deny" rust.unsafe_op_in_unsafe_fn = "deny" rust.unused_lifetimes = "warn" rust.unused_qualifications = "warn" diff --git a/crates/algorithm/Cargo.toml b/crates/algorithm/Cargo.toml new file mode 100644 index 0000000..71a6a5c --- /dev/null +++ b/crates/algorithm/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "algorithm" +version.workspace = true +edition.workspace = true + +[dependencies] +always_equal = { path = "../always_equal" } +distance = { path = "../distance" } +k_means = { path = "../k_means" } +rabitq = { path = "../rabitq" } +random_orthogonal_matrix = { path = "../random_orthogonal_matrix" } +simd = { path = "../simd" } +vector = { path = "../vector" } + +half.workspace = true +paste = "1" +rand.workspace = true +serde.workspace = true +toml = "0.8.19" +validator.workspace = true +zerocopy = "0.8.14" +zerocopy-derive = "0.8.14" + +[lints] +workspace = true diff --git a/crates/algorithm/src/build.rs b/crates/algorithm/src/build.rs new file mode 100644 index 0000000..685aaf1 --- /dev/null +++ b/crates/algorithm/src/build.rs @@ -0,0 +1,101 @@ +use crate::RelationWrite; +use crate::operator::{Accessor2, Operator, Vector}; +use crate::tape::*; +use crate::tuples::*; +use crate::types::*; +use vector::VectorOwned; + +pub fn build( + vector_options: VectorOptions, + vchordrq_options: VchordrqIndexingOptions, + index: impl RelationWrite, + structures: Vec>, +) { + let dims = vector_options.dims; + let is_residual = vchordrq_options.residual_quantization && O::SUPPORTS_RESIDUAL; + let mut meta = TapeWriter::<_, _, MetaTuple>::create(|| index.extend(false)); + assert_eq!(meta.first(), 0); + let freepage = TapeWriter::<_, _, FreepageTuple>::create(|| index.extend(false)); + let mut vectors = TapeWriter::<_, _, VectorTuple>::create(|| index.extend(true)); + let mut pointer_of_means = Vec::>::new(); + for i in 0..structures.len() { + let mut level = Vec::new(); + for j in 0..structures[i].len() { + let vector = structures[i].means[j].as_borrowed(); + let (metadata, slices) = O::Vector::vector_split(vector); + let mut chain = Ok(metadata); + for i in (0..slices.len()).rev() { + chain = Err(vectors.push(match chain { + Ok(metadata) => VectorTuple::_0 { + payload: None, + elements: slices[i].to_vec(), + metadata, + }, + Err(pointer) => VectorTuple::_1 { + payload: None, + elements: slices[i].to_vec(), + pointer, + }, + })); + } + level.push(chain.err().unwrap()); + } + pointer_of_means.push(level); + } + let mut pointer_of_firsts = Vec::>::new(); + for i in 0..structures.len() { + let mut level = Vec::new(); + for j in 0..structures[i].len() { + if i == 0 { + let tape = TapeWriter::<_, _, H0Tuple>::create(|| index.extend(false)); + let mut jump = TapeWriter::<_, _, JumpTuple>::create(|| index.extend(false)); + jump.push(JumpTuple { + first: tape.first(), + }); + level.push(jump.first()); + } else { + let mut tape = H1TapeWriter::<_, _>::create(|| index.extend(false)); + let h2_mean = structures[i].means[j].as_borrowed(); + let h2_children = structures[i].children[j].as_slice(); + for child in h2_children.iter().copied() { + let h1_mean = structures[i - 1].means[child as usize].as_borrowed(); + let code = if is_residual { + let mut residual_accessor = O::ResidualAccessor::default(); + residual_accessor.push( + O::Vector::elements_and_metadata(h1_mean).0, + O::Vector::elements_and_metadata(h2_mean).0, + ); + let residual = residual_accessor.finish( + O::Vector::elements_and_metadata(h1_mean).1, + O::Vector::elements_and_metadata(h2_mean).1, + ); + O::Vector::code(residual.as_borrowed()) + } else { + O::Vector::code(h1_mean) + }; + tape.push(H1Branch { + mean: pointer_of_means[i - 1][child as usize], + dis_u_2: code.dis_u_2, + factor_ppc: code.factor_ppc, + factor_ip: code.factor_ip, + factor_err: code.factor_err, + signs: code.signs, + first: pointer_of_firsts[i - 1][child as usize], + }); + } + let tape = tape.into_inner(); + level.push(tape.first()); + } + } + pointer_of_firsts.push(level); + } + meta.push(MetaTuple { + dims, + height_of_root: structures.len() as u32, + is_residual, + vectors_first: vectors.first(), + root_mean: pointer_of_means.last().unwrap()[0], + root_first: pointer_of_firsts.last().unwrap()[0], + freepage_first: freepage.first(), + }); +} diff --git a/crates/algorithm/src/bulkdelete.rs b/crates/algorithm/src/bulkdelete.rs new file mode 100644 index 0000000..524ec90 --- /dev/null +++ b/crates/algorithm/src/bulkdelete.rs @@ -0,0 +1,167 @@ +use crate::operator::Operator; +use crate::pipe::Pipe; +use crate::tuples::*; +use crate::{Page, RelationWrite}; +use std::num::NonZeroU64; + +pub fn bulkdelete( + index: impl RelationWrite, + check: impl Fn(), + callback: impl Fn(NonZeroU64) -> bool, +) { + let meta_guard = index.read(0); + let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); + let height_of_root = meta_tuple.height_of_root(); + let root_first = meta_tuple.root_first(); + let vectors_first = meta_tuple.vectors_first(); + drop(meta_guard); + { + type State = Vec; + let mut state: State = vec![root_first]; + let step = |state: State| { + let mut results = Vec::new(); + for first in state { + let mut current = first; + while current != u32::MAX { + let h1_guard = index.read(current); + for i in 1..=h1_guard.len() { + let h1_tuple = h1_guard + .get(i) + .expect("data corruption") + .pipe(read_tuple::); + match h1_tuple { + H1TupleReader::_0(h1_tuple) => { + for first in h1_tuple.first().iter().copied() { + results.push(first); + } + } + H1TupleReader::_1(_) => (), + } + } + current = h1_guard.get_opaque().next; + } + } + results + }; + for _ in (1..height_of_root).rev() { + state = step(state); + } + for first in state { + let jump_guard = index.read(first); + let jump_tuple = jump_guard + .get(1) + .expect("data corruption") + .pipe(read_tuple::); + let first = jump_tuple.first(); + let mut current = first; + while current != u32::MAX { + check(); + let read = index.read(current); + let flag = 'flag: { + for i in 1..=read.len() { + let h0_tuple = read + .get(i) + .expect("data corruption") + .pipe(read_tuple::); + match h0_tuple { + H0TupleReader::_0(h0_tuple) => { + let p = h0_tuple.payload(); + if let Some(payload) = p { + if callback(payload) { + break 'flag true; + } + } + } + H0TupleReader::_1(h0_tuple) => { + let p = h0_tuple.payload(); + for j in 0..32 { + if let Some(payload) = p[j] { + if callback(payload) { + break 'flag true; + } + } + } + } + H0TupleReader::_2(_) => (), + } + } + false + }; + if flag { + drop(read); + let mut write = index.write(current, false); + for i in 1..=write.len() { + let h0_tuple = write + .get_mut(i) + .expect("data corruption") + .pipe(write_tuple::); + match h0_tuple { + H0TupleWriter::_0(mut h0_tuple) => { + let p = h0_tuple.payload(); + if let Some(payload) = *p { + if callback(payload) { + *p = None; + } + } + } + H0TupleWriter::_1(mut h0_tuple) => { + let p = h0_tuple.payload(); + for j in 0..32 { + if let Some(payload) = p[j] { + if callback(payload) { + p[j] = None; + } + } + } + } + H0TupleWriter::_2(_) => (), + } + } + current = write.get_opaque().next; + } else { + current = read.get_opaque().next; + } + } + } + } + { + let first = vectors_first; + let mut current = first; + while current != u32::MAX { + check(); + let read = index.read(current); + let flag = 'flag: { + for i in 1..=read.len() { + if let Some(vector_bytes) = read.get(i) { + let vector_tuple = vector_bytes.pipe(read_tuple::>); + let p = vector_tuple.payload(); + if let Some(payload) = p { + if callback(payload) { + break 'flag true; + } + } + } + } + false + }; + if flag { + drop(read); + let mut write = index.write(current, true); + for i in 1..=write.len() { + if let Some(vector_bytes) = write.get(i) { + let vector_tuple = vector_bytes.pipe(read_tuple::>); + let p = vector_tuple.payload(); + if let Some(payload) = p { + if callback(payload) { + write.free(i); + } + } + }; + } + current = write.get_opaque().next; + } else { + current = read.get_opaque().next; + } + } + } +} diff --git a/src/algorithm/freepages.rs b/crates/algorithm/src/freepages.rs similarity index 80% rename from src/algorithm/freepages.rs rename to crates/algorithm/src/freepages.rs index 8984d3c..6dc505c 100644 --- a/src/algorithm/freepages.rs +++ b/crates/algorithm/src/freepages.rs @@ -1,9 +1,9 @@ -use crate::algorithm::tuples::*; -use crate::algorithm::*; -use crate::utils::pipe::Pipe; +use crate::pipe::Pipe; +use crate::tuples::*; +use crate::*; use std::cmp::Reverse; -pub fn mark(relation: impl RelationWrite, freepage_first: u32, pages: &[u32]) { +pub fn mark(index: impl RelationWrite, freepage_first: u32, pages: &[u32]) { let mut pages = pages.to_vec(); pages.sort_by_key(|x| Reverse(*x)); pages.dedup(); @@ -18,7 +18,7 @@ pub fn mark(relation: impl RelationWrite, freepage_first: u32, pages: &[u32]) { } local }; - let mut freespace_guard = relation.write(current, false); + let mut freespace_guard = index.write(current, false); if freespace_guard.len() == 0 { freespace_guard.alloc(&serialize(&FreepageTuple {})); } @@ -30,19 +30,19 @@ pub fn mark(relation: impl RelationWrite, freepage_first: u32, pages: &[u32]) { freespace_tuple.mark(local as _); } if freespace_guard.get_opaque().next == u32::MAX { - let extend = relation.extend(false); + let extend = index.extend(false); freespace_guard.get_opaque_mut().next = extend.id(); } (current, offset) = (freespace_guard.get_opaque().next, offset + 32768); } } -pub fn fetch(relation: impl RelationWrite, freepage_first: u32) -> Option { +pub fn fetch(index: impl RelationWrite, freepage_first: u32) -> Option { let first = freepage_first; assert!(first != u32::MAX); let (mut current, mut offset) = (first, 0_u32); loop { - let mut freespace_guard = relation.write(current, false); + let mut freespace_guard = index.write(current, false); if freespace_guard.len() == 0 { return None; } diff --git a/src/algorithm/insert.rs b/crates/algorithm/src/insert.rs similarity index 83% rename from src/algorithm/insert.rs rename to crates/algorithm/src/insert.rs index f2b8cfb..a00b510 100644 --- a/src/algorithm/insert.rs +++ b/crates/algorithm/src/insert.rs @@ -1,24 +1,18 @@ -use crate::algorithm::operator::*; -use crate::algorithm::tape::read_h1_tape; -use crate::algorithm::tuples::*; -use crate::algorithm::vectors::{self}; -use crate::algorithm::{Page, PageGuard, RelationWrite}; -use crate::utils::pipe::Pipe; +use crate::operator::*; +use crate::pipe::Pipe; +use crate::tape::read_h1_tape; +use crate::tuples::*; +use crate::vectors::{self}; +use crate::{Page, PageGuard, RelationWrite}; use always_equal::AlwaysEqual; use distance::Distance; use std::cmp::Reverse; use std::collections::BinaryHeap; use std::num::NonZeroU64; -use vector::VectorBorrowed; -use vector::VectorOwned; +use vector::{VectorBorrowed, VectorOwned}; -pub fn insert( - relation: impl RelationWrite + Clone, - payload: NonZeroU64, - vector: O::Vector, -) { - let vector = O::Vector::random_projection(vector.as_borrowed()); - let meta_guard = relation.read(0); +pub fn insert(index: impl RelationWrite, payload: NonZeroU64, vector: O::Vector) { + let meta_guard = index.read(0); let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); let dims = meta_tuple.dims(); let is_residual = meta_tuple.is_residual(); @@ -35,19 +29,15 @@ pub fn insert( None }; - let mean = vectors::vector_append::( - relation.clone(), - vectors_first, - vector.as_borrowed(), - payload, - ); + let mean = + vectors::vector_append::(index.clone(), vectors_first, vector.as_borrowed(), payload); type State = (u32, Option<::Vector>); let mut state: State = { let mean = root_mean; if is_residual { let residual_u = vectors::vector_access_1::( - relation.clone(), + index.clone(), mean, LAccess::new( O::Vector::elements_and_metadata(vector.as_borrowed()), @@ -69,7 +59,7 @@ pub fn insert( default_lut_block.as_ref().unwrap() }; read_h1_tape( - relation.clone(), + |id| index.read(id), first, || { RAccess::new( @@ -89,7 +79,7 @@ pub fn insert( let (_, AlwaysEqual(mean), AlwaysEqual(first)) = heap.pop().unwrap(); if is_residual { let (dis_u, residual_u) = vectors::vector_access_1::( - relation.clone(), + index.clone(), mean, LAccess::new( O::Vector::elements_and_metadata(vector.as_borrowed()), @@ -106,7 +96,7 @@ pub fn insert( )); } else { let dis_u = vectors::vector_access_1::( - relation.clone(), + index.clone(), mean, LAccess::new( O::Vector::elements_and_metadata(vector.as_borrowed()), @@ -140,7 +130,7 @@ pub fn insert( elements: rabitq::pack_to_u64(&code.signs), }); - let jump_guard = relation.read(first); + let jump_guard = index.read(first); let jump_tuple = jump_guard .get(1) .expect("data corruption") @@ -151,21 +141,21 @@ pub fn insert( assert!(first != u32::MAX); let mut current = first; loop { - let read = relation.read(current); + let read = index.read(current); if read.get_opaque().next == u32::MAX { drop(read); - let mut write = relation.write(current, false); + let mut write = index.write(current, false); if write.get_opaque().next == u32::MAX { if write.alloc(&bytes).is_some() { return; } - let mut extend = relation.extend(false); + let mut extend = index.extend(false); write.get_opaque_mut().next = extend.id(); drop(write); let fresh = extend.id(); if extend.alloc(&bytes).is_some() { drop(extend); - let mut past = relation.write(first, false); + let mut past = index.write(first, false); past.get_opaque_mut().skip = std::cmp::max(past.get_opaque_mut().skip, fresh); drop(past); return; diff --git a/src/algorithm/mod.rs b/crates/algorithm/src/lib.rs similarity index 70% rename from src/algorithm/mod.rs rename to crates/algorithm/src/lib.rs index 7a9c3ff..dccc459 100644 --- a/src/algorithm/mod.rs +++ b/crates/algorithm/src/lib.rs @@ -1,13 +1,29 @@ -pub mod build; -pub mod freepages; -pub mod insert; +#![allow(clippy::collapsible_else_if)] +#![allow(clippy::type_complexity)] +#![allow(clippy::len_without_is_empty)] +#![feature(vec_pop_if)] + +mod build; +mod bulkdelete; +mod freepages; +mod insert; +mod maintain; +mod pipe; +mod prewarm; +mod search; +mod tape; +mod tuples; +mod vectors; + pub mod operator; -pub mod prewarm; -pub mod scan; -pub mod tape; -pub mod tuples; -pub mod vacuum; -pub mod vectors; +pub mod types; + +pub use build::build; +pub use bulkdelete::bulkdelete; +pub use insert::insert; +pub use maintain::maintain; +pub use prewarm::prewarm; +pub use search::search; use std::ops::{Deref, DerefMut}; @@ -34,7 +50,7 @@ pub trait PageGuard { fn id(&self) -> u32; } -pub trait RelationRead { +pub trait RelationRead: Clone { type Page: Page; type ReadGuard<'a>: PageGuard + Deref where diff --git a/crates/algorithm/src/maintain.rs b/crates/algorithm/src/maintain.rs new file mode 100644 index 0000000..fec057b --- /dev/null +++ b/crates/algorithm/src/maintain.rs @@ -0,0 +1,147 @@ +use crate::operator::Operator; +use crate::pipe::Pipe; +use crate::tape::*; +use crate::tuples::*; +use crate::{Page, RelationWrite, freepages}; +use simd::fast_scan::unpack; + +pub fn maintain(index: impl RelationWrite, check: impl Fn()) { + let meta_guard = index.read(0); + let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); + let dims = meta_tuple.dims(); + let height_of_root = meta_tuple.height_of_root(); + let root_first = meta_tuple.root_first(); + let freepage_first = meta_tuple.freepage_first(); + drop(meta_guard); + + let firsts = { + type State = Vec; + let mut state: State = vec![root_first]; + let step = |state: State| { + let mut results = Vec::new(); + for first in state { + let mut current = first; + while current != u32::MAX { + check(); + let h1_guard = index.read(current); + for i in 1..=h1_guard.len() { + let h1_tuple = h1_guard + .get(i) + .expect("data corruption") + .pipe(read_tuple::); + match h1_tuple { + H1TupleReader::_0(h1_tuple) => { + for first in h1_tuple.first().iter().copied() { + results.push(first); + } + } + H1TupleReader::_1(_) => (), + } + } + current = h1_guard.get_opaque().next; + } + } + results + }; + for _ in (1..height_of_root).rev() { + state = step(state); + } + state + }; + + for first in firsts { + let mut jump_guard = index.write(first, false); + let mut jump_tuple = jump_guard + .get_mut(1) + .expect("data corruption") + .pipe(write_tuple::); + + let mut tape = H0TapeWriter::<_, _>::create(|| { + if let Some(id) = freepages::fetch(index.clone(), freepage_first) { + let mut write = index.write(id, false); + write.clear(); + write + } else { + index.extend(false) + } + }); + + let mut trace = Vec::new(); + + let first = *jump_tuple.first(); + let mut current = first; + let mut computing = None; + while current != u32::MAX { + check(); + trace.push(current); + let h0_guard = index.read(current); + for i in 1..=h0_guard.len() { + let h0_tuple = h0_guard + .get(i) + .expect("data corruption") + .pipe(read_tuple::); + match h0_tuple { + H0TupleReader::_0(h0_tuple) => { + if let Some(payload) = h0_tuple.payload() { + tape.push(H0Branch { + mean: h0_tuple.mean(), + dis_u_2: h0_tuple.code().0, + factor_ppc: h0_tuple.code().1, + factor_ip: h0_tuple.code().2, + factor_err: h0_tuple.code().3, + signs: h0_tuple + .code() + .4 + .iter() + .flat_map(|x| { + std::array::from_fn::<_, 64, _>(|i| *x & (1 << i) != 0) + }) + .take(dims as _) + .collect::>(), + payload, + }); + } + } + H0TupleReader::_1(h0_tuple) => { + let computing = &mut computing.take().unwrap_or_else(Vec::new); + computing.extend_from_slice(h0_tuple.elements()); + let unpacked = unpack(computing); + for j in 0..32 { + if let Some(payload) = h0_tuple.payload()[j] { + tape.push(H0Branch { + mean: h0_tuple.mean()[j], + dis_u_2: h0_tuple.metadata().0[j], + factor_ppc: h0_tuple.metadata().1[j], + factor_ip: h0_tuple.metadata().2[j], + factor_err: h0_tuple.metadata().3[j], + signs: unpacked[j] + .iter() + .flat_map(|&x| { + [x & 1 != 0, x & 2 != 0, x & 4 != 0, x & 8 != 0] + }) + .collect(), + payload, + }); + } + } + } + H0TupleReader::_2(h0_tuple) => { + let computing = computing.get_or_insert_with(Vec::new); + computing.extend_from_slice(h0_tuple.elements()); + } + } + } + current = h0_guard.get_opaque().next; + drop(h0_guard); + } + + let tape = tape.into_inner(); + let new = tape.first(); + drop(tape); + + *jump_tuple.first() = new; + drop(jump_guard); + + freepages::mark(index.clone(), freepage_first, &trace); + } +} diff --git a/src/algorithm/operator.rs b/crates/algorithm/src/operator.rs similarity index 94% rename from src/algorithm/operator.rs rename to crates/algorithm/src/operator.rs index 9506d7a..4ff72f6 100644 --- a/src/algorithm/operator.rs +++ b/crates/algorithm/src/operator.rs @@ -1,4 +1,4 @@ -use crate::types::{DistanceKind, OwnedVector}; +use crate::types::*; use distance::Distance; use half::f16; use simd::Floating; @@ -324,7 +324,6 @@ pub struct RAccess<'a, E, M, A> { } impl<'a, E, M, A> RAccess<'a, E, M, A> { - #[allow(dead_code)] pub fn new((elements, metadata): (&'a [E], M), accessor: A) -> Self { Self { elements, @@ -356,8 +355,6 @@ pub trait Vector: VectorOwned { fn elements_and_metadata(vector: Self::Borrowed<'_>) -> (&[Self::Element], Self::Metadata); fn from_owned(vector: OwnedVector) -> Self; - fn random_projection(vector: Self::Borrowed<'_>) -> Self; - fn compute_lut_block(vector: Self::Borrowed<'_>) -> (f32, f32, f32, f32, Vec<[u64; 2]>); fn compute_lut( @@ -368,10 +365,6 @@ pub trait Vector: VectorOwned { ); fn code(vector: Self::Borrowed<'_>) -> rabitq::Code; - - fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec; - - fn build_from_vecf32(x: &[f32]) -> Self; } impl Vector for VectOwned { @@ -399,10 +392,6 @@ impl Vector for VectOwned { } } - fn random_projection(vector: Self::Borrowed<'_>) -> Self { - Self::new(crate::projection::project(vector.slice())) - } - fn compute_lut_block(vector: Self::Borrowed<'_>) -> (f32, f32, f32, f32, Vec<[u64; 2]>) { rabitq::block::preprocess(vector.slice()) } @@ -419,14 +408,6 @@ impl Vector for VectOwned { fn code(vector: Self::Borrowed<'_>) -> rabitq::Code { rabitq::code(vector.dims(), vector.slice()) } - - fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec { - vector.slice().to_vec() - } - - fn build_from_vecf32(x: &[f32]) -> Self { - Self::new(x.to_vec()) - } } impl Vector for VectOwned { @@ -454,12 +435,6 @@ impl Vector for VectOwned { } } - fn random_projection(vector: Self::Borrowed<'_>) -> Self { - Self::new(f16::vector_from_f32(&crate::projection::project( - &f16::vector_to_f32(vector.slice()), - ))) - } - fn compute_lut_block(vector: Self::Borrowed<'_>) -> (f32, f32, f32, f32, Vec<[u64; 2]>) { rabitq::block::preprocess(&f16::vector_to_f32(vector.slice())) } @@ -476,14 +451,6 @@ impl Vector for VectOwned { fn code(vector: Self::Borrowed<'_>) -> rabitq::Code { rabitq::code(vector.dims(), &f16::vector_to_f32(vector.slice())) } - - fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec { - f16::vector_to_f32(vector.slice()) - } - - fn build_from_vecf32(x: &[f32]) -> Self { - Self::new(f16::vector_from_f32(x)) - } } pub trait OperatorDistance: 'static + Debug + Copy { diff --git a/src/utils/pipe.rs b/crates/algorithm/src/pipe.rs similarity index 100% rename from src/utils/pipe.rs rename to crates/algorithm/src/pipe.rs diff --git a/src/algorithm/prewarm.rs b/crates/algorithm/src/prewarm.rs similarity index 82% rename from src/algorithm/prewarm.rs rename to crates/algorithm/src/prewarm.rs index 9373f4a..237e8fa 100644 --- a/src/algorithm/prewarm.rs +++ b/crates/algorithm/src/prewarm.rs @@ -1,12 +1,11 @@ -use crate::algorithm::operator::Operator; -use crate::algorithm::tuples::*; -use crate::algorithm::vectors; -use crate::algorithm::{Page, RelationRead}; -use crate::utils::pipe::Pipe; +use crate::operator::Operator; +use crate::pipe::Pipe; +use crate::tuples::*; +use crate::{Page, RelationRead, vectors}; use std::fmt::Write; -pub fn prewarm(relation: impl RelationRead + Clone, height: i32) -> String { - let meta_guard = relation.read(0); +pub fn prewarm(index: impl RelationRead, height: i32, check: impl Fn()) -> String { + let meta_guard = index.read(0); let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); let height_of_root = meta_tuple.height_of_root(); let root_mean = meta_tuple.root_mean(); @@ -24,7 +23,7 @@ pub fn prewarm(relation: impl RelationRead + Clone, height: i32) -> let mut results = Vec::new(); let counter = 1_usize; { - vectors::vector_access_1::(relation.clone(), root_mean, ()); + vectors::vector_access_1::(index.clone(), root_mean, ()); results.push(root_first); } writeln!(message, "number of tuples: {}", results.len()).unwrap(); @@ -38,8 +37,8 @@ pub fn prewarm(relation: impl RelationRead + Clone, height: i32) -> let mut current = list; while current != u32::MAX { counter += 1; - pgrx::check_for_interrupts!(); - let h1_guard = relation.read(current); + check(); + let h1_guard = index.read(current); for i in 1..=h1_guard.len() { let h1_tuple = h1_guard .get(i) @@ -48,7 +47,7 @@ pub fn prewarm(relation: impl RelationRead + Clone, height: i32) -> match h1_tuple { H1TupleReader::_0(h1_tuple) => { for mean in h1_tuple.mean().iter().copied() { - vectors::vector_access_1::(relation.clone(), mean, ()); + vectors::vector_access_1::(index.clone(), mean, ()); } for first in h1_tuple.first().iter().copied() { results.push(first); @@ -71,7 +70,7 @@ pub fn prewarm(relation: impl RelationRead + Clone, height: i32) -> let mut counter = 0_usize; let mut results = Vec::new(); for list in state { - let jump_guard = relation.read(list); + let jump_guard = index.read(list); let jump_tuple = jump_guard .get(1) .expect("data corruption") @@ -80,8 +79,8 @@ pub fn prewarm(relation: impl RelationRead + Clone, height: i32) -> let mut current = first; while current != u32::MAX { counter += 1; - pgrx::check_for_interrupts!(); - let h0_guard = relation.read(current); + check(); + let h0_guard = index.read(current); for i in 1..=h0_guard.len() { let _h0_tuple = h0_guard .get(i) diff --git a/src/algorithm/scan.rs b/crates/algorithm/src/search.rs similarity index 88% rename from src/algorithm/scan.rs rename to crates/algorithm/src/search.rs index fee6df3..150ae4a 100644 --- a/src/algorithm/scan.rs +++ b/crates/algorithm/src/search.rs @@ -1,26 +1,22 @@ -use crate::algorithm::operator::*; -use crate::algorithm::tape::read_h0_tape; -use crate::algorithm::tape::read_h1_tape; -use crate::algorithm::tuples::*; -use crate::algorithm::vectors; -use crate::algorithm::{Page, RelationRead}; -use crate::utils::pipe::Pipe; +use crate::operator::*; +use crate::pipe::Pipe; +use crate::tape::{read_h0_tape, read_h1_tape}; +use crate::tuples::*; +use crate::{Page, RelationRead, vectors}; use always_equal::AlwaysEqual; use distance::Distance; use std::cmp::Reverse; use std::collections::BinaryHeap; use std::num::NonZeroU64; -use vector::VectorBorrowed; -use vector::VectorOwned; +use vector::{VectorBorrowed, VectorOwned}; -pub fn scan( - relation: impl RelationRead + Clone, +pub fn search( + index: impl RelationRead, vector: O::Vector, probes: Vec, epsilon: f32, ) -> impl Iterator { - let vector = O::Vector::random_projection(vector.as_borrowed()); - let meta_guard = relation.read(0); + let meta_guard = index.read(0); let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); let dims = meta_tuple.dims(); let is_residual = meta_tuple.is_residual(); @@ -42,7 +38,7 @@ pub fn scan( let mean = root_mean; if is_residual { let residual_u = vectors::vector_access_1::( - relation.clone(), + index.clone(), mean, LAccess::new( O::Vector::elements_and_metadata(vector.as_borrowed()), @@ -63,7 +59,7 @@ pub fn scan( default_lut.as_ref().map(|x| &x.0).unwrap() }; read_h1_tape( - relation.clone(), + |id| index.read(id), first, || { RAccess::new( @@ -83,7 +79,7 @@ pub fn scan( let (_, AlwaysEqual(mean), AlwaysEqual(first)) = heap.pop().unwrap(); if is_residual { let (dis_u, residual_u) = vectors::vector_access_1::( - relation.clone(), + index.clone(), mean, LAccess::new( O::Vector::elements_and_metadata(vector.as_borrowed()), @@ -100,7 +96,7 @@ pub fn scan( )); } else { let dis_u = vectors::vector_access_1::( - relation.clone(), + index.clone(), mean, LAccess::new( O::Vector::elements_and_metadata(vector.as_borrowed()), @@ -127,14 +123,14 @@ pub fn scan( } else { default_lut.as_ref().unwrap() }; - let jump_guard = relation.read(first); + let jump_guard = index.read(first); let jump_tuple = jump_guard .get(1) .expect("data corruption") .pipe(read_tuple::); let first = jump_tuple.first(); read_h0_tape( - relation.clone(), + |id| index.read(id), first, || { RAccess::new( @@ -154,7 +150,7 @@ pub fn scan( while !heap.is_empty() && heap.peek().map(|x| x.0) > cache.peek().map(|x| x.0) { let (_, AlwaysEqual(mean), AlwaysEqual(pay_u)) = heap.pop().unwrap(); if let Some(dis_u) = vectors::vector_access_0::( - relation.clone(), + index.clone(), mean, pay_u, LAccess::new( diff --git a/src/algorithm/tape.rs b/crates/algorithm/src/tape.rs similarity index 94% rename from src/algorithm/tape.rs rename to crates/algorithm/src/tape.rs index 4ee722a..991baa1 100644 --- a/src/algorithm/tape.rs +++ b/crates/algorithm/src/tape.rs @@ -1,15 +1,12 @@ -use super::RelationRead; -use super::operator::Accessor1; -use crate::algorithm::Page; -use crate::algorithm::PageGuard; -use crate::algorithm::tuples::*; -use crate::utils::pipe::Pipe; +use crate::operator::Accessor1; +use crate::pipe::Pipe; +use crate::tuples::*; +use crate::{Page, PageGuard}; use distance::Distance; -use simd::fast_scan::any_pack; -use simd::fast_scan::padding_pack; +use simd::fast_scan::{any_pack, padding_pack}; use std::marker::PhantomData; use std::num::NonZeroU64; -use std::ops::DerefMut; +use std::ops::{Deref, DerefMut}; pub struct TapeWriter { head: G, @@ -178,7 +175,7 @@ where } } -pub struct H0BranchWriter { +pub struct H0Branch { pub mean: IndexPointer, pub dis_u_2: f32, pub factor_ppc: f32, @@ -188,12 +185,12 @@ pub struct H0BranchWriter { pub payload: NonZeroU64, } -pub struct H0Tape { +pub struct H0TapeWriter { tape: TapeWriter, - branches: Vec, + branches: Vec, } -impl H0Tape +impl H0TapeWriter where G: PageGuard + DerefMut, G::Target: Page, @@ -205,7 +202,7 @@ where branches: Vec::new(), } } - pub fn push(&mut self, branch: H0BranchWriter) { + pub fn push(&mut self, branch: H0Branch) { self.branches.push(branch); if self.branches.len() == 32 { let chunk = std::array::from_fn::<_, 32, _>(|_| self.branches.pop().unwrap()); @@ -253,12 +250,14 @@ where } } -pub fn read_h1_tape( - relation: impl RelationRead, +pub fn read_h1_tape( + read: impl Fn(u32) -> G, first: u32, compute_block: impl Fn() -> A + Copy, mut callback: impl FnMut(Distance, IndexPointer, u32), ) where + G: PageGuard + Deref, + G::Target: Page, A: for<'a> Accessor1< [u64; 2], (&'a [f32; 32], &'a [f32; 32], &'a [f32; 32], &'a [f32; 32]), @@ -269,7 +268,7 @@ pub fn read_h1_tape( let mut current = first; let mut computing = None; while current != u32::MAX { - let h1_guard = relation.read(current); + let h1_guard = read(current); for i in 1..=h1_guard.len() { let h1_tuple = h1_guard .get(i) @@ -298,13 +297,15 @@ pub fn read_h1_tape( } } -pub fn read_h0_tape( - relation: impl RelationRead, +pub fn read_h0_tape( + read: impl Fn(u32) -> G, first: u32, compute_block: impl Fn() -> A + Copy, compute_binary: impl Fn((f32, f32, f32, f32, &[u64])) -> Distance, mut callback: impl FnMut(Distance, IndexPointer, NonZeroU64), ) where + G: PageGuard + Deref, + G::Target: Page, A: for<'a> Accessor1< [u64; 2], (&'a [f32; 32], &'a [f32; 32], &'a [f32; 32], &'a [f32; 32]), @@ -315,7 +316,7 @@ pub fn read_h0_tape( let mut current = first; let mut computing = None; while current != u32::MAX { - let h0_guard = relation.read(current); + let h0_guard = read(current); for i in 1..=h0_guard.len() { let h0_tuple = h0_guard .get(i) diff --git a/src/algorithm/tuples.rs b/crates/algorithm/src/tuples.rs similarity index 98% rename from src/algorithm/tuples.rs rename to crates/algorithm/src/tuples.rs index 6ecef02..b8787fe 100644 --- a/src/algorithm/tuples.rs +++ b/crates/algorithm/src/tuples.rs @@ -1,4 +1,4 @@ -use crate::algorithm::operator::Vector; +use crate::operator::Vector; use std::num::{NonZeroU8, NonZeroU64}; use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; use zerocopy_derive::{FromBytes, Immutable, IntoBytes, KnownLayout}; @@ -124,7 +124,7 @@ impl MetaTupleReader<'_> { // freepage tuple #[repr(C, align(8))] -#[derive(Debug, Clone, Copy, PartialEq, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[derive(Debug, Clone, PartialEq, FromBytes, IntoBytes, Immutable, KnownLayout)] struct FreepageTupleHeader { a: [u32; 1], b: [u32; 32], @@ -216,7 +216,7 @@ impl FreepageTupleWriter<'_> { // vector tuple #[repr(C, align(8))] -#[derive(Debug, Clone, Copy, PartialEq, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[derive(Debug, Clone, PartialEq, FromBytes, IntoBytes, Immutable, KnownLayout)] struct VectorTupleHeader0 { payload: Option, metadata_s: usize, @@ -225,7 +225,7 @@ struct VectorTupleHeader0 { } #[repr(C, align(8))] -#[derive(Debug, Clone, Copy, PartialEq, FromBytes, IntoBytes, Immutable, KnownLayout)] +#[derive(Debug, Clone, PartialEq, FromBytes, IntoBytes, Immutable, KnownLayout)] struct VectorTupleHeader1 { payload: Option, pointer: IndexPointer, @@ -1008,16 +1008,15 @@ pub const fn pair_to_pointer(pair: (u32, u16)) -> IndexPointer { IndexPointer(value) } -#[allow(dead_code)] -const fn soundness_check(a: (u32, u16)) { +#[test] +const fn soundness_check() { + let a = (111, 222); let b = pair_to_pointer(a); let c = pointer_to_pair(b); assert!(a.0 == c.0); assert!(a.1 == c.1); } -const _: () = soundness_check((111, 222)); - #[repr(transparent)] #[derive( Debug, @@ -1133,6 +1132,7 @@ impl<'a> MutChecker<'a> { self.flag[i / 64] |= 1 << (i % 64); } } + #[allow(unsafe_code)] let bytes = unsafe { std::slice::from_raw_parts_mut(self.bytes.as_mut_ptr().add(start), end - start) }; @@ -1155,6 +1155,7 @@ impl<'a> MutChecker<'a> { self.flag[i / 64] |= 1 << (i % 64); } } + #[allow(unsafe_code)] let bytes = unsafe { std::slice::from_raw_parts_mut(self.bytes.as_mut_ptr().add(start), end - start) }; diff --git a/src/types.rs b/crates/algorithm/src/types.rs similarity index 95% rename from src/types.rs rename to crates/algorithm/src/types.rs index 4ef2171..a8f3b8e 100644 --- a/src/types.rs +++ b/crates/algorithm/src/types.rs @@ -98,7 +98,7 @@ impl VchordrqIndexingOptions { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub enum OwnedVector { Vecf32(VectOwned), Vecf16(VectOwned), @@ -148,3 +148,14 @@ impl VectorOptions { } } } + +pub struct Structure { + pub means: Vec, + pub children: Vec>, +} + +impl Structure { + pub fn len(&self) -> usize { + self.children.len() + } +} diff --git a/src/algorithm/vectors.rs b/crates/algorithm/src/vectors.rs similarity index 83% rename from src/algorithm/vectors.rs rename to crates/algorithm/src/vectors.rs index d71499b..4a19306 100644 --- a/src/algorithm/vectors.rs +++ b/crates/algorithm/src/vectors.rs @@ -1,7 +1,7 @@ -use crate::algorithm::operator::*; -use crate::algorithm::tuples::*; -use crate::algorithm::{Page, PageGuard, RelationRead, RelationWrite}; -use crate::utils::pipe::Pipe; +use crate::operator::*; +use crate::pipe::Pipe; +use crate::tuples::*; +use crate::{Page, PageGuard, RelationRead, RelationWrite}; use std::num::NonZeroU64; use vector::VectorOwned; @@ -9,14 +9,14 @@ pub fn vector_access_1< O: Operator, A: Accessor1<::Element, ::Metadata>, >( - relation: impl RelationRead, + index: impl RelationRead, mean: IndexPointer, accessor: A, ) -> A::Output { let mut cursor = Err(mean); let mut result = accessor; while let Err(mean) = cursor.map_err(pointer_to_pair) { - let vector_guard = relation.read(mean.0); + let vector_guard = index.read(mean.0); let vector_tuple = vector_guard .get(mean.1) .expect("data corruption") @@ -34,7 +34,7 @@ pub fn vector_access_0< O: Operator, A: Accessor1<::Element, ::Metadata>, >( - relation: impl RelationRead, + index: impl RelationRead, mean: IndexPointer, payload: NonZeroU64, accessor: A, @@ -42,7 +42,7 @@ pub fn vector_access_0< let mut cursor = Err(mean); let mut result = accessor; while let Err(mean) = cursor.map_err(pointer_to_pair) { - let vector_guard = relation.read(mean.0); + let vector_guard = index.read(mean.0); let vector_tuple = vector_guard .get(mean.1)? .pipe(read_tuple::>); @@ -59,34 +59,34 @@ pub fn vector_access_0< } pub fn vector_append( - relation: impl RelationWrite + Clone, + index: impl RelationWrite, vectors_first: u32, vector: ::Borrowed<'_>, payload: NonZeroU64, ) -> IndexPointer { - fn append(relation: impl RelationWrite, first: u32, bytes: &[u8]) -> IndexPointer { - if let Some(mut write) = relation.search(bytes.len()) { + fn append(index: impl RelationWrite, first: u32, bytes: &[u8]) -> IndexPointer { + if let Some(mut write) = index.search(bytes.len()) { let i = write.alloc(bytes).unwrap(); return pair_to_pointer((write.id(), i)); } assert!(first != u32::MAX); let mut current = first; loop { - let read = relation.read(current); + let read = index.read(current); if read.freespace() as usize >= bytes.len() || read.get_opaque().next == u32::MAX { drop(read); - let mut write = relation.write(current, true); + let mut write = index.write(current, true); if let Some(i) = write.alloc(bytes) { return pair_to_pointer((current, i)); } if write.get_opaque().next == u32::MAX { - let mut extend = relation.extend(true); + let mut extend = index.extend(true); write.get_opaque_mut().next = extend.id(); drop(write); if let Some(i) = extend.alloc(bytes) { let result = (extend.id(), i); drop(extend); - let mut past = relation.write(first, true); + let mut past = index.write(first, true); let skip = &mut past.get_opaque_mut().skip; assert!(*skip != u32::MAX); *skip = std::cmp::max(*skip, result.0); @@ -113,7 +113,7 @@ pub fn vector_append( let mut chain = Ok(metadata); for i in (0..slices.len()).rev() { chain = Err(append( - relation.clone(), + index.clone(), vectors_first, &serialize::>(&match chain { Ok(metadata) => VectorTuple::_0 { diff --git a/crates/k_means/Cargo.toml b/crates/k_means/Cargo.toml new file mode 100644 index 0000000..93b82ed --- /dev/null +++ b/crates/k_means/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "k_means" +version.workspace = true +edition.workspace = true + +[dependencies] +half.workspace = true +log.workspace = true +rabitq = { path = "../rabitq" } +rand.workspace = true +rayon = "1.10.0" +simd = { path = "../simd" } + +[lints] +workspace = true diff --git a/src/utils/k_means.rs b/crates/k_means/src/lib.rs similarity index 81% rename from src/utils/k_means.rs rename to crates/k_means/src/lib.rs index b1808c9..3b696c9 100644 --- a/src/utils/k_means.rs +++ b/crates/k_means/src/lib.rs @@ -1,9 +1,72 @@ -use super::parallelism::{ParallelIterator, Parallelism}; +#![allow(clippy::type_complexity)] + use half::f16; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use simd::Floating; use simd::fast_scan::{any_pack, padding_pack}; +use std::any::Any; +use std::panic::AssertUnwindSafe; +use std::sync::Arc; + +pub use rayon::iter::ParallelIterator; + +pub trait Parallelism: Send + Sync { + fn check(&self); + + fn rayon_into_par_iter(&self, x: I) -> I::Iter; +} + +struct ParallelismCheckPanic(Box); + +pub struct RayonParallelism { + stop: Arc, +} + +impl RayonParallelism { + pub fn scoped( + num_threads: usize, + stop: Arc, + f: impl FnOnce(&Self) -> R, + ) -> Result { + match std::panic::catch_unwind(AssertUnwindSafe(|| { + rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .panic_handler(|e| { + if e.downcast_ref::().is_some() { + return; + } + log::error!("Asynchronous task panickied."); + }) + .build_scoped( + |thread| thread.run(), + |_| { + let pool = Self { stop: stop.clone() }; + f(&pool) + }, + ) + })) { + Ok(x) => x, + Err(e) => match e.downcast::() { + Ok(payload) => std::panic::resume_unwind((*payload).0), + Err(e) => std::panic::resume_unwind(e), + }, + } + } +} + +impl Parallelism for RayonParallelism { + fn check(&self) { + match std::panic::catch_unwind(AssertUnwindSafe(|| (self.stop)())) { + Ok(()) => (), + Err(payload) => std::panic::panic_any(ParallelismCheckPanic(payload)), + } + } + + fn rayon_into_par_iter(&self, x: I) -> I::Iter { + x.into_par_iter() + } +} pub fn k_means( parallelism: &P, @@ -134,16 +197,14 @@ fn rabitq_index( parallelism .rayon_into_par_iter(0..n) .map(|i| { - use distance::Distance; let lut = rabitq::block::preprocess(&samples[i]); - let mut result = (Distance::INFINITY, 0); + let mut result = (f32::INFINITY, 0); for block in 0..c.div_ceil(32) { let lowerbound = rabitq::block::process_lowerbound_l2(&lut, blocks[block].code(), 1.9); for j in block * 32..std::cmp::min(block * 32 + 32, c) { - if lowerbound[j - block * 32] < result.0 { - let dis = - Distance::from_f32(f32::reduce_sum_of_d2(&samples[i], ¢roids[j])); + if lowerbound[j - block * 32].to_f32() < result.0 { + let dis = f32::reduce_sum_of_d2(&samples[i], ¢roids[j]); if dis <= result.0 { result = (dis, j); } diff --git a/crates/rabitq/src/block.rs b/crates/rabitq/src/block.rs index 9f26fce..61ee01a 100644 --- a/crates/rabitq/src/block.rs +++ b/crates/rabitq/src/block.rs @@ -57,10 +57,11 @@ pub fn process_lowerbound_dot( } pub fn compress(mut vector: Vec) -> Vec<[u64; 2]> { - let width = vector.len().div_ceil(4); - vector.resize(width * 4, 0); - let mut result = vec![[0u64, 0u64]; width]; - for i in 0..width { + let n = vector.len().div_ceil(4); + vector.resize(n * 4, 0); + let mut result = vec![[0u64, 0u64]; n]; + for i in 0..n { + #[allow(unsafe_code)] unsafe { // this hint is used to skip bound checks std::hint::assert_unchecked(4 * i + 3 < vector.len()); diff --git a/crates/simd/Cargo.toml b/crates/simd/Cargo.toml index 0905fb1..31e271e 100644 --- a/crates/simd/Cargo.toml +++ b/crates/simd/Cargo.toml @@ -5,7 +5,6 @@ edition.workspace = true [dependencies] half.workspace = true -serde.workspace = true simd_macros = { path = "../simd_macros" } [dev-dependencies] diff --git a/crates/simd/src/f16.rs b/crates/simd/src/f16.rs index ce90618..3d7fbd5 100644 --- a/crates/simd/src/f16.rs +++ b/crates/simd/src/f16.rs @@ -129,8 +129,6 @@ impl Floating for f16 { } mod reduce_or_of_is_zero_x { - // FIXME: add manually-implemented SIMD version - use half::f16; #[crate::multiversion("v4", "v3", "v2", "v8.3a:sve", "v8.3a")] diff --git a/crates/simd/src/f32.rs b/crates/simd/src/f32.rs index 555da5c..d0dc2cd 100644 --- a/crates/simd/src/f32.rs +++ b/crates/simd/src/f32.rs @@ -119,8 +119,6 @@ impl Floating for f32 { } mod reduce_or_of_is_zero_x { - // FIXME: add manually-implemented SIMD version - #[crate::multiversion("v4", "v3", "v2", "v8.3a:sve", "v8.3a")] pub fn reduce_or_of_is_zero_x(this: &[f32]) -> bool { for &x in this { @@ -1024,8 +1022,7 @@ mod reduce_min_max_of_x { #[cfg(target_arch = "x86_64")] #[crate::target_cpu(enable = "v3")] fn reduce_min_max_of_x_v3(this: &[f32]) -> (f32, f32) { - use crate::emulate::emulate_mm256_reduce_max_ps; - use crate::emulate::emulate_mm256_reduce_min_ps; + use crate::emulate::{emulate_mm256_reduce_max_ps, emulate_mm256_reduce_min_ps}; unsafe { use std::arch::x86_64::*; let mut n = this.len(); @@ -1081,8 +1078,7 @@ mod reduce_min_max_of_x { #[cfg(target_arch = "x86_64")] #[crate::target_cpu(enable = "v2")] fn reduce_min_max_of_x_v2(this: &[f32]) -> (f32, f32) { - use crate::emulate::emulate_mm_reduce_max_ps; - use crate::emulate::emulate_mm_reduce_min_ps; + use crate::emulate::{emulate_mm_reduce_max_ps, emulate_mm_reduce_min_ps}; unsafe { use std::arch::x86_64::*; let mut n = this.len(); diff --git a/crates/simd/src/lib.rs b/crates/simd/src/lib.rs index aec5c52..2f03d53 100644 --- a/crates/simd/src/lib.rs +++ b/crates/simd/src/lib.rs @@ -2,6 +2,7 @@ #![feature(avx512_target_feature)] #![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512))] #![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_avx512_f16))] +#![allow(unsafe_code)] mod aligned; mod emulate; @@ -15,16 +16,7 @@ pub mod quantize; pub mod u8; pub trait Floating: - Copy - + Send - + Sync - + std::fmt::Debug - + serde::Serialize - + for<'a> serde::Deserialize<'a> - + Default - + 'static - + PartialEq - + PartialOrd + Copy + Send + Sync + std::fmt::Debug + Default + 'static + PartialEq + PartialOrd { fn zero() -> Self; fn infinity() -> Self; @@ -79,8 +71,7 @@ mod internal { pub use is_riscv64_cpu_detected; } -pub use simd_macros::multiversion; -pub use simd_macros::target_cpu; +pub use simd_macros::{multiversion, target_cpu}; #[cfg(target_arch = "x86_64")] #[allow(unused_imports)] diff --git a/crates/vector/Cargo.toml b/crates/vector/Cargo.toml index b910d36..186daf8 100644 --- a/crates/vector/Cargo.toml +++ b/crates/vector/Cargo.toml @@ -6,7 +6,6 @@ edition.workspace = true [dependencies] distance = { path = "../distance" } half.workspace = true -serde.workspace = true simd = { path = "../simd" } [lints] diff --git a/crates/vector/src/bvect.rs b/crates/vector/src/bvect.rs index fe80164..7877add 100644 --- a/crates/vector/src/bvect.rs +++ b/crates/vector/src/bvect.rs @@ -1,12 +1,11 @@ use crate::{VectorBorrowed, VectorOwned}; use distance::Distance; -use serde::{Deserialize, Serialize}; use std::ops::{Bound, RangeBounds}; pub const BVECTOR_WIDTH: u32 = u64::BITS; // When using binary vector, please ensure that the padding bits are always zero. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct BVectOwned { dims: u32, data: Vec, @@ -29,7 +28,10 @@ impl BVectOwned { if dims % BVECTOR_WIDTH != 0 && data[data.len() - 1] >> (dims % BVECTOR_WIDTH) != 0 { return None; } - unsafe { Some(Self::new_unchecked(dims, data)) } + #[allow(unsafe_code)] + unsafe { + Some(Self::new_unchecked(dims, data)) + } } /// # Safety @@ -37,6 +39,7 @@ impl BVectOwned { /// * `dims` must be in `1..=65535`. /// * `data` must be of the correct length. /// * The padding bits must be zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked(dims: u32, data: Vec) -> Self { Self { dims, data } @@ -83,7 +86,10 @@ impl<'a> BVectBorrowed<'a> { if dims % BVECTOR_WIDTH != 0 && data[data.len() - 1] >> (dims % BVECTOR_WIDTH) != 0 { return None; } - unsafe { Some(Self::new_unchecked(dims, data)) } + #[allow(unsafe_code)] + unsafe { + Some(Self::new_unchecked(dims, data)) + } } /// # Safety @@ -91,6 +97,7 @@ impl<'a> BVectBorrowed<'a> { /// * `dims` must be in `1..=65535`. /// * `data` must be of the correct length. /// * The padding bits must be zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked(dims: u32, data: &'a [u64]) -> Self { Self { dims, data } diff --git a/crates/vector/src/lib.rs b/crates/vector/src/lib.rs index e82e4a6..64128c7 100644 --- a/crates/vector/src/lib.rs +++ b/crates/vector/src/lib.rs @@ -3,7 +3,7 @@ pub mod scalar8; pub mod svect; pub mod vect; -pub trait VectorOwned: Clone + serde::Serialize + for<'a> serde::Deserialize<'a> + 'static { +pub trait VectorOwned: Clone + 'static { type Borrowed<'a>: VectorBorrowed; fn as_borrowed(&self) -> Self::Borrowed<'_>; diff --git a/crates/vector/src/scalar8.rs b/crates/vector/src/scalar8.rs index ff9095a..3792e27 100644 --- a/crates/vector/src/scalar8.rs +++ b/crates/vector/src/scalar8.rs @@ -1,9 +1,8 @@ use crate::{VectorBorrowed, VectorOwned}; use distance::Distance; -use serde::{Deserialize, Serialize}; use std::ops::RangeBounds; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct Scalar8Owned { sum_of_x2: f32, k: f32, @@ -29,12 +28,14 @@ impl Scalar8Owned { if !(1..=65535).contains(&code.len()) { return None; } + #[allow(unsafe_code)] Some(unsafe { Self::new_unchecked(sum_of_x2, k, b, sum_of_code, code) }) } /// # Safety /// /// * `code.len()` must not be zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked( sum_of_x2: f32, @@ -105,12 +106,14 @@ impl<'a> Scalar8Borrowed<'a> { if !(1..=65535).contains(&code.len()) { return None; } + #[allow(unsafe_code)] Some(unsafe { Self::new_unchecked(sum_of_x2, k, b, sum_of_code, code) }) } /// # Safety /// /// * `code.len()` must not be zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked( sum_of_x2: f32, diff --git a/crates/vector/src/svect.rs b/crates/vector/src/svect.rs index 08f678d..26cbf19 100644 --- a/crates/vector/src/svect.rs +++ b/crates/vector/src/svect.rs @@ -1,10 +1,9 @@ use crate::{VectorBorrowed, VectorOwned}; use distance::Distance; -use serde::{Deserialize, Serialize}; use simd::Floating; use std::ops::{Bound, RangeBounds}; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct SVectOwned { dims: u32, indexes: Vec, @@ -37,7 +36,10 @@ impl SVectOwned { if S::reduce_or_of_is_zero_x(&values) { return None; } - unsafe { Some(Self::new_unchecked(dims, indexes, values)) } + #[allow(unsafe_code)] + unsafe { + Some(Self::new_unchecked(dims, indexes, values)) + } } /// # Safety @@ -46,6 +48,7 @@ impl SVectOwned { /// * `indexes.len()` must be equal to `values.len()`. /// * `indexes` must be a strictly increasing sequence and the last in the sequence must be less than `dims`. /// * A floating number in `values` must not be positive zero or negative zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked(dims: u32, indexes: Vec, values: Vec) -> Self { Self { @@ -119,7 +122,10 @@ impl<'a, S: Floating> SVectBorrowed<'a, S> { return None; } } - unsafe { Some(Self::new_unchecked(dims, indexes, values)) } + #[allow(unsafe_code)] + unsafe { + Some(Self::new_unchecked(dims, indexes, values)) + } } /// # Safety @@ -129,6 +135,7 @@ impl<'a, S: Floating> SVectBorrowed<'a, S> { /// * `indexes` must be a strictly increasing sequence and the last in the sequence must be less than `dims`. /// * A floating number in `values` must not be positive zero or negative zero. #[inline(always)] + #[allow(unsafe_code)] pub unsafe fn new_unchecked(dims: u32, indexes: &'a [u32], values: &'a [S]) -> Self { Self { dims, diff --git a/crates/vector/src/vect.rs b/crates/vector/src/vect.rs index 34b186f..527fc6a 100644 --- a/crates/vector/src/vect.rs +++ b/crates/vector/src/vect.rs @@ -1,11 +1,10 @@ use super::{VectorBorrowed, VectorOwned}; use distance::Distance; -use serde::{Deserialize, Serialize}; use simd::Floating; use std::cmp::Ordering; use std::ops::RangeBounds; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] #[repr(transparent)] pub struct VectOwned(Vec); @@ -20,12 +19,14 @@ impl VectOwned { if !(1..=65535).contains(&slice.len()) { return None; } + #[allow(unsafe_code)] Some(unsafe { Self::new_unchecked(slice) }) } /// # Safety /// /// * `slice.len()` must not be zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked(slice: Vec) -> Self { Self(slice) @@ -76,12 +77,14 @@ impl<'a, S: Floating> VectBorrowed<'a, S> { if !(1..=65535).contains(&slice.len()) { return None; } + #[allow(unsafe_code)] Some(unsafe { Self::new_unchecked(slice) }) } /// # Safety /// /// * `slice.len()` must not be zero. + #[allow(unsafe_code)] #[inline(always)] pub unsafe fn new_unchecked(slice: &'a [S]) -> Self { Self(slice) diff --git a/rustfmt.toml b/rustfmt.toml index 3501136..c32b643 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1 +1,2 @@ style_edition = "2024" +imports_granularity = "Module" diff --git a/src/algorithm/build.rs b/src/algorithm/build.rs deleted file mode 100644 index 4c893b7..0000000 --- a/src/algorithm/build.rs +++ /dev/null @@ -1,373 +0,0 @@ -use crate::algorithm::RelationWrite; -use crate::algorithm::operator::{Operator, Vector}; -use crate::algorithm::tape::*; -use crate::algorithm::tuples::*; -use crate::index::am_options::Opfamily; -use crate::types::VchordrqBuildOptions; -use crate::types::VchordrqExternalBuildOptions; -use crate::types::VchordrqIndexingOptions; -use crate::types::VchordrqInternalBuildOptions; -use crate::types::VectorOptions; -use rand::Rng; -use simd::Floating; -use std::num::NonZeroU64; -use std::sync::Arc; -use vector::VectorBorrowed; -use vector::VectorOwned; - -pub trait HeapRelation { - fn traverse(&self, progress: bool, callback: F) - where - F: FnMut((NonZeroU64, O::Vector)); - fn opfamily(&self) -> Opfamily; -} - -pub trait Reporter { - fn tuples_total(&mut self, tuples_total: u64); -} - -pub fn build, R: Reporter>( - vector_options: VectorOptions, - vchordrq_options: VchordrqIndexingOptions, - heap_relation: T, - relation: impl RelationWrite, - mut reporter: R, -) { - let dims = vector_options.dims; - let is_residual = vchordrq_options.residual_quantization && O::SUPPORTS_RESIDUAL; - let structures = match vchordrq_options.build { - VchordrqBuildOptions::External(external_build) => Structure::extern_build( - vector_options.clone(), - heap_relation.opfamily(), - external_build.clone(), - ), - VchordrqBuildOptions::Internal(internal_build) => { - let mut tuples_total = 0_u64; - let samples = { - let mut rand = rand::thread_rng(); - let max_number_of_samples = internal_build - .lists - .last() - .unwrap() - .saturating_mul(internal_build.sampling_factor); - let mut samples = Vec::new(); - let mut number_of_samples = 0_u32; - heap_relation.traverse(false, |(_, vector)| { - let vector = vector.as_borrowed(); - assert_eq!(dims, vector.dims(), "invalid vector dimensions"); - if number_of_samples < max_number_of_samples { - samples.push(O::Vector::build_to_vecf32(vector)); - number_of_samples += 1; - } else { - let index = rand.gen_range(0..max_number_of_samples) as usize; - samples[index] = O::Vector::build_to_vecf32(vector); - } - tuples_total += 1; - }); - samples - }; - reporter.tuples_total(tuples_total); - Structure::internal_build(vector_options.clone(), internal_build.clone(), samples) - } - }; - let mut meta = TapeWriter::<_, _, MetaTuple>::create(|| relation.extend(false)); - assert_eq!(meta.first(), 0); - let freepage = TapeWriter::<_, _, FreepageTuple>::create(|| relation.extend(false)); - let mut vectors = TapeWriter::<_, _, VectorTuple>::create(|| relation.extend(true)); - let mut pointer_of_means = Vec::>::new(); - for i in 0..structures.len() { - let mut level = Vec::new(); - for j in 0..structures[i].len() { - let vector = O::Vector::build_from_vecf32(&structures[i].means[j]); - let (metadata, slices) = O::Vector::vector_split(vector.as_borrowed()); - let mut chain = Ok(metadata); - for i in (0..slices.len()).rev() { - chain = Err(vectors.push(match chain { - Ok(metadata) => VectorTuple::_0 { - payload: None, - elements: slices[i].to_vec(), - metadata, - }, - Err(pointer) => VectorTuple::_1 { - payload: None, - elements: slices[i].to_vec(), - pointer, - }, - })); - } - level.push(chain.err().unwrap()); - } - pointer_of_means.push(level); - } - let mut pointer_of_firsts = Vec::>::new(); - for i in 0..structures.len() { - let mut level = Vec::new(); - for j in 0..structures[i].len() { - if i == 0 { - let tape = TapeWriter::<_, _, H0Tuple>::create(|| relation.extend(false)); - let mut jump = TapeWriter::<_, _, JumpTuple>::create(|| relation.extend(false)); - jump.push(JumpTuple { - first: tape.first(), - }); - level.push(jump.first()); - } else { - let mut tape = H1TapeWriter::<_, _>::create(|| relation.extend(false)); - let h2_mean = &structures[i].means[j]; - let h2_children = &structures[i].children[j]; - for child in h2_children.iter().copied() { - let h1_mean = &structures[i - 1].means[child as usize]; - let code = if is_residual { - rabitq::code(dims, &f32::vector_sub(h1_mean, h2_mean)) - } else { - rabitq::code(dims, h1_mean) - }; - tape.push(H1Branch { - mean: pointer_of_means[i - 1][child as usize], - dis_u_2: code.dis_u_2, - factor_ppc: code.factor_ppc, - factor_ip: code.factor_ip, - factor_err: code.factor_err, - signs: code.signs, - first: pointer_of_firsts[i - 1][child as usize], - }); - } - let tape = tape.into_inner(); - level.push(tape.first()); - } - } - pointer_of_firsts.push(level); - } - meta.push(MetaTuple { - dims, - height_of_root: structures.len() as u32, - is_residual, - vectors_first: vectors.first(), - root_mean: pointer_of_means.last().unwrap()[0], - root_first: pointer_of_firsts.last().unwrap()[0], - freepage_first: freepage.first(), - }); -} - -struct Structure { - means: Vec>, - children: Vec>, -} - -impl Structure { - fn len(&self) -> usize { - self.children.len() - } - fn internal_build( - vector_options: VectorOptions, - internal_build: VchordrqInternalBuildOptions, - mut samples: Vec>, - ) -> Vec { - use std::iter::once; - for sample in samples.iter_mut() { - *sample = crate::projection::project(sample); - } - let mut result = Vec::::new(); - for w in internal_build.lists.iter().rev().copied().chain(once(1)) { - let means = crate::utils::parallelism::RayonParallelism::scoped( - internal_build.build_threads as _, - Arc::new(|| { - pgrx::check_for_interrupts!(); - }), - |parallelism| { - crate::utils::k_means::k_means( - parallelism, - w as usize, - vector_options.dims as usize, - if let Some(structure) = result.last() { - &structure.means - } else { - &samples - }, - internal_build.spherical_centroids, - 10, - ) - }, - ) - .expect("failed to create thread pool"); - if let Some(structure) = result.last() { - let mut children = vec![Vec::new(); means.len()]; - for i in 0..structure.len() as u32 { - let target = - crate::utils::k_means::k_means_lookup(&structure.means[i as usize], &means); - children[target].push(i); - } - let (means, children) = std::iter::zip(means, children) - .filter(|(_, x)| !x.is_empty()) - .unzip::<_, _, Vec<_>, Vec<_>>(); - result.push(Structure { means, children }); - } else { - let children = vec![Vec::new(); means.len()]; - result.push(Structure { means, children }); - } - } - result - } - fn extern_build( - vector_options: VectorOptions, - _opfamily: Opfamily, - external_build: VchordrqExternalBuildOptions, - ) -> Vec { - use std::collections::BTreeMap; - let VchordrqExternalBuildOptions { table } = external_build; - let mut parents = BTreeMap::new(); - let mut vectors = BTreeMap::new(); - pgrx::spi::Spi::connect(|client| { - use crate::datatype::memory_vector::VectorOutput; - use pgrx::pg_sys::panic::ErrorReportable; - use vector::VectorBorrowed; - let schema_query = "SELECT n.nspname::TEXT - FROM pg_catalog.pg_extension e - LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace - WHERE e.extname = 'vector';"; - let pgvector_schema: String = client - .select(schema_query, None, None) - .unwrap_or_report() - .first() - .get_by_name("nspname") - .expect("external build: cannot get schema of pgvector") - .expect("external build: cannot get schema of pgvector"); - let dump_query = - format!("SELECT id, parent, vector::{pgvector_schema}.vector FROM {table};"); - let centroids = client.select(&dump_query, None, None).unwrap_or_report(); - for row in centroids { - let id: Option = row.get_by_name("id").unwrap(); - let parent: Option = row.get_by_name("parent").unwrap(); - let vector: Option = row.get_by_name("vector").unwrap(); - let id = id.expect("external build: id could not be NULL"); - let vector = vector.expect("external build: vector could not be NULL"); - let pop = parents.insert(id, parent); - if pop.is_some() { - pgrx::error!( - "external build: there are at least two lines have same id, id = {id}" - ); - } - if vector_options.dims != vector.as_borrowed().dims() { - pgrx::error!("external build: incorrect dimension, id = {id}"); - } - vectors.insert(id, crate::projection::project(vector.as_borrowed().slice())); - } - }); - if parents.len() >= 2 && parents.values().all(|x| x.is_none()) { - // if there are more than one vertexs and no edges, - // assume there is an implicit root - let n = parents.len(); - let mut result = Vec::new(); - result.push(Structure { - means: vectors.values().cloned().collect::>(), - children: vec![Vec::new(); n], - }); - result.push(Structure { - means: vec![{ - // compute the vector on root, without normalizing it - let mut sum = vec![0.0f32; vector_options.dims as _]; - for vector in vectors.values() { - f32::vector_add_inplace(&mut sum, vector); - } - f32::vector_mul_scalar_inplace(&mut sum, 1.0 / n as f32); - sum - }], - children: vec![(0..n as u32).collect()], - }); - return result; - } - let mut children = parents - .keys() - .map(|x| (*x, Vec::new())) - .collect::>(); - let mut root = None; - for (&id, &parent) in parents.iter() { - if let Some(parent) = parent { - if let Some(parent) = children.get_mut(&parent) { - parent.push(id); - } else { - pgrx::error!( - "external build: parent does not exist, id = {id}, parent = {parent}" - ); - } - } else { - if let Some(root) = root { - pgrx::error!("external build: two root, id = {root}, id = {id}"); - } else { - root = Some(id); - } - } - } - let Some(root) = root else { - pgrx::error!("external build: there are no root"); - }; - let mut heights = BTreeMap::<_, _>::new(); - fn dfs_for_heights( - heights: &mut BTreeMap>, - children: &BTreeMap>, - u: i32, - ) { - if heights.contains_key(&u) { - pgrx::error!("external build: detect a cycle, id = {u}"); - } - heights.insert(u, None); - let mut height = None; - for &v in children[&u].iter() { - dfs_for_heights(heights, children, v); - let new = heights[&v].unwrap() + 1; - if let Some(height) = height { - if height != new { - pgrx::error!("external build: two heights, id = {u}"); - } - } else { - height = Some(new); - } - } - if height.is_none() { - height = Some(1); - } - heights.insert(u, height); - } - dfs_for_heights(&mut heights, &children, root); - let heights = heights - .into_iter() - .map(|(k, v)| (k, v.expect("not a connected graph"))) - .collect::>(); - if !(1..=8).contains(&(heights[&root] - 1)) { - pgrx::error!( - "external build: unexpected tree height, height = {}", - heights[&root] - ); - } - let mut cursors = vec![0_u32; 1 + heights[&root] as usize]; - let mut labels = BTreeMap::new(); - for id in parents.keys().copied() { - let height = heights[&id]; - let cursor = cursors[height as usize]; - labels.insert(id, (height, cursor)); - cursors[height as usize] += 1; - } - fn extract( - height: u32, - labels: &BTreeMap, - vectors: &BTreeMap>, - children: &BTreeMap>, - ) -> (Vec>, Vec>) { - labels - .iter() - .filter(|(_, (h, _))| *h == height) - .map(|(id, _)| { - ( - vectors[id].clone(), - children[id].iter().map(|id| labels[id].1).collect(), - ) - }) - .unzip() - } - let mut result = Vec::new(); - for height in 1..=heights[&root] { - let (means, children) = extract(height, &labels, &vectors, &children); - result.push(Structure { means, children }); - } - result - } -} diff --git a/src/algorithm/vacuum.rs b/src/algorithm/vacuum.rs deleted file mode 100644 index 1736625..0000000 --- a/src/algorithm/vacuum.rs +++ /dev/null @@ -1,311 +0,0 @@ -use crate::algorithm::freepages; -use crate::algorithm::operator::Operator; -use crate::algorithm::tape::*; -use crate::algorithm::tuples::*; -use crate::algorithm::{Page, RelationWrite}; -use crate::utils::pipe::Pipe; -use simd::fast_scan::unpack; -use std::num::NonZeroU64; - -pub fn bulkdelete( - relation: impl RelationWrite, - delay: impl Fn(), - callback: impl Fn(NonZeroU64) -> bool, -) { - let meta_guard = relation.read(0); - let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); - let height_of_root = meta_tuple.height_of_root(); - let root_first = meta_tuple.root_first(); - let vectors_first = meta_tuple.vectors_first(); - drop(meta_guard); - { - type State = Vec; - let mut state: State = vec![root_first]; - let step = |state: State| { - let mut results = Vec::new(); - for first in state { - let mut current = first; - while current != u32::MAX { - let h1_guard = relation.read(current); - for i in 1..=h1_guard.len() { - let h1_tuple = h1_guard - .get(i) - .expect("data corruption") - .pipe(read_tuple::); - match h1_tuple { - H1TupleReader::_0(h1_tuple) => { - for first in h1_tuple.first().iter().copied() { - results.push(first); - } - } - H1TupleReader::_1(_) => (), - } - } - current = h1_guard.get_opaque().next; - } - } - results - }; - for _ in (1..height_of_root).rev() { - state = step(state); - } - for first in state { - let jump_guard = relation.read(first); - let jump_tuple = jump_guard - .get(1) - .expect("data corruption") - .pipe(read_tuple::); - let first = jump_tuple.first(); - let mut current = first; - while current != u32::MAX { - delay(); - let read = relation.read(current); - let flag = 'flag: { - for i in 1..=read.len() { - let h0_tuple = read - .get(i) - .expect("data corruption") - .pipe(read_tuple::); - match h0_tuple { - H0TupleReader::_0(h0_tuple) => { - let p = h0_tuple.payload(); - if let Some(payload) = p { - if callback(payload) { - break 'flag true; - } - } - } - H0TupleReader::_1(h0_tuple) => { - let p = h0_tuple.payload(); - for j in 0..32 { - if let Some(payload) = p[j] { - if callback(payload) { - break 'flag true; - } - } - } - } - H0TupleReader::_2(_) => (), - } - } - false - }; - if flag { - drop(read); - let mut write = relation.write(current, false); - for i in 1..=write.len() { - let h0_tuple = write - .get_mut(i) - .expect("data corruption") - .pipe(write_tuple::); - match h0_tuple { - H0TupleWriter::_0(mut h0_tuple) => { - let p = h0_tuple.payload(); - if let Some(payload) = *p { - if callback(payload) { - *p = None; - } - } - } - H0TupleWriter::_1(mut h0_tuple) => { - let p = h0_tuple.payload(); - for j in 0..32 { - if let Some(payload) = p[j] { - if callback(payload) { - p[j] = None; - } - } - } - } - H0TupleWriter::_2(_) => (), - } - } - current = write.get_opaque().next; - } else { - current = read.get_opaque().next; - } - } - } - } - { - let first = vectors_first; - let mut current = first; - while current != u32::MAX { - delay(); - let read = relation.read(current); - let flag = 'flag: { - for i in 1..=read.len() { - if let Some(vector_bytes) = read.get(i) { - let vector_tuple = vector_bytes.pipe(read_tuple::>); - let p = vector_tuple.payload(); - if let Some(payload) = p { - if callback(payload) { - break 'flag true; - } - } - } - } - false - }; - if flag { - drop(read); - let mut write = relation.write(current, true); - for i in 1..=write.len() { - if let Some(vector_bytes) = write.get(i) { - let vector_tuple = vector_bytes.pipe(read_tuple::>); - let p = vector_tuple.payload(); - if let Some(payload) = p { - if callback(payload) { - write.free(i); - } - } - }; - } - current = write.get_opaque().next; - } else { - current = read.get_opaque().next; - } - } - } -} - -pub fn maintain(relation: impl RelationWrite + Clone, delay: impl Fn()) { - let meta_guard = relation.read(0); - let meta_tuple = meta_guard.get(1).unwrap().pipe(read_tuple::); - let dims = meta_tuple.dims(); - let height_of_root = meta_tuple.height_of_root(); - let root_first = meta_tuple.root_first(); - let freepage_first = meta_tuple.freepage_first(); - drop(meta_guard); - - let firsts = { - type State = Vec; - let mut state: State = vec![root_first]; - let step = |state: State| { - let mut results = Vec::new(); - for first in state { - let mut current = first; - while current != u32::MAX { - delay(); - let h1_guard = relation.read(current); - for i in 1..=h1_guard.len() { - let h1_tuple = h1_guard - .get(i) - .expect("data corruption") - .pipe(read_tuple::); - match h1_tuple { - H1TupleReader::_0(h1_tuple) => { - for first in h1_tuple.first().iter().copied() { - results.push(first); - } - } - H1TupleReader::_1(_) => (), - } - } - current = h1_guard.get_opaque().next; - } - } - results - }; - for _ in (1..height_of_root).rev() { - state = step(state); - } - state - }; - - for first in firsts { - let mut jump_guard = relation.write(first, false); - let mut jump_tuple = jump_guard - .get_mut(1) - .expect("data corruption") - .pipe(write_tuple::); - - let mut tape = H0Tape::<_, _>::create(|| { - if let Some(id) = freepages::fetch(relation.clone(), freepage_first) { - let mut write = relation.write(id, false); - write.clear(); - write - } else { - relation.extend(false) - } - }); - - let mut trace = Vec::new(); - - let first = *jump_tuple.first(); - let mut current = first; - let mut computing = None; - while current != u32::MAX { - delay(); - trace.push(current); - let h0_guard = relation.read(current); - for i in 1..=h0_guard.len() { - let h0_tuple = h0_guard - .get(i) - .expect("data corruption") - .pipe(read_tuple::); - match h0_tuple { - H0TupleReader::_0(h0_tuple) => { - if let Some(payload) = h0_tuple.payload() { - tape.push(H0BranchWriter { - mean: h0_tuple.mean(), - dis_u_2: h0_tuple.code().0, - factor_ppc: h0_tuple.code().1, - factor_ip: h0_tuple.code().2, - factor_err: h0_tuple.code().3, - signs: h0_tuple - .code() - .4 - .iter() - .flat_map(|x| { - std::array::from_fn::<_, 64, _>(|i| *x & (1 << i) != 0) - }) - .take(dims as _) - .collect::>(), - payload, - }); - } - } - H0TupleReader::_1(h0_tuple) => { - let computing = &mut computing.take().unwrap_or_else(Vec::new); - computing.extend_from_slice(h0_tuple.elements()); - let unpacked = unpack(computing); - for j in 0..32 { - if let Some(payload) = h0_tuple.payload()[j] { - tape.push(H0BranchWriter { - mean: h0_tuple.mean()[j], - dis_u_2: h0_tuple.metadata().0[j], - factor_ppc: h0_tuple.metadata().1[j], - factor_ip: h0_tuple.metadata().2[j], - factor_err: h0_tuple.metadata().3[j], - signs: unpacked[j] - .iter() - .flat_map(|&x| { - [x & 1 != 0, x & 2 != 0, x & 4 != 0, x & 8 != 0] - }) - .collect(), - payload, - }); - } - } - } - H0TupleReader::_2(h0_tuple) => { - let computing = computing.get_or_insert_with(Vec::new); - computing.extend_from_slice(h0_tuple.elements()); - } - } - } - current = h0_guard.get_opaque().next; - drop(h0_guard); - } - - let tape = tape.into_inner(); - let new = tape.first(); - drop(tape); - - *jump_tuple.first() = new; - drop(jump_guard); - - freepages::mark(relation.clone(), freepage_first, &trace); - } -} diff --git a/src/bin/pgrx_embed.rs b/src/bin/pgrx_embed.rs index 5f5c4d8..afd0164 100644 --- a/src/bin/pgrx_embed.rs +++ b/src/bin/pgrx_embed.rs @@ -1 +1,2 @@ +#![allow(unsafe_code)] ::pgrx::pgrx_embed!(); diff --git a/src/datatype/memory_halfvec.rs b/src/datatype/memory_halfvec.rs index b60f6c5..3e9fe09 100644 --- a/src/datatype/memory_halfvec.rs +++ b/src/datatype/memory_halfvec.rs @@ -1,20 +1,14 @@ use half::f16; -use pgrx::datum::FromDatum; -use pgrx::datum::IntoDatum; -use pgrx::pg_sys::Datum; -use pgrx::pg_sys::Oid; -use pgrx::pgrx_sql_entity_graph::metadata::ArgumentError; -use pgrx::pgrx_sql_entity_graph::metadata::Returns; -use pgrx::pgrx_sql_entity_graph::metadata::ReturnsError; -use pgrx::pgrx_sql_entity_graph::metadata::SqlMapping; -use pgrx::pgrx_sql_entity_graph::metadata::SqlTranslatable; +use pgrx::datum::{FromDatum, IntoDatum}; +use pgrx::pg_sys::{Datum, Oid}; +use pgrx::pgrx_sql_entity_graph::metadata::*; use std::marker::PhantomData; use std::ptr::NonNull; use vector::VectorBorrowed; use vector::vect::VectBorrowed; #[repr(C, align(8))] -pub struct HalfvecHeader { +struct HalfvecHeader { varlena: u32, dims: u16, unused: u16, @@ -28,10 +22,10 @@ impl HalfvecHeader { } (size_of::() + size_of::() * len).next_multiple_of(8) } - pub unsafe fn as_borrowed<'a>(this: NonNull) -> VectBorrowed<'a, f16> { + unsafe fn as_borrowed<'a>(this: NonNull) -> VectBorrowed<'a, f16> { unsafe { let this = this.as_ptr(); - VectBorrowed::new_unchecked(std::slice::from_raw_parts( + VectBorrowed::new(std::slice::from_raw_parts( (&raw const (*this).elements).cast(), (&raw const (*this).dims).read() as usize, )) @@ -93,7 +87,7 @@ impl HalfvecOutput { pub fn as_borrowed(&self) -> VectBorrowed<'_, f16> { unsafe { HalfvecHeader::as_borrowed(self.0) } } - pub fn into_raw(self) -> *mut HalfvecHeader { + fn into_raw(self) -> *mut HalfvecHeader { let result = self.0.as_ptr(); std::mem::forget(self); result diff --git a/src/datatype/memory_scalar8.rs b/src/datatype/memory_scalar8.rs index 4f30654..19e4ff4 100644 --- a/src/datatype/memory_scalar8.rs +++ b/src/datatype/memory_scalar8.rs @@ -1,19 +1,13 @@ -use pgrx::datum::FromDatum; -use pgrx::datum::IntoDatum; -use pgrx::pg_sys::Datum; -use pgrx::pg_sys::Oid; -use pgrx::pgrx_sql_entity_graph::metadata::ArgumentError; -use pgrx::pgrx_sql_entity_graph::metadata::Returns; -use pgrx::pgrx_sql_entity_graph::metadata::ReturnsError; -use pgrx::pgrx_sql_entity_graph::metadata::SqlMapping; -use pgrx::pgrx_sql_entity_graph::metadata::SqlTranslatable; +use pgrx::datum::{FromDatum, IntoDatum}; +use pgrx::pg_sys::{Datum, Oid}; +use pgrx::pgrx_sql_entity_graph::metadata::*; use std::marker::PhantomData; use std::ptr::NonNull; use vector::VectorBorrowed; use vector::scalar8::Scalar8Borrowed; #[repr(C, align(8))] -pub struct Scalar8Header { +struct Scalar8Header { varlena: u32, dims: u16, unused: u16, @@ -31,10 +25,10 @@ impl Scalar8Header { } (size_of::() + size_of::() * len).next_multiple_of(8) } - pub unsafe fn as_borrowed<'a>(this: NonNull) -> Scalar8Borrowed<'a> { + unsafe fn as_borrowed<'a>(this: NonNull) -> Scalar8Borrowed<'a> { unsafe { let this = this.as_ptr(); - Scalar8Borrowed::new_unchecked( + Scalar8Borrowed::new( (&raw const (*this).sum_of_x2).read(), (&raw const (*this).k).read(), (&raw const (*this).b).read(), @@ -105,7 +99,7 @@ impl Scalar8Output { pub fn as_borrowed(&self) -> Scalar8Borrowed<'_> { unsafe { Scalar8Header::as_borrowed(self.0) } } - pub fn into_raw(self) -> *mut Scalar8Header { + fn into_raw(self) -> *mut Scalar8Header { let result = self.0.as_ptr(); std::mem::forget(self); result diff --git a/src/datatype/memory_vector.rs b/src/datatype/memory_vector.rs index de70ba1..4d9f9f2 100644 --- a/src/datatype/memory_vector.rs +++ b/src/datatype/memory_vector.rs @@ -1,19 +1,13 @@ -use pgrx::datum::FromDatum; -use pgrx::datum::IntoDatum; -use pgrx::pg_sys::Datum; -use pgrx::pg_sys::Oid; -use pgrx::pgrx_sql_entity_graph::metadata::ArgumentError; -use pgrx::pgrx_sql_entity_graph::metadata::Returns; -use pgrx::pgrx_sql_entity_graph::metadata::ReturnsError; -use pgrx::pgrx_sql_entity_graph::metadata::SqlMapping; -use pgrx::pgrx_sql_entity_graph::metadata::SqlTranslatable; +use pgrx::datum::{FromDatum, IntoDatum}; +use pgrx::pg_sys::{Datum, Oid}; +use pgrx::pgrx_sql_entity_graph::metadata::*; use std::marker::PhantomData; use std::ptr::NonNull; use vector::VectorBorrowed; use vector::vect::VectBorrowed; #[repr(C, align(8))] -pub struct VectorHeader { +struct VectorHeader { varlena: u32, dims: u16, unused: u16, @@ -27,10 +21,10 @@ impl VectorHeader { } (size_of::() + size_of::() * len).next_multiple_of(8) } - pub unsafe fn as_borrowed<'a>(this: NonNull) -> VectBorrowed<'a, f32> { + unsafe fn as_borrowed<'a>(this: NonNull) -> VectBorrowed<'a, f32> { unsafe { let this = this.as_ptr(); - VectBorrowed::new_unchecked(std::slice::from_raw_parts( + VectBorrowed::new(std::slice::from_raw_parts( (&raw const (*this).elements).cast(), (&raw const (*this).dims).read() as usize, )) @@ -92,7 +86,7 @@ impl VectorOutput { pub fn as_borrowed(&self) -> VectBorrowed<'_, f32> { unsafe { VectorHeader::as_borrowed(self.0) } } - pub fn into_raw(self) -> *mut VectorHeader { + fn into_raw(self) -> *mut VectorHeader { let result = self.0.as_ptr(); std::mem::forget(self); result diff --git a/src/gucs/mod.rs b/src/gucs/mod.rs deleted file mode 100644 index 2fb489e..0000000 --- a/src/gucs/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub mod executing; -pub mod prewarm; - -pub unsafe fn init() { - unsafe { - executing::init(); - prewarm::init(); - prewarm::prewarm(); - #[cfg(any(feature = "pg13", feature = "pg14"))] - pgrx::pg_sys::EmitWarningsOnPlaceholders(c"vchordrq".as_ptr()); - #[cfg(any(feature = "pg15", feature = "pg16", feature = "pg17"))] - pgrx::pg_sys::MarkGUCPrefixReserved(c"vchordrq".as_ptr()); - } -} diff --git a/src/gucs/prewarm.rs b/src/gucs/prewarm.rs deleted file mode 100644 index bc48436..0000000 --- a/src/gucs/prewarm.rs +++ /dev/null @@ -1,32 +0,0 @@ -use pgrx::guc::{GucContext, GucFlags, GucRegistry, GucSetting}; -use std::ffi::CStr; - -static PREWARM_DIM: GucSetting> = - GucSetting::>::new(Some(c"64,128,256,384,512,768,1024,1536")); - -pub unsafe fn init() { - GucRegistry::define_string_guc( - "vchordrq.prewarm_dim", - "prewarm_dim when the extension is loading.", - "prewarm_dim when the extension is loading.", - &PREWARM_DIM, - GucContext::Userset, - GucFlags::default(), - ); -} - -pub fn prewarm() { - if let Some(prewarm_dim) = PREWARM_DIM.get() { - if let Ok(prewarm_dim) = prewarm_dim.to_str() { - for dim in prewarm_dim.split(',') { - if let Ok(dim) = dim.trim().parse::() { - crate::projection::prewarm(dim as _); - } else { - pgrx::warning!("{dim:?} is not a valid integer"); - } - } - } else { - pgrx::warning!("vchordrq.prewarm_dim is not a valid UTF-8 string"); - } - } -} diff --git a/src/index/am.rs b/src/index/am.rs deleted file mode 100644 index 5db07d2..0000000 --- a/src/index/am.rs +++ /dev/null @@ -1,1103 +0,0 @@ -use crate::algorithm; -use crate::algorithm::build::{HeapRelation, Reporter}; -use crate::algorithm::operator::{Dot, L2, Op}; -use crate::algorithm::operator::{Operator, Vector}; -use crate::index::am_options::{Opfamily, Reloption}; -use crate::index::am_scan::Scanner; -use crate::index::utils::{ctid_to_pointer, pointer_to_ctid}; -use crate::index::{am_options, am_scan}; -use crate::postgres::PostgresRelation; -use crate::types::{DistanceKind, VectorKind}; -use half::f16; -use pgrx::datum::Internal; -use pgrx::pg_sys::Datum; -use std::num::NonZeroU64; -use vector::vect::VectOwned; - -static mut RELOPT_KIND_VCHORDRQ: pgrx::pg_sys::relopt_kind::Type = 0; - -pub unsafe fn init() { - unsafe { - (&raw mut RELOPT_KIND_VCHORDRQ).write(pgrx::pg_sys::add_reloption_kind()); - pgrx::pg_sys::add_string_reloption( - (&raw const RELOPT_KIND_VCHORDRQ).read(), - c"options".as_ptr(), - c"Vector index options, represented as a TOML string.".as_ptr(), - c"".as_ptr(), - None, - pgrx::pg_sys::AccessExclusiveLock as pgrx::pg_sys::LOCKMODE, - ); - } -} - -#[pgrx::pg_extern(sql = "")] -fn _vchordrq_amhandler(_fcinfo: pgrx::pg_sys::FunctionCallInfo) -> Internal { - type T = pgrx::pg_sys::IndexAmRoutine; - unsafe { - let index_am_routine = pgrx::pg_sys::palloc0(size_of::()) as *mut T; - index_am_routine.write(AM_HANDLER); - Internal::from(Some(Datum::from(index_am_routine))) - } -} - -const AM_HANDLER: pgrx::pg_sys::IndexAmRoutine = { - let mut am_routine = - unsafe { std::mem::MaybeUninit::::zeroed().assume_init() }; - - am_routine.type_ = pgrx::pg_sys::NodeTag::T_IndexAmRoutine; - - am_routine.amsupport = 1; - am_routine.amcanorderbyop = true; - - #[cfg(feature = "pg17")] - { - am_routine.amcanbuildparallel = true; - } - - // Index access methods that set `amoptionalkey` to `false` - // must index all tuples, even if the first column is `NULL`. - // However, PostgreSQL does not generate a path if there is no - // index clauses, even if there is a `ORDER BY` clause. - // So we have to set it to `true` and set costs of every path - // for vector index scans without `ORDER BY` clauses a large number - // and throw errors if someone really wants such a path. - am_routine.amoptionalkey = true; - - am_routine.amvalidate = Some(amvalidate); - am_routine.amoptions = Some(amoptions); - am_routine.amcostestimate = Some(amcostestimate); - - am_routine.ambuild = Some(ambuild); - am_routine.ambuildempty = Some(ambuildempty); - am_routine.aminsert = Some(aminsert); - am_routine.ambulkdelete = Some(ambulkdelete); - am_routine.amvacuumcleanup = Some(amvacuumcleanup); - - am_routine.ambeginscan = Some(ambeginscan); - am_routine.amrescan = Some(amrescan); - am_routine.amgettuple = Some(amgettuple); - am_routine.amendscan = Some(amendscan); - - am_routine -}; - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amvalidate(_opclass_oid: pgrx::pg_sys::Oid) -> bool { - true -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amoptions(reloptions: Datum, validate: bool) -> *mut pgrx::pg_sys::bytea { - let rdopts = unsafe { - pgrx::pg_sys::build_reloptions( - reloptions, - validate, - (&raw const RELOPT_KIND_VCHORDRQ).read(), - size_of::(), - Reloption::TAB.as_ptr(), - Reloption::TAB.len() as _, - ) - }; - rdopts as *mut pgrx::pg_sys::bytea -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amcostestimate( - _root: *mut pgrx::pg_sys::PlannerInfo, - path: *mut pgrx::pg_sys::IndexPath, - _loop_count: f64, - index_startup_cost: *mut pgrx::pg_sys::Cost, - index_total_cost: *mut pgrx::pg_sys::Cost, - index_selectivity: *mut pgrx::pg_sys::Selectivity, - index_correlation: *mut f64, - index_pages: *mut f64, -) { - unsafe { - if (*path).indexorderbys.is_null() && (*path).indexclauses.is_null() { - *index_startup_cost = f64::MAX; - *index_total_cost = f64::MAX; - *index_selectivity = 0.0; - *index_correlation = 0.0; - *index_pages = 0.0; - return; - } - *index_startup_cost = 0.0; - *index_total_cost = 0.0; - *index_selectivity = 1.0; - *index_correlation = 1.0; - *index_pages = 0.0; - } -} - -#[derive(Debug, Clone)] -struct PgReporter {} - -impl Reporter for PgReporter { - fn tuples_total(&mut self, tuples_total: u64) { - unsafe { - pgrx::pg_sys::pgstat_progress_update_param( - pgrx::pg_sys::PROGRESS_CREATEIDX_TUPLES_TOTAL as _, - tuples_total as _, - ); - } - } -} - -impl PgReporter { - fn tuples_done(&mut self, tuples_done: u64) { - unsafe { - pgrx::pg_sys::pgstat_progress_update_param( - pgrx::pg_sys::PROGRESS_CREATEIDX_TUPLES_DONE as _, - tuples_done as _, - ); - } - } -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn ambuild( - heap: pgrx::pg_sys::Relation, - index: pgrx::pg_sys::Relation, - index_info: *mut pgrx::pg_sys::IndexInfo, -) -> *mut pgrx::pg_sys::IndexBuildResult { - use validator::Validate; - #[derive(Debug, Clone)] - pub struct Heap { - heap: pgrx::pg_sys::Relation, - index: pgrx::pg_sys::Relation, - index_info: *mut pgrx::pg_sys::IndexInfo, - opfamily: Opfamily, - } - impl HeapRelation for Heap { - fn traverse(&self, progress: bool, callback: F) - where - F: FnMut((NonZeroU64, O::Vector)), - { - pub struct State<'a, F> { - pub this: &'a Heap, - pub callback: F, - } - #[pgrx::pg_guard] - unsafe extern "C" fn call( - _index: pgrx::pg_sys::Relation, - ctid: pgrx::pg_sys::ItemPointer, - values: *mut Datum, - is_null: *mut bool, - _tuple_is_alive: bool, - state: *mut core::ffi::c_void, - ) where - F: FnMut((NonZeroU64, O::Vector)), - { - let state = unsafe { &mut *state.cast::>() }; - let opfamily = state.this.opfamily; - let vector = unsafe { opfamily.datum_to_vector(*values.add(0), *is_null.add(0)) }; - let pointer = unsafe { ctid_to_pointer(ctid.read()) }; - if let Some(vector) = vector { - (state.callback)((pointer, O::Vector::from_owned(vector))); - } - } - let table_am = unsafe { &*(*self.heap).rd_tableam }; - let mut state = State { - this: self, - callback, - }; - unsafe { - table_am.index_build_range_scan.unwrap()( - self.heap, - self.index, - self.index_info, - true, - false, - progress, - 0, - pgrx::pg_sys::InvalidBlockNumber, - Some(call::), - (&mut state) as *mut State as *mut _, - std::ptr::null_mut(), - ); - } - } - - fn opfamily(&self) -> Opfamily { - self.opfamily - } - } - let (vector_options, vchordrq_options) = unsafe { am_options::options(index) }; - if let Err(errors) = Validate::validate(&vector_options) { - pgrx::error!("error while validating options: {}", errors); - } - if vector_options.dims == 0 { - pgrx::error!("error while validating options: dimension cannot be 0"); - } - if vector_options.dims > 60000 { - pgrx::error!("error while validating options: dimension is too large"); - } - if let Err(errors) = Validate::validate(&vchordrq_options) { - pgrx::error!("error while validating options: {}", errors); - } - let opfamily = unsafe { am_options::opfamily(index) }; - let heap_relation = Heap { - heap, - index, - index_info, - opfamily, - }; - let mut reporter = PgReporter {}; - let index_relation = unsafe { PostgresRelation::new(index) }; - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - algorithm::build::build::, L2>, Heap, _>( - vector_options, - vchordrq_options, - heap_relation.clone(), - index_relation.clone(), - reporter.clone(), - ) - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - algorithm::build::build::, Dot>, Heap, _>( - vector_options, - vchordrq_options, - heap_relation.clone(), - index_relation.clone(), - reporter.clone(), - ) - } - (VectorKind::Vecf16, DistanceKind::L2) => { - algorithm::build::build::, L2>, Heap, _>( - vector_options, - vchordrq_options, - heap_relation.clone(), - index_relation.clone(), - reporter.clone(), - ) - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - algorithm::build::build::, Dot>, Heap, _>( - vector_options, - vchordrq_options, - heap_relation.clone(), - index_relation.clone(), - reporter.clone(), - ) - } - } - if let Some(leader) = unsafe { VchordrqLeader::enter(heap, index, (*index_info).ii_Concurrent) } - { - unsafe { - parallel_build( - index, - heap, - index_info, - leader.tablescandesc, - leader.vchordrqshared, - Some(reporter), - ); - leader.wait(); - let nparticipants = leader.nparticipants; - loop { - pgrx::pg_sys::SpinLockAcquire(&raw mut (*leader.vchordrqshared).mutex); - if (*leader.vchordrqshared).nparticipantsdone == nparticipants { - pgrx::pg_sys::SpinLockRelease(&raw mut (*leader.vchordrqshared).mutex); - break; - } - pgrx::pg_sys::SpinLockRelease(&raw mut (*leader.vchordrqshared).mutex); - pgrx::pg_sys::ConditionVariableSleep( - &raw mut (*leader.vchordrqshared).workersdonecv, - pgrx::pg_sys::WaitEventIPC::WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN, - ); - } - pgrx::pg_sys::ConditionVariableCancelSleep(); - } - } else { - let mut indtuples = 0; - reporter.tuples_done(indtuples); - let relation = unsafe { PostgresRelation::new(index) }; - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - HeapRelation::, L2>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, L2>>( - relation.clone(), - pointer, - vector, - ); - indtuples += 1; - reporter.tuples_done(indtuples); - }, - ); - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - HeapRelation::, Dot>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, Dot>>( - relation.clone(), - pointer, - vector, - ); - indtuples += 1; - reporter.tuples_done(indtuples); - }, - ); - } - (VectorKind::Vecf16, DistanceKind::L2) => { - HeapRelation::, L2>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, L2>>( - relation.clone(), - pointer, - vector, - ); - indtuples += 1; - reporter.tuples_done(indtuples); - }, - ); - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - HeapRelation::, Dot>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, Dot>>( - relation.clone(), - pointer, - vector, - ); - indtuples += 1; - reporter.tuples_done(indtuples); - }, - ); - } - } - } - let relation = unsafe { PostgresRelation::new(index) }; - let delay = || { - pgrx::check_for_interrupts!(); - }; - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - type O = Op, L2>; - algorithm::vacuum::maintain::(relation, delay); - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - type O = Op, Dot>; - algorithm::vacuum::maintain::(relation, delay); - } - (VectorKind::Vecf16, DistanceKind::L2) => { - type O = Op, L2>; - algorithm::vacuum::maintain::(relation, delay); - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - type O = Op, Dot>; - algorithm::vacuum::maintain::(relation, delay); - } - } - unsafe { pgrx::pgbox::PgBox::::alloc0().into_pg() } -} - -struct VchordrqShared { - /* Immutable state */ - heaprelid: pgrx::pg_sys::Oid, - indexrelid: pgrx::pg_sys::Oid, - isconcurrent: bool, - - /* Worker progress */ - workersdonecv: pgrx::pg_sys::ConditionVariable, - - /* Mutex for mutable state */ - mutex: pgrx::pg_sys::slock_t, - - /* Mutable state */ - nparticipantsdone: i32, - indtuples: u64, -} - -fn is_mvcc_snapshot(snapshot: *mut pgrx::pg_sys::SnapshotData) -> bool { - matches!( - unsafe { (*snapshot).snapshot_type }, - pgrx::pg_sys::SnapshotType::SNAPSHOT_MVCC - | pgrx::pg_sys::SnapshotType::SNAPSHOT_HISTORIC_MVCC - ) -} - -struct VchordrqLeader { - pcxt: *mut pgrx::pg_sys::ParallelContext, - nparticipants: i32, - vchordrqshared: *mut VchordrqShared, - tablescandesc: *mut pgrx::pg_sys::ParallelTableScanDescData, - snapshot: pgrx::pg_sys::Snapshot, -} - -impl VchordrqLeader { - pub unsafe fn enter( - heap: pgrx::pg_sys::Relation, - index: pgrx::pg_sys::Relation, - isconcurrent: bool, - ) -> Option { - unsafe fn compute_parallel_workers( - heap: pgrx::pg_sys::Relation, - index: pgrx::pg_sys::Relation, - ) -> i32 { - unsafe { - if pgrx::pg_sys::plan_create_index_workers((*heap).rd_id, (*index).rd_id) == 0 { - return 0; - } - if !(*heap).rd_options.is_null() { - let std_options = (*heap).rd_options.cast::(); - std::cmp::min( - (*std_options).parallel_workers, - pgrx::pg_sys::max_parallel_maintenance_workers, - ) - } else { - pgrx::pg_sys::max_parallel_maintenance_workers - } - } - } - - let request = unsafe { compute_parallel_workers(heap, index) }; - if request <= 0 { - return None; - } - - unsafe { - pgrx::pg_sys::EnterParallelMode(); - } - let pcxt = unsafe { - pgrx::pg_sys::CreateParallelContext( - c"vchord".as_ptr(), - c"vchordrq_parallel_build_main".as_ptr(), - request, - ) - }; - - let snapshot = if isconcurrent { - unsafe { pgrx::pg_sys::RegisterSnapshot(pgrx::pg_sys::GetTransactionSnapshot()) } - } else { - &raw mut pgrx::pg_sys::SnapshotAnyData - }; - - fn estimate_chunk(e: &mut pgrx::pg_sys::shm_toc_estimator, x: usize) { - e.space_for_chunks += x.next_multiple_of(pgrx::pg_sys::ALIGNOF_BUFFER as _); - } - fn estimate_keys(e: &mut pgrx::pg_sys::shm_toc_estimator, x: usize) { - e.number_of_keys += x; - } - let est_tablescandesc = - unsafe { pgrx::pg_sys::table_parallelscan_estimate(heap, snapshot) }; - unsafe { - estimate_chunk(&mut (*pcxt).estimator, size_of::()); - estimate_keys(&mut (*pcxt).estimator, 1); - estimate_chunk(&mut (*pcxt).estimator, est_tablescandesc); - estimate_keys(&mut (*pcxt).estimator, 1); - } - - unsafe { - pgrx::pg_sys::InitializeParallelDSM(pcxt); - if (*pcxt).seg.is_null() { - if is_mvcc_snapshot(snapshot) { - pgrx::pg_sys::UnregisterSnapshot(snapshot); - } - pgrx::pg_sys::DestroyParallelContext(pcxt); - pgrx::pg_sys::ExitParallelMode(); - return None; - } - } - - let vchordrqshared = unsafe { - let vchordrqshared = - pgrx::pg_sys::shm_toc_allocate((*pcxt).toc, size_of::()) - .cast::(); - vchordrqshared.write(VchordrqShared { - heaprelid: (*heap).rd_id, - indexrelid: (*index).rd_id, - isconcurrent, - workersdonecv: std::mem::zeroed(), - mutex: std::mem::zeroed(), - nparticipantsdone: 0, - indtuples: 0, - }); - pgrx::pg_sys::ConditionVariableInit(&raw mut (*vchordrqshared).workersdonecv); - pgrx::pg_sys::SpinLockInit(&raw mut (*vchordrqshared).mutex); - vchordrqshared - }; - - let tablescandesc = unsafe { - let tablescandesc = pgrx::pg_sys::shm_toc_allocate((*pcxt).toc, est_tablescandesc) - .cast::(); - pgrx::pg_sys::table_parallelscan_initialize(heap, tablescandesc, snapshot); - tablescandesc - }; - - unsafe { - pgrx::pg_sys::shm_toc_insert((*pcxt).toc, 0xA000000000000001, vchordrqshared.cast()); - pgrx::pg_sys::shm_toc_insert((*pcxt).toc, 0xA000000000000002, tablescandesc.cast()); - } - - unsafe { - pgrx::pg_sys::LaunchParallelWorkers(pcxt); - } - - let nworkers_launched = unsafe { (*pcxt).nworkers_launched }; - - unsafe { - if nworkers_launched == 0 { - pgrx::pg_sys::WaitForParallelWorkersToFinish(pcxt); - if is_mvcc_snapshot(snapshot) { - pgrx::pg_sys::UnregisterSnapshot(snapshot); - } - pgrx::pg_sys::DestroyParallelContext(pcxt); - pgrx::pg_sys::ExitParallelMode(); - return None; - } - } - - Some(Self { - pcxt, - nparticipants: nworkers_launched + 1, - vchordrqshared, - tablescandesc, - snapshot, - }) - } - - pub fn wait(&self) { - unsafe { - pgrx::pg_sys::WaitForParallelWorkersToAttach(self.pcxt); - } - } -} - -impl Drop for VchordrqLeader { - fn drop(&mut self) { - if !std::thread::panicking() { - unsafe { - pgrx::pg_sys::WaitForParallelWorkersToFinish(self.pcxt); - if is_mvcc_snapshot(self.snapshot) { - pgrx::pg_sys::UnregisterSnapshot(self.snapshot); - } - pgrx::pg_sys::DestroyParallelContext(self.pcxt); - pgrx::pg_sys::ExitParallelMode(); - } - } - } -} - -#[pgrx::pg_guard] -#[unsafe(no_mangle)] -pub unsafe extern "C" fn vchordrq_parallel_build_main( - _seg: *mut pgrx::pg_sys::dsm_segment, - toc: *mut pgrx::pg_sys::shm_toc, -) { - let vchordrqshared = unsafe { - pgrx::pg_sys::shm_toc_lookup(toc, 0xA000000000000001, false).cast::() - }; - let tablescandesc = unsafe { - pgrx::pg_sys::shm_toc_lookup(toc, 0xA000000000000002, false) - .cast::() - }; - let heap_lockmode; - let index_lockmode; - if unsafe { !(*vchordrqshared).isconcurrent } { - heap_lockmode = pgrx::pg_sys::ShareLock as pgrx::pg_sys::LOCKMODE; - index_lockmode = pgrx::pg_sys::AccessExclusiveLock as pgrx::pg_sys::LOCKMODE; - } else { - heap_lockmode = pgrx::pg_sys::ShareUpdateExclusiveLock as pgrx::pg_sys::LOCKMODE; - index_lockmode = pgrx::pg_sys::RowExclusiveLock as pgrx::pg_sys::LOCKMODE; - } - let heap = unsafe { pgrx::pg_sys::table_open((*vchordrqshared).heaprelid, heap_lockmode) }; - let index = unsafe { pgrx::pg_sys::index_open((*vchordrqshared).indexrelid, index_lockmode) }; - let index_info = unsafe { pgrx::pg_sys::BuildIndexInfo(index) }; - unsafe { - (*index_info).ii_Concurrent = (*vchordrqshared).isconcurrent; - } - - unsafe { - parallel_build(index, heap, index_info, tablescandesc, vchordrqshared, None); - } - - unsafe { - pgrx::pg_sys::index_close(index, index_lockmode); - pgrx::pg_sys::table_close(heap, heap_lockmode); - } -} - -unsafe fn parallel_build( - index: *mut pgrx::pg_sys::RelationData, - heap: pgrx::pg_sys::Relation, - index_info: *mut pgrx::pg_sys::IndexInfo, - tablescandesc: *mut pgrx::pg_sys::ParallelTableScanDescData, - vchordrqshared: *mut VchordrqShared, - mut reporter: Option, -) { - #[derive(Debug, Clone)] - pub struct Heap { - heap: pgrx::pg_sys::Relation, - index: pgrx::pg_sys::Relation, - index_info: *mut pgrx::pg_sys::IndexInfo, - opfamily: Opfamily, - scan: *mut pgrx::pg_sys::TableScanDescData, - } - impl HeapRelation for Heap { - fn traverse(&self, progress: bool, callback: F) - where - F: FnMut((NonZeroU64, O::Vector)), - { - pub struct State<'a, F> { - pub this: &'a Heap, - pub callback: F, - } - #[pgrx::pg_guard] - unsafe extern "C" fn call( - _index: pgrx::pg_sys::Relation, - ctid: pgrx::pg_sys::ItemPointer, - values: *mut Datum, - is_null: *mut bool, - _tuple_is_alive: bool, - state: *mut core::ffi::c_void, - ) where - F: FnMut((NonZeroU64, O::Vector)), - { - let state = unsafe { &mut *state.cast::>() }; - let opfamily = state.this.opfamily; - let vector = unsafe { opfamily.datum_to_vector(*values.add(0), *is_null.add(0)) }; - let pointer = unsafe { ctid_to_pointer(ctid.read()) }; - if let Some(vector) = vector { - (state.callback)((pointer, O::Vector::from_owned(vector))); - } - } - let table_am = unsafe { &*(*self.heap).rd_tableam }; - let mut state = State { - this: self, - callback, - }; - unsafe { - table_am.index_build_range_scan.unwrap()( - self.heap, - self.index, - self.index_info, - true, - false, - progress, - 0, - pgrx::pg_sys::InvalidBlockNumber, - Some(call::), - (&mut state) as *mut State as *mut _, - self.scan, - ); - } - } - - fn opfamily(&self) -> Opfamily { - self.opfamily - } - } - - let index_relation = unsafe { PostgresRelation::new(index) }; - - let scan = unsafe { pgrx::pg_sys::table_beginscan_parallel(heap, tablescandesc) }; - let opfamily = unsafe { am_options::opfamily(index) }; - let heap_relation = Heap { - heap, - index, - index_info, - opfamily, - scan, - }; - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - HeapRelation::, L2>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, L2>>( - index_relation.clone(), - pointer, - vector, - ); - unsafe { - let indtuples; - { - pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); - (*vchordrqshared).indtuples += 1; - indtuples = (*vchordrqshared).indtuples; - pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); - } - if let Some(reporter) = reporter.as_mut() { - reporter.tuples_done(indtuples); - } - } - }, - ); - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - HeapRelation::, Dot>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, Dot>>( - index_relation.clone(), - pointer, - vector, - ); - unsafe { - let indtuples; - { - pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); - (*vchordrqshared).indtuples += 1; - indtuples = (*vchordrqshared).indtuples; - pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); - } - if let Some(reporter) = reporter.as_mut() { - reporter.tuples_done(indtuples); - } - } - }, - ); - } - (VectorKind::Vecf16, DistanceKind::L2) => { - HeapRelation::, L2>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, L2>>( - index_relation.clone(), - pointer, - vector, - ); - unsafe { - let indtuples; - { - pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); - (*vchordrqshared).indtuples += 1; - indtuples = (*vchordrqshared).indtuples; - pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); - } - if let Some(reporter) = reporter.as_mut() { - reporter.tuples_done(indtuples); - } - } - }, - ); - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - HeapRelation::, Dot>>::traverse( - &heap_relation, - true, - |(pointer, vector)| { - algorithm::insert::insert::, Dot>>( - index_relation.clone(), - pointer, - vector, - ); - unsafe { - let indtuples; - { - pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); - (*vchordrqshared).indtuples += 1; - indtuples = (*vchordrqshared).indtuples; - pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); - } - if let Some(reporter) = reporter.as_mut() { - reporter.tuples_done(indtuples); - } - } - }, - ); - } - } - unsafe { - pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); - (*vchordrqshared).nparticipantsdone += 1; - pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); - pgrx::pg_sys::ConditionVariableSignal(&raw mut (*vchordrqshared).workersdonecv); - } -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn ambuildempty(_index: pgrx::pg_sys::Relation) { - pgrx::error!("Unlogged indexes are not supported."); -} - -#[cfg(feature = "pg13")] -#[pgrx::pg_guard] -pub unsafe extern "C" fn aminsert( - index: pgrx::pg_sys::Relation, - values: *mut Datum, - is_null: *mut bool, - heap_tid: pgrx::pg_sys::ItemPointer, - _heap: pgrx::pg_sys::Relation, - _check_unique: pgrx::pg_sys::IndexUniqueCheck::Type, - _index_info: *mut pgrx::pg_sys::IndexInfo, -) -> bool { - let opfamily = unsafe { am_options::opfamily(index) }; - let vector = unsafe { opfamily.datum_to_vector(*values.add(0), *is_null.add(0)) }; - if let Some(vector) = vector { - let pointer = ctid_to_pointer(unsafe { heap_tid.read() }); - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - algorithm::insert::insert::, L2>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - algorithm::insert::insert::, Dot>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - (VectorKind::Vecf16, DistanceKind::L2) => { - algorithm::insert::insert::, L2>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - algorithm::insert::insert::, Dot>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - } - } - false -} - -#[cfg(any(feature = "pg14", feature = "pg15", feature = "pg16", feature = "pg17"))] -#[pgrx::pg_guard] -pub unsafe extern "C" fn aminsert( - index: pgrx::pg_sys::Relation, - values: *mut Datum, - is_null: *mut bool, - heap_tid: pgrx::pg_sys::ItemPointer, - _heap: pgrx::pg_sys::Relation, - _check_unique: pgrx::pg_sys::IndexUniqueCheck::Type, - _index_unchanged: bool, - _index_info: *mut pgrx::pg_sys::IndexInfo, -) -> bool { - let opfamily = unsafe { am_options::opfamily(index) }; - let vector = unsafe { opfamily.datum_to_vector(*values.add(0), *is_null.add(0)) }; - if let Some(vector) = vector { - let pointer = ctid_to_pointer(unsafe { heap_tid.read() }); - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - algorithm::insert::insert::, L2>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - algorithm::insert::insert::, Dot>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - (VectorKind::Vecf16, DistanceKind::L2) => { - algorithm::insert::insert::, L2>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - algorithm::insert::insert::, Dot>>( - unsafe { PostgresRelation::new(index) }, - pointer, - VectOwned::::from_owned(vector), - ) - } - } - } - false -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn ambeginscan( - index: pgrx::pg_sys::Relation, - n_keys: std::os::raw::c_int, - n_orderbys: std::os::raw::c_int, -) -> pgrx::pg_sys::IndexScanDesc { - use pgrx::memcxt::PgMemoryContexts::CurrentMemoryContext; - - let scan = unsafe { pgrx::pg_sys::RelationGetIndexScan(index, n_keys, n_orderbys) }; - unsafe { - let scanner = am_scan::scan_make(None, None, false); - (*scan).opaque = CurrentMemoryContext.leak_and_drop_on_delete(scanner).cast(); - } - scan -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amrescan( - scan: pgrx::pg_sys::IndexScanDesc, - keys: pgrx::pg_sys::ScanKey, - _n_keys: std::os::raw::c_int, - orderbys: pgrx::pg_sys::ScanKey, - _n_orderbys: std::os::raw::c_int, -) { - unsafe { - if !keys.is_null() && (*scan).numberOfKeys > 0 { - std::ptr::copy(keys, (*scan).keyData, (*scan).numberOfKeys as _); - } - if !orderbys.is_null() && (*scan).numberOfOrderBys > 0 { - std::ptr::copy(orderbys, (*scan).orderByData, (*scan).numberOfOrderBys as _); - } - let opfamily = am_options::opfamily((*scan).indexRelation); - let (orderbys, spheres) = { - let mut orderbys = Vec::new(); - let mut spheres = Vec::new(); - if (*scan).numberOfOrderBys == 0 && (*scan).numberOfKeys == 0 { - pgrx::error!( - "vector search with no WHERE clause and no ORDER BY clause is not supported" - ); - } - for i in 0..(*scan).numberOfOrderBys { - let data = (*scan).orderByData.add(i as usize); - let value = (*data).sk_argument; - let is_null = ((*data).sk_flags & pgrx::pg_sys::SK_ISNULL as i32) != 0; - match (*data).sk_strategy { - 1 => orderbys.push(opfamily.datum_to_vector(value, is_null)), - _ => unreachable!(), - } - } - for i in 0..(*scan).numberOfKeys { - let data = (*scan).keyData.add(i as usize); - let value = (*data).sk_argument; - let is_null = ((*data).sk_flags & pgrx::pg_sys::SK_ISNULL as i32) != 0; - match (*data).sk_strategy { - 2 => spheres.push(opfamily.datum_to_sphere(value, is_null)), - _ => unreachable!(), - } - } - (orderbys, spheres) - }; - let (vector, threshold, recheck) = am_scan::scan_build(orderbys, spheres, opfamily); - let scanner = (*scan).opaque.cast::().as_mut().unwrap_unchecked(); - let scanner = std::mem::replace(scanner, am_scan::scan_make(vector, threshold, recheck)); - am_scan::scan_release(scanner); - } -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amgettuple( - scan: pgrx::pg_sys::IndexScanDesc, - direction: pgrx::pg_sys::ScanDirection::Type, -) -> bool { - if direction != pgrx::pg_sys::ScanDirection::ForwardScanDirection { - pgrx::error!("vector search without a forward scan direction is not supported"); - } - // https://www.postgresql.org/docs/current/index-locking.html - // If heap entries referenced physical pointers are deleted before - // they are consumed by PostgreSQL, PostgreSQL will received wrong - // physical pointers: no rows or irreverent rows are referenced. - if unsafe { (*(*scan).xs_snapshot).snapshot_type } != pgrx::pg_sys::SnapshotType::SNAPSHOT_MVCC - { - pgrx::error!("scanning with a non-MVCC-compliant snapshot is not supported"); - } - let scanner = unsafe { (*scan).opaque.cast::().as_mut().unwrap_unchecked() }; - let relation = unsafe { PostgresRelation::new((*scan).indexRelation) }; - if let Some((pointer, recheck)) = am_scan::scan_next(scanner, relation) { - let ctid = pointer_to_ctid(pointer); - unsafe { - (*scan).xs_heaptid = ctid; - (*scan).xs_recheckorderby = false; - (*scan).xs_recheck = recheck; - } - true - } else { - false - } -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amendscan(scan: pgrx::pg_sys::IndexScanDesc) { - unsafe { - let scanner = (*scan).opaque.cast::().as_mut().unwrap_unchecked(); - let scanner = std::mem::replace(scanner, am_scan::scan_make(None, None, false)); - am_scan::scan_release(scanner); - } -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn ambulkdelete( - info: *mut pgrx::pg_sys::IndexVacuumInfo, - stats: *mut pgrx::pg_sys::IndexBulkDeleteResult, - callback: pgrx::pg_sys::IndexBulkDeleteCallback, - callback_state: *mut std::os::raw::c_void, -) -> *mut pgrx::pg_sys::IndexBulkDeleteResult { - let mut stats = stats; - if stats.is_null() { - stats = unsafe { - pgrx::pg_sys::palloc0(size_of::()).cast() - }; - } - let opfamily = unsafe { am_options::opfamily((*info).index) }; - let callback = callback.unwrap(); - let callback = |p: NonZeroU64| unsafe { callback(&mut pointer_to_ctid(p), callback_state) }; - let index = unsafe { PostgresRelation::new((*info).index) }; - let delay = || unsafe { - pgrx::pg_sys::vacuum_delay_point(); - }; - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - type O = Op, L2>; - algorithm::vacuum::bulkdelete::(index.clone(), delay, callback); - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - type O = Op, Dot>; - algorithm::vacuum::bulkdelete::(index.clone(), delay, callback); - } - (VectorKind::Vecf16, DistanceKind::L2) => { - type O = Op, L2>; - algorithm::vacuum::bulkdelete::(index.clone(), delay, callback); - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - type O = Op, Dot>; - algorithm::vacuum::bulkdelete::(index.clone(), delay, callback); - } - } - stats -} - -#[pgrx::pg_guard] -pub unsafe extern "C" fn amvacuumcleanup( - info: *mut pgrx::pg_sys::IndexVacuumInfo, - _stats: *mut pgrx::pg_sys::IndexBulkDeleteResult, -) -> *mut pgrx::pg_sys::IndexBulkDeleteResult { - let opfamily = unsafe { am_options::opfamily((*info).index) }; - let index = unsafe { PostgresRelation::new((*info).index) }; - let delay = || unsafe { - pgrx::pg_sys::vacuum_delay_point(); - }; - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - type O = Op, L2>; - algorithm::vacuum::maintain::(index, delay); - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - type O = Op, Dot>; - algorithm::vacuum::maintain::(index, delay); - } - (VectorKind::Vecf16, DistanceKind::L2) => { - type O = Op, L2>; - algorithm::vacuum::maintain::(index, delay); - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - type O = Op, Dot>; - algorithm::vacuum::maintain::(index, delay); - } - } - std::ptr::null_mut() -} diff --git a/src/index/am/am_build.rs b/src/index/am/am_build.rs new file mode 100644 index 0000000..e7d6558 --- /dev/null +++ b/src/index/am/am_build.rs @@ -0,0 +1,950 @@ +use crate::datatype::typmod::Typmod; +use crate::index::am::{Reloption, ctid_to_pointer}; +use crate::index::opclass::{Opfamily, opfamily}; +use crate::index::projection::RandomProject; +use crate::index::storage::PostgresRelation; +use algorithm::operator::{Dot, L2, Op, Vector}; +use algorithm::types::*; +use half::f16; +use pgrx::pg_sys::Datum; +use rand::Rng; +use simd::Floating; +use std::num::NonZeroU64; +use std::sync::Arc; +use vector::vect::VectOwned; +use vector::{VectorBorrowed, VectorOwned}; + +#[derive(Debug, Clone)] +struct Heap { + heap_relation: pgrx::pg_sys::Relation, + index_relation: pgrx::pg_sys::Relation, + index_info: *mut pgrx::pg_sys::IndexInfo, + opfamily: Opfamily, + scan: *mut pgrx::pg_sys::TableScanDescData, +} + +impl Heap { + fn traverse(&self, progress: bool, callback: F) { + pub struct State<'a, F> { + pub this: &'a Heap, + pub callback: F, + } + #[pgrx::pg_guard] + unsafe extern "C" fn call( + _index_relation: pgrx::pg_sys::Relation, + ctid: pgrx::pg_sys::ItemPointer, + values: *mut Datum, + is_null: *mut bool, + _tuple_is_alive: bool, + state: *mut core::ffi::c_void, + ) where + F: FnMut((NonZeroU64, V)), + { + let state = unsafe { &mut *state.cast::>() }; + let opfamily = state.this.opfamily; + let vector = unsafe { opfamily.input_vector(*values.add(0), *is_null.add(0)) }; + let pointer = unsafe { ctid_to_pointer(ctid.read()) }; + if let Some(vector) = vector { + (state.callback)((pointer, V::from_owned(vector))); + } + } + let table_am = unsafe { &*(*self.heap_relation).rd_tableam }; + let mut state = State { + this: self, + callback, + }; + unsafe { + table_am.index_build_range_scan.unwrap()( + self.heap_relation, + self.index_relation, + self.index_info, + true, + false, + progress, + 0, + pgrx::pg_sys::InvalidBlockNumber, + Some(call::), + (&mut state) as *mut State as *mut _, + self.scan, + ); + } + } +} + +#[derive(Debug, Clone)] +struct PostgresReporter {} + +impl PostgresReporter { + fn tuples_total(&mut self, tuples_total: u64) { + unsafe { + pgrx::pg_sys::pgstat_progress_update_param( + pgrx::pg_sys::PROGRESS_CREATEIDX_TUPLES_TOTAL as _, + tuples_total as _, + ); + } + } + fn tuples_done(&mut self, tuples_done: u64) { + unsafe { + pgrx::pg_sys::pgstat_progress_update_param( + pgrx::pg_sys::PROGRESS_CREATEIDX_TUPLES_DONE as _, + tuples_done as _, + ); + } + } +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn ambuild( + heap_relation: pgrx::pg_sys::Relation, + index_relation: pgrx::pg_sys::Relation, + index_info: *mut pgrx::pg_sys::IndexInfo, +) -> *mut pgrx::pg_sys::IndexBuildResult { + use validator::Validate; + let (vector_options, vchordrq_options) = unsafe { options(index_relation) }; + if let Err(errors) = Validate::validate(&vector_options) { + pgrx::error!("error while validating options: {}", errors); + } + if vector_options.dims == 0 { + pgrx::error!("error while validating options: dimension cannot be 0"); + } + if vector_options.dims > 60000 { + pgrx::error!("error while validating options: dimension is too large"); + } + if let Err(errors) = Validate::validate(&vchordrq_options) { + pgrx::error!("error while validating options: {}", errors); + } + let opfamily = unsafe { opfamily(index_relation) }; + let heap = Heap { + heap_relation, + index_relation, + index_info, + opfamily, + scan: std::ptr::null_mut(), + }; + let index = unsafe { PostgresRelation::new(index_relation) }; + let mut reporter = PostgresReporter {}; + let structures = match vchordrq_options.build.clone() { + VchordrqBuildOptions::External(external_build) => { + make_external_build(vector_options.clone(), opfamily, external_build.clone()) + } + VchordrqBuildOptions::Internal(internal_build) => { + let mut tuples_total = 0_u64; + let samples = { + let mut rand = rand::thread_rng(); + let max_number_of_samples = internal_build + .lists + .last() + .unwrap() + .saturating_mul(internal_build.sampling_factor); + let mut samples = Vec::new(); + let mut number_of_samples = 0_u32; + match opfamily.vector_kind() { + VectorKind::Vecf32 => { + heap.traverse(false, |(_, vector): (_, VectOwned)| { + let vector = vector.as_borrowed(); + assert_eq!( + vector_options.dims, + vector.dims(), + "invalid vector dimensions" + ); + if number_of_samples < max_number_of_samples { + samples.push(VectOwned::::build_to_vecf32(vector)); + number_of_samples += 1; + } else { + let index = rand.gen_range(0..max_number_of_samples) as usize; + samples[index] = VectOwned::::build_to_vecf32(vector); + } + tuples_total += 1; + }); + } + VectorKind::Vecf16 => { + heap.traverse(false, |(_, vector): (_, VectOwned)| { + let vector = vector.as_borrowed(); + assert_eq!( + vector_options.dims, + vector.dims(), + "invalid vector dimensions" + ); + if number_of_samples < max_number_of_samples { + samples.push(VectOwned::::build_to_vecf32(vector)); + number_of_samples += 1; + } else { + let index = rand.gen_range(0..max_number_of_samples) as usize; + samples[index] = VectOwned::::build_to_vecf32(vector); + } + tuples_total += 1; + }); + } + } + samples + }; + reporter.tuples_total(tuples_total); + make_internal_build(vector_options.clone(), internal_build.clone(), samples) + } + }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => algorithm::build::, L2>>( + vector_options, + vchordrq_options, + index.clone(), + map_structures(structures, |x| InternalBuild::build_from_vecf32(&x)), + ), + (VectorKind::Vecf32, DistanceKind::Dot) => algorithm::build::, Dot>>( + vector_options, + vchordrq_options, + index.clone(), + map_structures(structures, |x| InternalBuild::build_from_vecf32(&x)), + ), + (VectorKind::Vecf16, DistanceKind::L2) => algorithm::build::, L2>>( + vector_options, + vchordrq_options, + index.clone(), + map_structures(structures, |x| InternalBuild::build_from_vecf32(&x)), + ), + (VectorKind::Vecf16, DistanceKind::Dot) => algorithm::build::, Dot>>( + vector_options, + vchordrq_options, + index.clone(), + map_structures(structures, |x| InternalBuild::build_from_vecf32(&x)), + ), + } + if let Some(leader) = + unsafe { VchordrqLeader::enter(heap_relation, index_relation, (*index_info).ii_Concurrent) } + { + unsafe { + parallel_build( + index_relation, + heap_relation, + index_info, + leader.tablescandesc, + leader.vchordrqshared, + Some(reporter), + ); + leader.wait(); + let nparticipants = leader.nparticipants; + loop { + pgrx::pg_sys::SpinLockAcquire(&raw mut (*leader.vchordrqshared).mutex); + if (*leader.vchordrqshared).nparticipantsdone == nparticipants { + pgrx::pg_sys::SpinLockRelease(&raw mut (*leader.vchordrqshared).mutex); + break; + } + pgrx::pg_sys::SpinLockRelease(&raw mut (*leader.vchordrqshared).mutex); + pgrx::pg_sys::ConditionVariableSleep( + &raw mut (*leader.vchordrqshared).workersdonecv, + pgrx::pg_sys::WaitEventIPC::WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN, + ); + } + pgrx::pg_sys::ConditionVariableCancelSleep(); + } + } else { + let mut indtuples = 0; + reporter.tuples_done(indtuples); + let relation = unsafe { PostgresRelation::new(index_relation) }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, L2>>( + relation.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + indtuples += 1; + reporter.tuples_done(indtuples); + }); + } + (VectorKind::Vecf32, DistanceKind::Dot) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, Dot>>( + relation.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + indtuples += 1; + reporter.tuples_done(indtuples); + }); + } + (VectorKind::Vecf16, DistanceKind::L2) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, L2>>( + relation.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + indtuples += 1; + reporter.tuples_done(indtuples); + }); + } + (VectorKind::Vecf16, DistanceKind::Dot) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, Dot>>( + relation.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + indtuples += 1; + reporter.tuples_done(indtuples); + }); + } + } + } + let check = || { + pgrx::check_for_interrupts!(); + }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => { + algorithm::maintain::, L2>>(index, check); + } + (VectorKind::Vecf32, DistanceKind::Dot) => { + algorithm::maintain::, Dot>>(index, check); + } + (VectorKind::Vecf16, DistanceKind::L2) => { + algorithm::maintain::, L2>>(index, check); + } + (VectorKind::Vecf16, DistanceKind::Dot) => { + algorithm::maintain::, Dot>>(index, check); + } + } + unsafe { pgrx::pgbox::PgBox::::alloc0().into_pg() } +} + +struct VchordrqShared { + /* Immutable state */ + heaprelid: pgrx::pg_sys::Oid, + indexrelid: pgrx::pg_sys::Oid, + isconcurrent: bool, + + /* Worker progress */ + workersdonecv: pgrx::pg_sys::ConditionVariable, + + /* Mutex for mutable state */ + mutex: pgrx::pg_sys::slock_t, + + /* Mutable state */ + nparticipantsdone: i32, + indtuples: u64, +} + +fn is_mvcc_snapshot(snapshot: *mut pgrx::pg_sys::SnapshotData) -> bool { + matches!( + unsafe { (*snapshot).snapshot_type }, + pgrx::pg_sys::SnapshotType::SNAPSHOT_MVCC + | pgrx::pg_sys::SnapshotType::SNAPSHOT_HISTORIC_MVCC + ) +} + +struct VchordrqLeader { + pcxt: *mut pgrx::pg_sys::ParallelContext, + nparticipants: i32, + vchordrqshared: *mut VchordrqShared, + tablescandesc: *mut pgrx::pg_sys::ParallelTableScanDescData, + snapshot: pgrx::pg_sys::Snapshot, +} + +impl VchordrqLeader { + pub unsafe fn enter( + heap_relation: pgrx::pg_sys::Relation, + index_relation: pgrx::pg_sys::Relation, + isconcurrent: bool, + ) -> Option { + unsafe fn compute_parallel_workers( + heap_relation: pgrx::pg_sys::Relation, + index_relation: pgrx::pg_sys::Relation, + ) -> i32 { + unsafe { + if pgrx::pg_sys::plan_create_index_workers( + (*heap_relation).rd_id, + (*index_relation).rd_id, + ) == 0 + { + return 0; + } + if !(*heap_relation).rd_options.is_null() { + let std_options = (*heap_relation) + .rd_options + .cast::(); + std::cmp::min( + (*std_options).parallel_workers, + pgrx::pg_sys::max_parallel_maintenance_workers, + ) + } else { + pgrx::pg_sys::max_parallel_maintenance_workers + } + } + } + + let request = unsafe { compute_parallel_workers(heap_relation, index_relation) }; + if request <= 0 { + return None; + } + + unsafe { + pgrx::pg_sys::EnterParallelMode(); + } + let pcxt = unsafe { + pgrx::pg_sys::CreateParallelContext( + c"vchord".as_ptr(), + c"vchordrq_parallel_build_main".as_ptr(), + request, + ) + }; + + let snapshot = if isconcurrent { + unsafe { pgrx::pg_sys::RegisterSnapshot(pgrx::pg_sys::GetTransactionSnapshot()) } + } else { + &raw mut pgrx::pg_sys::SnapshotAnyData + }; + + fn estimate_chunk(e: &mut pgrx::pg_sys::shm_toc_estimator, x: usize) { + e.space_for_chunks += x.next_multiple_of(pgrx::pg_sys::ALIGNOF_BUFFER as _); + } + fn estimate_keys(e: &mut pgrx::pg_sys::shm_toc_estimator, x: usize) { + e.number_of_keys += x; + } + let est_tablescandesc = + unsafe { pgrx::pg_sys::table_parallelscan_estimate(heap_relation, snapshot) }; + unsafe { + estimate_chunk(&mut (*pcxt).estimator, size_of::()); + estimate_keys(&mut (*pcxt).estimator, 1); + estimate_chunk(&mut (*pcxt).estimator, est_tablescandesc); + estimate_keys(&mut (*pcxt).estimator, 1); + } + + unsafe { + pgrx::pg_sys::InitializeParallelDSM(pcxt); + if (*pcxt).seg.is_null() { + if is_mvcc_snapshot(snapshot) { + pgrx::pg_sys::UnregisterSnapshot(snapshot); + } + pgrx::pg_sys::DestroyParallelContext(pcxt); + pgrx::pg_sys::ExitParallelMode(); + return None; + } + } + + let vchordrqshared = unsafe { + let vchordrqshared = + pgrx::pg_sys::shm_toc_allocate((*pcxt).toc, size_of::()) + .cast::(); + vchordrqshared.write(VchordrqShared { + heaprelid: (*heap_relation).rd_id, + indexrelid: (*index_relation).rd_id, + isconcurrent, + workersdonecv: std::mem::zeroed(), + mutex: std::mem::zeroed(), + nparticipantsdone: 0, + indtuples: 0, + }); + pgrx::pg_sys::ConditionVariableInit(&raw mut (*vchordrqshared).workersdonecv); + pgrx::pg_sys::SpinLockInit(&raw mut (*vchordrqshared).mutex); + vchordrqshared + }; + + let tablescandesc = unsafe { + let tablescandesc = pgrx::pg_sys::shm_toc_allocate((*pcxt).toc, est_tablescandesc) + .cast::(); + pgrx::pg_sys::table_parallelscan_initialize(heap_relation, tablescandesc, snapshot); + tablescandesc + }; + + unsafe { + pgrx::pg_sys::shm_toc_insert((*pcxt).toc, 0xA000000000000001, vchordrqshared.cast()); + pgrx::pg_sys::shm_toc_insert((*pcxt).toc, 0xA000000000000002, tablescandesc.cast()); + } + + unsafe { + pgrx::pg_sys::LaunchParallelWorkers(pcxt); + } + + let nworkers_launched = unsafe { (*pcxt).nworkers_launched }; + + unsafe { + if nworkers_launched == 0 { + pgrx::pg_sys::WaitForParallelWorkersToFinish(pcxt); + if is_mvcc_snapshot(snapshot) { + pgrx::pg_sys::UnregisterSnapshot(snapshot); + } + pgrx::pg_sys::DestroyParallelContext(pcxt); + pgrx::pg_sys::ExitParallelMode(); + return None; + } + } + + Some(Self { + pcxt, + nparticipants: nworkers_launched + 1, + vchordrqshared, + tablescandesc, + snapshot, + }) + } + + pub fn wait(&self) { + unsafe { + pgrx::pg_sys::WaitForParallelWorkersToAttach(self.pcxt); + } + } +} + +impl Drop for VchordrqLeader { + fn drop(&mut self) { + if !std::thread::panicking() { + unsafe { + pgrx::pg_sys::WaitForParallelWorkersToFinish(self.pcxt); + if is_mvcc_snapshot(self.snapshot) { + pgrx::pg_sys::UnregisterSnapshot(self.snapshot); + } + pgrx::pg_sys::DestroyParallelContext(self.pcxt); + pgrx::pg_sys::ExitParallelMode(); + } + } + } +} + +#[pgrx::pg_guard] +#[unsafe(no_mangle)] +pub unsafe extern "C" fn vchordrq_parallel_build_main( + _seg: *mut pgrx::pg_sys::dsm_segment, + toc: *mut pgrx::pg_sys::shm_toc, +) { + let vchordrqshared = unsafe { + pgrx::pg_sys::shm_toc_lookup(toc, 0xA000000000000001, false).cast::() + }; + let tablescandesc = unsafe { + pgrx::pg_sys::shm_toc_lookup(toc, 0xA000000000000002, false) + .cast::() + }; + let heap_lockmode; + let index_lockmode; + if unsafe { !(*vchordrqshared).isconcurrent } { + heap_lockmode = pgrx::pg_sys::ShareLock as pgrx::pg_sys::LOCKMODE; + index_lockmode = pgrx::pg_sys::AccessExclusiveLock as pgrx::pg_sys::LOCKMODE; + } else { + heap_lockmode = pgrx::pg_sys::ShareUpdateExclusiveLock as pgrx::pg_sys::LOCKMODE; + index_lockmode = pgrx::pg_sys::RowExclusiveLock as pgrx::pg_sys::LOCKMODE; + } + let heap = unsafe { pgrx::pg_sys::table_open((*vchordrqshared).heaprelid, heap_lockmode) }; + let index = unsafe { pgrx::pg_sys::index_open((*vchordrqshared).indexrelid, index_lockmode) }; + let index_info = unsafe { pgrx::pg_sys::BuildIndexInfo(index) }; + unsafe { + (*index_info).ii_Concurrent = (*vchordrqshared).isconcurrent; + } + + unsafe { + parallel_build(index, heap, index_info, tablescandesc, vchordrqshared, None); + } + + unsafe { + pgrx::pg_sys::index_close(index, index_lockmode); + pgrx::pg_sys::table_close(heap, heap_lockmode); + } +} + +unsafe fn parallel_build( + index_relation: pgrx::pg_sys::Relation, + heap_relation: pgrx::pg_sys::Relation, + index_info: *mut pgrx::pg_sys::IndexInfo, + tablescandesc: *mut pgrx::pg_sys::ParallelTableScanDescData, + vchordrqshared: *mut VchordrqShared, + mut reporter: Option, +) { + let index = unsafe { PostgresRelation::new(index_relation) }; + + let scan = unsafe { pgrx::pg_sys::table_beginscan_parallel(heap_relation, tablescandesc) }; + let opfamily = unsafe { opfamily(index_relation) }; + let heap = Heap { + heap_relation, + index_relation, + index_info, + opfamily, + scan, + }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, L2>>( + index.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + unsafe { + let indtuples; + { + pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); + (*vchordrqshared).indtuples += 1; + indtuples = (*vchordrqshared).indtuples; + pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); + } + if let Some(reporter) = reporter.as_mut() { + reporter.tuples_done(indtuples); + } + } + }); + } + (VectorKind::Vecf32, DistanceKind::Dot) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, Dot>>( + index.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + unsafe { + let indtuples; + { + pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); + (*vchordrqshared).indtuples += 1; + indtuples = (*vchordrqshared).indtuples; + pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); + } + if let Some(reporter) = reporter.as_mut() { + reporter.tuples_done(indtuples); + } + } + }); + } + (VectorKind::Vecf16, DistanceKind::L2) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, L2>>( + index.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + unsafe { + let indtuples; + { + pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); + (*vchordrqshared).indtuples += 1; + indtuples = (*vchordrqshared).indtuples; + pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); + } + if let Some(reporter) = reporter.as_mut() { + reporter.tuples_done(indtuples); + } + } + }); + } + (VectorKind::Vecf16, DistanceKind::Dot) => { + heap.traverse(true, |(pointer, vector): (_, VectOwned)| { + algorithm::insert::, Dot>>( + index.clone(), + pointer, + RandomProject::project(vector.as_borrowed()), + ); + unsafe { + let indtuples; + { + pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); + (*vchordrqshared).indtuples += 1; + indtuples = (*vchordrqshared).indtuples; + pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); + } + if let Some(reporter) = reporter.as_mut() { + reporter.tuples_done(indtuples); + } + } + }); + } + } + unsafe { + pgrx::pg_sys::SpinLockAcquire(&raw mut (*vchordrqshared).mutex); + (*vchordrqshared).nparticipantsdone += 1; + pgrx::pg_sys::SpinLockRelease(&raw mut (*vchordrqshared).mutex); + pgrx::pg_sys::ConditionVariableSignal(&raw mut (*vchordrqshared).workersdonecv); + } +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn ambuildempty(_index_relation: pgrx::pg_sys::Relation) { + pgrx::error!("Unlogged indexes are not supported."); +} + +unsafe fn options( + index_relation: pgrx::pg_sys::Relation, +) -> (VectorOptions, VchordrqIndexingOptions) { + let att = unsafe { &mut *(*index_relation).rd_att }; + let atts = unsafe { att.attrs.as_slice(att.natts as _) }; + if atts.is_empty() { + pgrx::error!("indexing on no columns is not supported"); + } + if atts.len() != 1 { + pgrx::error!("multicolumn index is not supported"); + } + // get dims + let typmod = Typmod::parse_from_i32(atts[0].type_mod()).unwrap(); + let dims = if let Some(dims) = typmod.dims() { + dims.get() + } else { + pgrx::error!( + "Dimensions type modifier of a vector column is needed for building the index." + ); + }; + // get v, d + let opfamily = unsafe { opfamily(index_relation) }; + let vector = VectorOptions { + dims, + v: opfamily.vector_kind(), + d: opfamily.distance_kind(), + }; + // get indexing, segment, optimizing + let rabitq = 'rabitq: { + let reloption = unsafe { (*index_relation).rd_options as *const Reloption }; + if reloption.is_null() || unsafe { (*reloption).options == 0 } { + break 'rabitq Default::default(); + } + let s = unsafe { Reloption::options(reloption) }.to_string_lossy(); + match toml::from_str::(&s) { + Ok(p) => p, + Err(e) => pgrx::error!("failed to parse options: {}", e), + } + }; + (vector, rabitq) +} + +pub fn make_internal_build( + vector_options: VectorOptions, + internal_build: VchordrqInternalBuildOptions, + mut samples: Vec>, +) -> Vec>> { + use std::iter::once; + for sample in samples.iter_mut() { + *sample = crate::index::projection::project(sample); + } + let mut result = Vec::>>::new(); + for w in internal_build.lists.iter().rev().copied().chain(once(1)) { + let means = k_means::RayonParallelism::scoped( + internal_build.build_threads as _, + Arc::new(|| { + pgrx::check_for_interrupts!(); + }), + |parallelism| { + k_means::k_means( + parallelism, + w as usize, + vector_options.dims as usize, + if let Some(structure) = result.last() { + &structure.means + } else { + &samples + }, + internal_build.spherical_centroids, + 10, + ) + }, + ) + .expect("failed to create thread pool"); + if let Some(structure) = result.last() { + let mut children = vec![Vec::new(); means.len()]; + for i in 0..structure.len() as u32 { + let target = k_means::k_means_lookup(&structure.means[i as usize], &means); + children[target].push(i); + } + let (means, children) = std::iter::zip(means, children) + .filter(|(_, x)| !x.is_empty()) + .unzip::<_, _, Vec<_>, Vec<_>>(); + result.push(Structure { means, children }); + } else { + let children = vec![Vec::new(); means.len()]; + result.push(Structure { means, children }); + } + } + result +} + +pub fn make_external_build( + vector_options: VectorOptions, + _opfamily: Opfamily, + external_build: VchordrqExternalBuildOptions, +) -> Vec>> { + use std::collections::BTreeMap; + let VchordrqExternalBuildOptions { table } = external_build; + let mut parents = BTreeMap::new(); + let mut vectors = BTreeMap::new(); + pgrx::spi::Spi::connect(|client| { + use crate::datatype::memory_vector::VectorOutput; + use pgrx::pg_sys::panic::ErrorReportable; + use vector::VectorBorrowed; + let schema_query = "SELECT n.nspname::TEXT + FROM pg_catalog.pg_extension e + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace + WHERE e.extname = 'vector';"; + let pgvector_schema: String = client + .select(schema_query, None, None) + .unwrap_or_report() + .first() + .get_by_name("nspname") + .expect("external build: cannot get schema of pgvector") + .expect("external build: cannot get schema of pgvector"); + let dump_query = + format!("SELECT id, parent, vector::{pgvector_schema}.vector FROM {table};"); + let centroids = client.select(&dump_query, None, None).unwrap_or_report(); + for row in centroids { + let id: Option = row.get_by_name("id").unwrap(); + let parent: Option = row.get_by_name("parent").unwrap(); + let vector: Option = row.get_by_name("vector").unwrap(); + let id = id.expect("external build: id could not be NULL"); + let vector = vector.expect("external build: vector could not be NULL"); + let pop = parents.insert(id, parent); + if pop.is_some() { + pgrx::error!( + "external build: there are at least two lines have same id, id = {id}" + ); + } + if vector_options.dims != vector.as_borrowed().dims() { + pgrx::error!("external build: incorrect dimension, id = {id}"); + } + vectors.insert( + id, + crate::index::projection::project(vector.as_borrowed().slice()), + ); + } + }); + if parents.len() >= 2 && parents.values().all(|x| x.is_none()) { + // if there are more than one vertexs and no edges, + // assume there is an implicit root + let n = parents.len(); + let mut result = Vec::new(); + result.push(Structure { + means: vectors.values().cloned().collect::>(), + children: vec![Vec::new(); n], + }); + result.push(Structure { + means: vec![{ + // compute the vector on root, without normalizing it + let mut sum = vec![0.0f32; vector_options.dims as _]; + for vector in vectors.values() { + f32::vector_add_inplace(&mut sum, vector); + } + f32::vector_mul_scalar_inplace(&mut sum, 1.0 / n as f32); + sum + }], + children: vec![(0..n as u32).collect()], + }); + return result; + } + let mut children = parents + .keys() + .map(|x| (*x, Vec::new())) + .collect::>(); + let mut root = None; + for (&id, &parent) in parents.iter() { + if let Some(parent) = parent { + if let Some(parent) = children.get_mut(&parent) { + parent.push(id); + } else { + pgrx::error!("external build: parent does not exist, id = {id}, parent = {parent}"); + } + } else { + if let Some(root) = root { + pgrx::error!("external build: two root, id = {root}, id = {id}"); + } else { + root = Some(id); + } + } + } + let Some(root) = root else { + pgrx::error!("external build: there are no root"); + }; + let mut heights = BTreeMap::<_, _>::new(); + fn dfs_for_heights( + heights: &mut BTreeMap>, + children: &BTreeMap>, + u: i32, + ) { + if heights.contains_key(&u) { + pgrx::error!("external build: detect a cycle, id = {u}"); + } + heights.insert(u, None); + let mut height = None; + for &v in children[&u].iter() { + dfs_for_heights(heights, children, v); + let new = heights[&v].unwrap() + 1; + if let Some(height) = height { + if height != new { + pgrx::error!("external build: two heights, id = {u}"); + } + } else { + height = Some(new); + } + } + if height.is_none() { + height = Some(1); + } + heights.insert(u, height); + } + dfs_for_heights(&mut heights, &children, root); + let heights = heights + .into_iter() + .map(|(k, v)| (k, v.expect("not a connected graph"))) + .collect::>(); + if !(1..=8).contains(&(heights[&root] - 1)) { + pgrx::error!( + "external build: unexpected tree height, height = {}", + heights[&root] + ); + } + let mut cursors = vec![0_u32; 1 + heights[&root] as usize]; + let mut labels = BTreeMap::new(); + for id in parents.keys().copied() { + let height = heights[&id]; + let cursor = cursors[height as usize]; + labels.insert(id, (height, cursor)); + cursors[height as usize] += 1; + } + fn extract( + height: u32, + labels: &BTreeMap, + vectors: &BTreeMap>, + children: &BTreeMap>, + ) -> (Vec>, Vec>) { + labels + .iter() + .filter(|(_, (h, _))| *h == height) + .map(|(id, _)| { + ( + vectors[id].clone(), + children[id].iter().map(|id| labels[id].1).collect(), + ) + }) + .unzip() + } + let mut result = Vec::new(); + for height in 1..=heights[&root] { + let (means, children) = extract(height, &labels, &vectors, &children); + result.push(Structure { means, children }); + } + result +} + +pub fn map_structures(x: Vec>, f: impl Fn(T) -> U + Copy) -> Vec> { + x.into_iter() + .map(|Structure { means, children }| Structure { + means: means.into_iter().map(f).collect(), + children, + }) + .collect() +} + +pub trait InternalBuild: VectorOwned { + fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec; + + fn build_from_vecf32(x: &[f32]) -> Self; +} + +impl InternalBuild for VectOwned { + fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec { + vector.slice().to_vec() + } + + fn build_from_vecf32(x: &[f32]) -> Self { + Self::new(x.to_vec()) + } +} + +impl InternalBuild for VectOwned { + fn build_to_vecf32(vector: Self::Borrowed<'_>) -> Vec { + f16::vector_to_f32(vector.slice()) + } + + fn build_from_vecf32(x: &[f32]) -> Self { + Self::new(f16::vector_from_f32(x)) + } +} diff --git a/src/index/am/am_scan.rs b/src/index/am/am_scan.rs new file mode 100644 index 0000000..3f0c624 --- /dev/null +++ b/src/index/am/am_scan.rs @@ -0,0 +1,285 @@ +use crate::index::am::pointer_to_ctid; +use crate::index::gucs::{epsilon, max_scan_tuples, probes}; +use crate::index::opclass::{Opfamily, Sphere, opfamily}; +use crate::index::projection::RandomProject; +use crate::index::storage::PostgresRelation; +use algorithm::operator::{Dot, L2, Op, Vector}; +use algorithm::types::*; +use half::f16; +use std::num::NonZeroU64; +use vector::VectorOwned; +use vector::vect::VectOwned; + +#[pgrx::pg_guard] +pub unsafe extern "C" fn ambeginscan( + index_relation: pgrx::pg_sys::Relation, + n_keys: std::os::raw::c_int, + n_orderbys: std::os::raw::c_int, +) -> pgrx::pg_sys::IndexScanDesc { + use pgrx::memcxt::PgMemoryContexts::CurrentMemoryContext; + + let scan = unsafe { pgrx::pg_sys::RelationGetIndexScan(index_relation, n_keys, n_orderbys) }; + unsafe { + let scanner = Scanner { + opfamily: opfamily(index_relation), + scanning: Scanning::Empty {}, + }; + (*scan).opaque = CurrentMemoryContext.leak_and_drop_on_delete(scanner).cast(); + } + scan +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amrescan( + scan: pgrx::pg_sys::IndexScanDesc, + keys: pgrx::pg_sys::ScanKey, + _n_keys: std::os::raw::c_int, + orderbys: pgrx::pg_sys::ScanKey, + _n_orderbys: std::os::raw::c_int, +) { + unsafe { + if !keys.is_null() && (*scan).numberOfKeys > 0 { + std::ptr::copy(keys, (*scan).keyData, (*scan).numberOfKeys as _); + } + if !orderbys.is_null() && (*scan).numberOfOrderBys > 0 { + std::ptr::copy(orderbys, (*scan).orderByData, (*scan).numberOfOrderBys as _); + } + let opfamily = opfamily((*scan).indexRelation); + let (orderbys, spheres) = { + let mut orderbys = Vec::new(); + let mut spheres = Vec::new(); + if (*scan).numberOfOrderBys == 0 && (*scan).numberOfKeys == 0 { + pgrx::error!( + "vector search with no WHERE clause and no ORDER BY clause is not supported" + ); + } + for i in 0..(*scan).numberOfOrderBys { + let data = (*scan).orderByData.add(i as usize); + let value = (*data).sk_argument; + let is_null = ((*data).sk_flags & pgrx::pg_sys::SK_ISNULL as i32) != 0; + match (*data).sk_strategy { + 1 => orderbys.push(opfamily.input_vector(value, is_null)), + _ => unreachable!(), + } + } + for i in 0..(*scan).numberOfKeys { + let data = (*scan).keyData.add(i as usize); + let value = (*data).sk_argument; + let is_null = ((*data).sk_flags & pgrx::pg_sys::SK_ISNULL as i32) != 0; + match (*data).sk_strategy { + 2 => spheres.push(opfamily.input_sphere(value, is_null)), + _ => unreachable!(), + } + } + (orderbys, spheres) + }; + let (vector, threshold, recheck) = scanner_build(orderbys, spheres); + let scanner = &mut *(*scan).opaque.cast::(); + scanner.scanning = Scanning::Initial { + vector, + threshold, + recheck, + }; + } +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amgettuple( + scan: pgrx::pg_sys::IndexScanDesc, + direction: pgrx::pg_sys::ScanDirection::Type, +) -> bool { + if direction != pgrx::pg_sys::ScanDirection::ForwardScanDirection { + pgrx::error!("vector search without a forward scan direction is not supported"); + } + // https://www.postgresql.org/docs/current/index-locking.html + // If heap entries referenced physical pointers are deleted before + // they are consumed by PostgreSQL, PostgreSQL will received wrong + // physical pointers: no rows or irreverent rows are referenced. + if unsafe { (*(*scan).xs_snapshot).snapshot_type } != pgrx::pg_sys::SnapshotType::SNAPSHOT_MVCC + { + pgrx::error!("scanning with a non-MVCC-compliant snapshot is not supported"); + } + let scanner = unsafe { (*scan).opaque.cast::().as_mut().unwrap_unchecked() }; + let relation = unsafe { PostgresRelation::new((*scan).indexRelation) }; + if let Some((pointer, recheck)) = scanner_next(scanner, relation) { + let ctid = pointer_to_ctid(pointer); + unsafe { + (*scan).xs_heaptid = ctid; + (*scan).xs_recheckorderby = false; + (*scan).xs_recheck = recheck; + } + true + } else { + false + } +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amendscan(scan: pgrx::pg_sys::IndexScanDesc) { + let scanner = unsafe { &mut *(*scan).opaque.cast::() }; + scanner.scanning = Scanning::Empty {}; +} + +struct Scanner { + opfamily: Opfamily, + scanning: Scanning, +} + +enum Scanning { + Initial { + vector: OwnedVector, + threshold: Option, + recheck: bool, + }, + Vbase { + vbase: Box>, + recheck: bool, + }, + Empty {}, +} + +fn scanner_build( + orderbys: Vec>, + spheres: Vec>>, +) -> (OwnedVector, Option, bool) { + let mut vector = None; + let mut threshold = None; + let mut recheck = false; + for orderby_vector in orderbys.into_iter().flatten() { + if vector.is_none() { + vector = Some(orderby_vector); + } else { + pgrx::error!("vector search with multiple vectors is not supported"); + } + } + for Sphere { center, radius } in spheres.into_iter().flatten() { + if vector.is_none() { + (vector, threshold) = (Some(center), Some(radius)); + } else { + recheck = true; + } + } + let Some(vector) = vector else { + pgrx::error!("vector search without vectors"); + }; + (vector, threshold, recheck) +} + +fn scanner_next(scanner: &mut Scanner, relation: PostgresRelation) -> Option<(NonZeroU64, bool)> { + if let Scanning::Initial { + vector, + threshold, + recheck, + } = &scanner.scanning + { + let opfamily = scanner.opfamily; + let vector = vector.clone(); + let threshold = *threshold; + let recheck = *recheck; + let max_scan_tuples = max_scan_tuples(); + let probes = probes(); + let epsilon = epsilon(); + scanner.scanning = Scanning::Vbase { + vbase: match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => { + let vector = RandomProject::project( + VectOwned::::from_owned(vector.clone()).as_borrowed(), + ); + let vbase = algorithm::search::, L2>>( + relation, vector, probes, epsilon, + ) + .map(move |(distance, payload)| (opfamily.output(distance), payload)); + match (max_scan_tuples, threshold) { + (None, None) => { + Box::new(vbase.fuse()) as Box> + } + (None, Some(threshold)) => { + Box::new(vbase.take_while(move |(x, _)| *x < threshold)) + } + (Some(max_scan_tuples), None) => Box::new(vbase.take(max_scan_tuples as _)), + (Some(max_scan_tuples), Some(threshold)) => Box::new( + vbase + .take_while(move |(x, _)| *x < threshold) + .take(max_scan_tuples as _), + ), + } + } + (VectorKind::Vecf32, DistanceKind::Dot) => { + let vector = RandomProject::project( + VectOwned::::from_owned(vector.clone()).as_borrowed(), + ); + let vbase = algorithm::search::, Dot>>( + relation, vector, probes, epsilon, + ) + .map(move |(distance, payload)| (opfamily.output(distance), payload)); + match (max_scan_tuples, threshold) { + (None, None) => { + Box::new(vbase) as Box> + } + (None, Some(threshold)) => { + Box::new(vbase.take_while(move |(x, _)| *x < threshold)) + } + (Some(max_scan_tuples), None) => Box::new(vbase.take(max_scan_tuples as _)), + (Some(max_scan_tuples), Some(threshold)) => Box::new( + vbase + .take_while(move |(x, _)| *x < threshold) + .take(max_scan_tuples as _), + ), + } + } + (VectorKind::Vecf16, DistanceKind::L2) => { + let vector = RandomProject::project( + VectOwned::::from_owned(vector.clone()).as_borrowed(), + ); + let vbase = algorithm::search::, L2>>( + relation, vector, probes, epsilon, + ) + .map(move |(distance, payload)| (opfamily.output(distance), payload)); + match (max_scan_tuples, threshold) { + (None, None) => { + Box::new(vbase) as Box> + } + (None, Some(threshold)) => { + Box::new(vbase.take_while(move |(x, _)| *x < threshold)) + } + (Some(max_scan_tuples), None) => Box::new(vbase.take(max_scan_tuples as _)), + (Some(max_scan_tuples), Some(threshold)) => Box::new( + vbase + .take_while(move |(x, _)| *x < threshold) + .take(max_scan_tuples as _), + ), + } + } + (VectorKind::Vecf16, DistanceKind::Dot) => { + let vector = RandomProject::project( + VectOwned::::from_owned(vector.clone()).as_borrowed(), + ); + let vbase = algorithm::search::, Dot>>( + relation, vector, probes, epsilon, + ) + .map(move |(distance, payload)| (opfamily.output(distance), payload)); + match (max_scan_tuples, threshold) { + (None, None) => { + Box::new(vbase) as Box> + } + (None, Some(threshold)) => { + Box::new(vbase.take_while(move |(x, _)| *x < threshold)) + } + (Some(max_scan_tuples), None) => Box::new(vbase.take(max_scan_tuples as _)), + (Some(max_scan_tuples), Some(threshold)) => Box::new( + vbase + .take_while(move |(x, _)| *x < threshold) + .take(max_scan_tuples as _), + ), + } + } + }, + recheck, + }; + } + match &mut scanner.scanning { + Scanning::Initial { .. } => unreachable!(), + Scanning::Vbase { vbase, recheck } => vbase.next().map(|(_, x)| (x, *recheck)), + Scanning::Empty {} => None, + } +} diff --git a/src/index/am/mod.rs b/src/index/am/mod.rs new file mode 100644 index 0000000..dcac231 --- /dev/null +++ b/src/index/am/mod.rs @@ -0,0 +1,324 @@ +pub mod am_build; +pub mod am_scan; + +use crate::index::projection::RandomProject; +use crate::index::storage::PostgresRelation; +use algorithm::operator::{Dot, L2, Op, Vector}; +use algorithm::types::*; +use half::f16; +use pgrx::datum::Internal; +use pgrx::pg_sys::Datum; +use std::ffi::CStr; +use std::num::NonZeroU64; +use std::sync::OnceLock; +use vector::VectorOwned; +use vector::vect::VectOwned; + +#[repr(C)] +struct Reloption { + vl_len_: i32, + pub options: i32, +} + +impl Reloption { + unsafe fn options<'a>(this: *const Self) -> &'a CStr { + unsafe { + let ptr = this + .cast::() + .add((&raw const (*this).options).read() as _); + CStr::from_ptr(ptr.cast()) + } + } +} + +const TABLE: &[pgrx::pg_sys::relopt_parse_elt] = &[pgrx::pg_sys::relopt_parse_elt { + optname: c"options".as_ptr(), + opttype: pgrx::pg_sys::relopt_type::RELOPT_TYPE_STRING, + offset: std::mem::offset_of!(Reloption, options) as i32, +}]; + +static RELOPT_KIND: OnceLock = OnceLock::new(); + +pub fn init() { + RELOPT_KIND.get_or_init(|| { + let kind; + unsafe { + kind = pgrx::pg_sys::add_reloption_kind(); + pgrx::pg_sys::add_string_reloption( + kind, + c"options".as_ptr(), + c"Vector index options, represented as a TOML string.".as_ptr(), + c"".as_ptr(), + None, + pgrx::pg_sys::AccessExclusiveLock as pgrx::pg_sys::LOCKMODE, + ); + } + kind + }); +} + +#[pgrx::pg_extern(sql = "")] +fn _vchordrq_amhandler(_fcinfo: pgrx::pg_sys::FunctionCallInfo) -> Internal { + type T = pgrx::pg_sys::IndexAmRoutine; + unsafe { + let index_am_routine = pgrx::pg_sys::palloc0(size_of::()) as *mut T; + index_am_routine.write(AM_HANDLER); + Internal::from(Some(Datum::from(index_am_routine))) + } +} + +const AM_HANDLER: pgrx::pg_sys::IndexAmRoutine = const { + let mut am_routine = unsafe { std::mem::zeroed::() }; + + am_routine.type_ = pgrx::pg_sys::NodeTag::T_IndexAmRoutine; + + am_routine.amsupport = 1; + am_routine.amcanorderbyop = true; + + #[cfg(feature = "pg17")] + { + am_routine.amcanbuildparallel = true; + } + + // Index access methods that set `amoptionalkey` to `false` + // must index all tuples, even if the first column is `NULL`. + // However, PostgreSQL does not generate a path if there is no + // index clauses, even if there is a `ORDER BY` clause. + // So we have to set it to `true` and set costs of every path + // for vector index scans without `ORDER BY` clauses a large number + // and throw errors if someone really wants such a path. + am_routine.amoptionalkey = true; + + am_routine.amvalidate = Some(amvalidate); + am_routine.amoptions = Some(amoptions); + am_routine.amcostestimate = Some(amcostestimate); + + am_routine.ambuild = Some(am_build::ambuild); + am_routine.ambuildempty = Some(am_build::ambuildempty); + am_routine.aminsert = Some(aminsert); + am_routine.ambulkdelete = Some(ambulkdelete); + am_routine.amvacuumcleanup = Some(amvacuumcleanup); + + am_routine.ambeginscan = Some(am_scan::ambeginscan); + am_routine.amrescan = Some(am_scan::amrescan); + am_routine.amgettuple = Some(am_scan::amgettuple); + am_routine.amendscan = Some(am_scan::amendscan); + + am_routine +}; + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amvalidate(_opclass_oid: pgrx::pg_sys::Oid) -> bool { + true +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amoptions(reloptions: Datum, validate: bool) -> *mut pgrx::pg_sys::bytea { + let relopt_kind = RELOPT_KIND.get().copied().expect("init is not called"); + let rdopts = unsafe { + pgrx::pg_sys::build_reloptions( + reloptions, + validate, + relopt_kind, + size_of::(), + TABLE.as_ptr(), + TABLE.len() as _, + ) + }; + rdopts as *mut pgrx::pg_sys::bytea +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amcostestimate( + _root: *mut pgrx::pg_sys::PlannerInfo, + path: *mut pgrx::pg_sys::IndexPath, + _loop_count: f64, + index_startup_cost: *mut pgrx::pg_sys::Cost, + index_total_cost: *mut pgrx::pg_sys::Cost, + index_selectivity: *mut pgrx::pg_sys::Selectivity, + index_correlation: *mut f64, + index_pages: *mut f64, +) { + unsafe { + if (*path).indexorderbys.is_null() && (*path).indexclauses.is_null() { + *index_startup_cost = f64::MAX; + *index_total_cost = f64::MAX; + *index_selectivity = 0.0; + *index_correlation = 0.0; + *index_pages = 0.0; + return; + } + *index_startup_cost = 0.0; + *index_total_cost = 0.0; + *index_selectivity = 1.0; + *index_correlation = 1.0; + *index_pages = 0.0; + } +} + +#[cfg(feature = "pg13")] +#[pgrx::pg_guard] +pub unsafe extern "C" fn aminsert( + index_relation: pgrx::pg_sys::Relation, + values: *mut Datum, + is_null: *mut bool, + heap_tid: pgrx::pg_sys::ItemPointer, + _heap_relation: pgrx::pg_sys::Relation, + _check_unique: pgrx::pg_sys::IndexUniqueCheck::Type, + _index_info: *mut pgrx::pg_sys::IndexInfo, +) -> bool { + unsafe { aminsertinner(index_relation, values, is_null, heap_tid) } +} + +#[cfg(any(feature = "pg14", feature = "pg15", feature = "pg16", feature = "pg17"))] +#[pgrx::pg_guard] +pub unsafe extern "C" fn aminsert( + index_relation: pgrx::pg_sys::Relation, + values: *mut Datum, + is_null: *mut bool, + heap_tid: pgrx::pg_sys::ItemPointer, + _heap_relation: pgrx::pg_sys::Relation, + _check_unique: pgrx::pg_sys::IndexUniqueCheck::Type, + _index_unchanged: bool, + _index_info: *mut pgrx::pg_sys::IndexInfo, +) -> bool { + unsafe { aminsertinner(index_relation, values, is_null, heap_tid) } +} + +unsafe fn aminsertinner( + index_relation: pgrx::pg_sys::Relation, + values: *mut Datum, + is_null: *mut bool, + heap_tid: pgrx::pg_sys::ItemPointer, +) -> bool { + let opfamily = unsafe { crate::index::opclass::opfamily(index_relation) }; + let index = unsafe { PostgresRelation::new(index_relation) }; + let payload = ctid_to_pointer(unsafe { heap_tid.read() }); + let vector = unsafe { opfamily.input_vector(*values.add(0), *is_null.add(0)) }; + let Some(vector) = vector else { return false }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => algorithm::insert::, L2>>( + index, + payload, + RandomProject::project(VectOwned::::from_owned(vector).as_borrowed()), + ), + (VectorKind::Vecf32, DistanceKind::Dot) => algorithm::insert::, Dot>>( + index, + payload, + RandomProject::project(VectOwned::::from_owned(vector).as_borrowed()), + ), + (VectorKind::Vecf16, DistanceKind::L2) => algorithm::insert::, L2>>( + index, + payload, + RandomProject::project(VectOwned::::from_owned(vector).as_borrowed()), + ), + (VectorKind::Vecf16, DistanceKind::Dot) => algorithm::insert::, Dot>>( + index, + payload, + RandomProject::project(VectOwned::::from_owned(vector).as_borrowed()), + ), + } + false +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn ambulkdelete( + info: *mut pgrx::pg_sys::IndexVacuumInfo, + stats: *mut pgrx::pg_sys::IndexBulkDeleteResult, + callback: pgrx::pg_sys::IndexBulkDeleteCallback, + callback_state: *mut std::os::raw::c_void, +) -> *mut pgrx::pg_sys::IndexBulkDeleteResult { + let mut stats = stats; + if stats.is_null() { + stats = unsafe { + pgrx::pg_sys::palloc0(size_of::()).cast() + }; + } + let opfamily = unsafe { crate::index::opclass::opfamily((*info).index) }; + let index = unsafe { PostgresRelation::new((*info).index) }; + let check = || unsafe { + pgrx::pg_sys::vacuum_delay_point(); + }; + let callback = callback.expect("null function pointer"); + let callback = |p: NonZeroU64| unsafe { callback(&mut pointer_to_ctid(p), callback_state) }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => { + algorithm::bulkdelete::, L2>>(index, check, callback); + } + (VectorKind::Vecf32, DistanceKind::Dot) => { + algorithm::bulkdelete::, Dot>>(index, check, callback); + } + (VectorKind::Vecf16, DistanceKind::L2) => { + algorithm::bulkdelete::, L2>>(index, check, callback); + } + (VectorKind::Vecf16, DistanceKind::Dot) => { + algorithm::bulkdelete::, Dot>>(index, check, callback); + } + } + stats +} + +#[pgrx::pg_guard] +pub unsafe extern "C" fn amvacuumcleanup( + info: *mut pgrx::pg_sys::IndexVacuumInfo, + stats: *mut pgrx::pg_sys::IndexBulkDeleteResult, +) -> *mut pgrx::pg_sys::IndexBulkDeleteResult { + let mut stats = stats; + if stats.is_null() { + stats = unsafe { + pgrx::pg_sys::palloc0(size_of::()).cast() + }; + } + let opfamily = unsafe { crate::index::opclass::opfamily((*info).index) }; + let index = unsafe { PostgresRelation::new((*info).index) }; + let check = || unsafe { + pgrx::pg_sys::vacuum_delay_point(); + }; + match (opfamily.vector_kind(), opfamily.distance_kind()) { + (VectorKind::Vecf32, DistanceKind::L2) => { + algorithm::maintain::, L2>>(index, check); + } + (VectorKind::Vecf32, DistanceKind::Dot) => { + algorithm::maintain::, Dot>>(index, check); + } + (VectorKind::Vecf16, DistanceKind::L2) => { + algorithm::maintain::, L2>>(index, check); + } + (VectorKind::Vecf16, DistanceKind::Dot) => { + algorithm::maintain::, Dot>>(index, check); + } + } + stats +} + +const fn pointer_to_ctid(pointer: NonZeroU64) -> pgrx::pg_sys::ItemPointerData { + let value = pointer.get(); + pgrx::pg_sys::ItemPointerData { + ip_blkid: pgrx::pg_sys::BlockIdData { + bi_hi: ((value >> 32) & 0xffff) as u16, + bi_lo: ((value >> 16) & 0xffff) as u16, + }, + ip_posid: (value & 0xffff) as u16, + } +} + +const fn ctid_to_pointer(ctid: pgrx::pg_sys::ItemPointerData) -> NonZeroU64 { + let mut value = 0; + value |= (ctid.ip_blkid.bi_hi as u64) << 32; + value |= (ctid.ip_blkid.bi_lo as u64) << 16; + value |= ctid.ip_posid as u64; + NonZeroU64::new(value).expect("invalid pointer") +} + +#[test] +const fn soundness_check() { + let a = pgrx::pg_sys::ItemPointerData { + ip_blkid: pgrx::pg_sys::BlockIdData { bi_hi: 1, bi_lo: 2 }, + ip_posid: 3, + }; + let b = ctid_to_pointer(a); + let c = pointer_to_ctid(b); + assert!(a.ip_blkid.bi_hi == c.ip_blkid.bi_hi); + assert!(a.ip_blkid.bi_lo == c.ip_blkid.bi_lo); + assert!(a.ip_posid == c.ip_posid); +} diff --git a/src/index/am_options.rs b/src/index/am_options.rs deleted file mode 100644 index 06ca8e5..0000000 --- a/src/index/am_options.rs +++ /dev/null @@ -1,235 +0,0 @@ -use crate::datatype::memory_halfvec::HalfvecInput; -use crate::datatype::memory_halfvec::HalfvecOutput; -use crate::datatype::memory_vector::VectorInput; -use crate::datatype::memory_vector::VectorOutput; -use crate::datatype::typmod::Typmod; -use crate::types::{BorrowedVector, OwnedVector}; -use crate::types::{DistanceKind, VectorKind}; -use crate::types::{VchordrqIndexingOptions, VectorOptions}; -use distance::Distance; -use pgrx::datum::FromDatum; -use pgrx::heap_tuple::PgHeapTuple; -use serde::Deserialize; -use std::ffi::CStr; -use std::num::NonZero; -use vector::VectorBorrowed; - -#[derive(Copy, Clone, Debug, Default)] -#[repr(C)] -pub struct Reloption { - vl_len_: i32, - pub options: i32, -} - -impl Reloption { - pub const TAB: &'static [pgrx::pg_sys::relopt_parse_elt] = &[pgrx::pg_sys::relopt_parse_elt { - optname: c"options".as_ptr(), - opttype: pgrx::pg_sys::relopt_type::RELOPT_TYPE_STRING, - offset: std::mem::offset_of!(Reloption, options) as i32, - }]; - unsafe fn options(&self) -> &CStr { - unsafe { - let ptr = (&raw const *self) - .cast::() - .offset(self.options as _); - CStr::from_ptr(ptr) - } - } -} - -#[repr(u8)] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum PgDistanceKind { - L2, - Dot, - Cos, -} - -impl PgDistanceKind { - pub fn to_distance(self) -> DistanceKind { - match self { - PgDistanceKind::L2 => DistanceKind::L2, - PgDistanceKind::Dot | PgDistanceKind::Cos => DistanceKind::Dot, - } - } -} - -fn convert_name_to_vd(name: &str) -> Option<(VectorKind, PgDistanceKind)> { - match name.strip_suffix("_ops") { - Some("vector_l2") => Some((VectorKind::Vecf32, PgDistanceKind::L2)), - Some("vector_ip") => Some((VectorKind::Vecf32, PgDistanceKind::Dot)), - Some("vector_cosine") => Some((VectorKind::Vecf32, PgDistanceKind::Cos)), - Some("halfvec_l2") => Some((VectorKind::Vecf16, PgDistanceKind::L2)), - Some("halfvec_ip") => Some((VectorKind::Vecf16, PgDistanceKind::Dot)), - Some("halfvec_cosine") => Some((VectorKind::Vecf16, PgDistanceKind::Cos)), - _ => None, - } -} - -unsafe fn convert_reloptions_to_options( - reloptions: *const pgrx::pg_sys::varlena, -) -> VchordrqIndexingOptions { - #[derive(Debug, Clone, Deserialize, Default)] - #[serde(deny_unknown_fields)] - struct Parsed { - #[serde(flatten)] - rabitq: VchordrqIndexingOptions, - } - let reloption = reloptions as *const Reloption; - if reloption.is_null() || unsafe { (*reloption).options == 0 } { - return Default::default(); - } - let s = unsafe { (*reloption).options() }.to_string_lossy(); - match toml::from_str::(&s) { - Ok(p) => p.rabitq, - Err(e) => pgrx::error!("failed to parse options: {}", e), - } -} - -pub unsafe fn options(index: pgrx::pg_sys::Relation) -> (VectorOptions, VchordrqIndexingOptions) { - let att = unsafe { &mut *(*index).rd_att }; - let atts = unsafe { att.attrs.as_slice(att.natts as _) }; - if atts.is_empty() { - pgrx::error!("indexing on no columns is not supported"); - } - if atts.len() != 1 { - pgrx::error!("multicolumn index is not supported"); - } - // get dims - let typmod = Typmod::parse_from_i32(atts[0].type_mod()).unwrap(); - let dims = if let Some(dims) = typmod.dims() { - dims.get() - } else { - pgrx::error!( - "Dimensions type modifier of a vector column is needed for building the index." - ); - }; - // get v, d - let opfamily = unsafe { opfamily(index) }; - let vector = VectorOptions { - dims, - v: opfamily.vector, - d: opfamily.distance_kind(), - }; - // get indexing, segment, optimizing - let rabitq = unsafe { convert_reloptions_to_options((*index).rd_options) }; - (vector, rabitq) -} - -#[derive(Debug, Clone, Copy)] -pub struct Opfamily { - vector: VectorKind, - pg_distance: PgDistanceKind, -} - -impl Opfamily { - pub unsafe fn datum_to_vector( - self, - datum: pgrx::pg_sys::Datum, - is_null: bool, - ) -> Option { - if is_null || datum.is_null() { - return None; - } - let vector = match self.vector { - VectorKind::Vecf32 => { - let vector = unsafe { VectorInput::from_datum(datum, false).unwrap() }; - self.preprocess(BorrowedVector::Vecf32(vector.as_borrowed())) - } - VectorKind::Vecf16 => { - let vector = unsafe { HalfvecInput::from_datum(datum, false).unwrap() }; - self.preprocess(BorrowedVector::Vecf16(vector.as_borrowed())) - } - }; - Some(vector) - } - pub unsafe fn datum_to_sphere( - self, - datum: pgrx::pg_sys::Datum, - is_null: bool, - ) -> (Option, Option) { - if is_null || datum.is_null() { - return (None, None); - } - let tuple = unsafe { PgHeapTuple::from_composite_datum(datum) }; - let center = match self.vector { - VectorKind::Vecf32 => tuple - .get_by_index::(NonZero::new(1).unwrap()) - .unwrap() - .map(|vector| self.preprocess(BorrowedVector::Vecf32(vector.as_borrowed()))), - VectorKind::Vecf16 => tuple - .get_by_index::(NonZero::new(1).unwrap()) - .unwrap() - .map(|vector| self.preprocess(BorrowedVector::Vecf16(vector.as_borrowed()))), - }; - let radius = tuple.get_by_index::(NonZero::new(2).unwrap()).unwrap(); - (center, radius) - } - pub fn preprocess(self, vector: BorrowedVector<'_>) -> OwnedVector { - use BorrowedVector as B; - use OwnedVector as O; - match (vector, self.pg_distance) { - (B::Vecf32(x), PgDistanceKind::L2) => O::Vecf32(x.own()), - (B::Vecf32(x), PgDistanceKind::Dot) => O::Vecf32(x.own()), - (B::Vecf32(x), PgDistanceKind::Cos) => O::Vecf32(x.function_normalize()), - (B::Vecf16(x), PgDistanceKind::L2) => O::Vecf16(x.own()), - (B::Vecf16(x), PgDistanceKind::Dot) => O::Vecf16(x.own()), - (B::Vecf16(x), PgDistanceKind::Cos) => O::Vecf16(x.function_normalize()), - } - } - pub fn process(self, x: Distance) -> f32 { - match self.pg_distance { - PgDistanceKind::Cos => f32::from(x) + 1.0f32, - PgDistanceKind::L2 => f32::from(x).sqrt(), - PgDistanceKind::Dot => x.into(), - } - } - pub fn distance_kind(self) -> DistanceKind { - self.pg_distance.to_distance() - } - pub fn vector_kind(self) -> VectorKind { - self.vector - } -} - -pub unsafe fn opfamily(index: pgrx::pg_sys::Relation) -> Opfamily { - use pgrx::pg_sys::Oid; - - let proc = unsafe { pgrx::pg_sys::index_getprocid(index, 1, 1) }; - - if proc == Oid::INVALID { - pgrx::error!("support function 1 is not found"); - } - - let mut flinfo = pgrx::pg_sys::FmgrInfo::default(); - unsafe { - pgrx::pg_sys::fmgr_info(proc, &mut flinfo); - } - - let fn_addr = flinfo.fn_addr.expect("null function pointer"); - - let mut fcinfo = unsafe { std::mem::zeroed::() }; - fcinfo.flinfo = &mut flinfo; - fcinfo.fncollation = pgrx::pg_sys::DEFAULT_COLLATION_OID; - fcinfo.context = std::ptr::null_mut(); - fcinfo.resultinfo = std::ptr::null_mut(); - fcinfo.isnull = true; - fcinfo.nargs = 0; - - let result_datum = unsafe { pgrx::pg_sys::ffi::pg_guard_ffi_boundary(|| fn_addr(&mut fcinfo)) }; - - let result_option = unsafe { String::from_datum(result_datum, fcinfo.isnull) }; - - let result_string = result_option.expect("null string"); - - let (vector, pg_distance) = convert_name_to_vd(&result_string).unwrap(); - - unsafe { - pgrx::pg_sys::pfree(result_datum.cast_mut_ptr()); - } - - Opfamily { - vector, - pg_distance, - } -} diff --git a/src/index/am_scan.rs b/src/index/am_scan.rs deleted file mode 100644 index 83e62f3..0000000 --- a/src/index/am_scan.rs +++ /dev/null @@ -1,186 +0,0 @@ -use super::am_options::Opfamily; -use crate::algorithm::operator::Vector; -use crate::algorithm::operator::{Dot, L2, Op}; -use crate::algorithm::scan::scan; -use crate::gucs::executing::epsilon; -use crate::gucs::executing::max_scan_tuples; -use crate::gucs::executing::probes; -use crate::postgres::PostgresRelation; -use crate::types::DistanceKind; -use crate::types::OwnedVector; -use crate::types::VectorKind; -use distance::Distance; -use half::f16; -use std::num::NonZeroU64; -use vector::vect::VectOwned; - -pub enum Scanner { - Initial { - vector: Option<(OwnedVector, Opfamily)>, - threshold: Option, - recheck: bool, - }, - Vbase { - vbase: Box>, - threshold: Option, - recheck: bool, - opfamily: Opfamily, - }, - Empty {}, -} - -pub fn scan_build( - orderbys: Vec>, - spheres: Vec<(Option, Option)>, - opfamily: Opfamily, -) -> (Option<(OwnedVector, Opfamily)>, Option, bool) { - let mut pair = None; - let mut threshold = None; - let mut recheck = false; - for orderby_vector in orderbys { - if pair.is_none() { - pair = orderby_vector; - } else if orderby_vector.is_some() { - pgrx::error!("vector search with multiple vectors is not supported"); - } - } - for (sphere_vector, sphere_threshold) in spheres { - if pair.is_none() { - pair = sphere_vector; - threshold = sphere_threshold; - } else { - recheck = true; - break; - } - } - (pair.map(|x| (x, opfamily)), threshold, recheck) -} - -pub fn scan_make( - vector: Option<(OwnedVector, Opfamily)>, - threshold: Option, - recheck: bool, -) -> Scanner { - Scanner::Initial { - vector, - threshold, - recheck, - } -} - -pub fn scan_next(scanner: &mut Scanner, relation: PostgresRelation) -> Option<(NonZeroU64, bool)> { - if let Scanner::Initial { - vector, - threshold, - recheck, - } = scanner - { - if let Some((vector, opfamily)) = vector.as_ref() { - match (opfamily.vector_kind(), opfamily.distance_kind()) { - (VectorKind::Vecf32, DistanceKind::L2) => { - let vbase = scan::, L2>>( - relation, - VectOwned::::from_owned(vector.clone()), - probes(), - epsilon(), - ); - *scanner = Scanner::Vbase { - vbase: if let Some(max_scan_tuples) = max_scan_tuples() { - Box::new(vbase.take(max_scan_tuples as usize)) - } else { - Box::new(vbase) - }, - threshold: *threshold, - recheck: *recheck, - opfamily: *opfamily, - }; - } - (VectorKind::Vecf32, DistanceKind::Dot) => { - let vbase = scan::, Dot>>( - relation, - VectOwned::::from_owned(vector.clone()), - probes(), - epsilon(), - ); - *scanner = Scanner::Vbase { - vbase: if let Some(max_scan_tuples) = max_scan_tuples() { - Box::new(vbase.take(max_scan_tuples as usize)) - } else { - Box::new(vbase) - }, - threshold: *threshold, - recheck: *recheck, - opfamily: *opfamily, - }; - } - (VectorKind::Vecf16, DistanceKind::L2) => { - let vbase = scan::, L2>>( - relation, - VectOwned::::from_owned(vector.clone()), - probes(), - epsilon(), - ); - *scanner = Scanner::Vbase { - vbase: if let Some(max_scan_tuples) = max_scan_tuples() { - Box::new(vbase.take(max_scan_tuples as usize)) - } else { - Box::new(vbase) - }, - threshold: *threshold, - recheck: *recheck, - opfamily: *opfamily, - }; - } - (VectorKind::Vecf16, DistanceKind::Dot) => { - let vbase = scan::, Dot>>( - relation, - VectOwned::::from_owned(vector.clone()), - probes(), - epsilon(), - ); - *scanner = Scanner::Vbase { - vbase: if let Some(max_scan_tuples) = max_scan_tuples() { - Box::new(vbase.take(max_scan_tuples as usize)) - } else { - Box::new(vbase) - }, - threshold: *threshold, - recheck: *recheck, - opfamily: *opfamily, - }; - } - } - } else { - *scanner = Scanner::Empty {}; - } - } - match scanner { - Scanner::Initial { .. } => unreachable!(), - Scanner::Vbase { - vbase, - threshold, - recheck, - opfamily, - } => match ( - vbase.next().map(|(d, p)| (opfamily.process(d), p)), - threshold, - ) { - (Some((_, ptr)), None) => Some((ptr, *recheck)), - (Some((distance, ptr)), Some(t)) if distance < *t => Some((ptr, *recheck)), - _ => { - let scanner = std::mem::replace(scanner, Scanner::Empty {}); - scan_release(scanner); - None - } - }, - Scanner::Empty {} => None, - } -} - -pub fn scan_release(scanner: Scanner) { - match scanner { - Scanner::Initial { .. } => {} - Scanner::Vbase { .. } => {} - Scanner::Empty {} => {} - } -} diff --git a/src/index/functions.rs b/src/index/functions.rs index 1f3b4e2..be2f963 100644 --- a/src/index/functions.rs +++ b/src/index/functions.rs @@ -1,9 +1,6 @@ -use super::am_options; -use crate::algorithm::operator::{Dot, L2, Op}; -use crate::algorithm::prewarm::prewarm; -use crate::postgres::PostgresRelation; -use crate::types::DistanceKind; -use crate::types::VectorKind; +use crate::index::storage::PostgresRelation; +use algorithm::operator::{Dot, L2, Op}; +use algorithm::types::*; use half::f16; use pgrx::pg_sys::Oid; use pgrx_catalog::{PgAm, PgClass}; @@ -24,19 +21,27 @@ fn _vchordrq_prewarm(indexrelid: Oid, height: i32) -> String { } let index = unsafe { pgrx::pg_sys::index_open(indexrelid, pgrx::pg_sys::ShareLock as _) }; let relation = unsafe { PostgresRelation::new(index) }; - let opfamily = unsafe { am_options::opfamily(index) }; + let opfamily = unsafe { crate::index::opclass::opfamily(index) }; let message = match (opfamily.vector_kind(), opfamily.distance_kind()) { (VectorKind::Vecf32, DistanceKind::L2) => { - prewarm::, L2>>(relation, height) + algorithm::prewarm::, L2>>(relation, height, || { + pgrx::check_for_interrupts!(); + }) } (VectorKind::Vecf32, DistanceKind::Dot) => { - prewarm::, Dot>>(relation, height) + algorithm::prewarm::, Dot>>(relation, height, || { + pgrx::check_for_interrupts!(); + }) } (VectorKind::Vecf16, DistanceKind::L2) => { - prewarm::, L2>>(relation, height) + algorithm::prewarm::, L2>>(relation, height, || { + pgrx::check_for_interrupts!(); + }) } (VectorKind::Vecf16, DistanceKind::Dot) => { - prewarm::, Dot>>(relation, height) + algorithm::prewarm::, Dot>>(relation, height, || { + pgrx::check_for_interrupts!(); + }) } }; unsafe { diff --git a/src/gucs/executing.rs b/src/index/gucs.rs similarity index 61% rename from src/gucs/executing.rs rename to src/index/gucs.rs index af6cce7..5e4216b 100644 --- a/src/gucs/executing.rs +++ b/src/index/gucs.rs @@ -4,8 +4,10 @@ use std::ffi::CStr; static PROBES: GucSetting> = GucSetting::>::new(Some(c"10")); static EPSILON: GucSetting = GucSetting::::new(1.9); static MAX_SCAN_TUPLES: GucSetting = GucSetting::::new(-1); +static PREWARM_DIM: GucSetting> = + GucSetting::>::new(Some(c"64,128,256,384,512,768,1024,1536")); -pub unsafe fn init() { +pub fn init() { GucRegistry::define_string_guc( "vchordrq.probes", "`probes` argument of vchordrq.", @@ -34,6 +36,20 @@ pub unsafe fn init() { GucContext::Userset, GucFlags::default(), ); + GucRegistry::define_string_guc( + "vchordrq.prewarm_dim", + "prewarm_dim when the extension is loading.", + "prewarm_dim when the extension is loading.", + &PREWARM_DIM, + GucContext::Userset, + GucFlags::default(), + ); + unsafe { + #[cfg(any(feature = "pg13", feature = "pg14"))] + pgrx::pg_sys::EmitWarningsOnPlaceholders(c"vchordrq".as_ptr()); + #[cfg(any(feature = "pg15", feature = "pg16", feature = "pg17"))] + pgrx::pg_sys::MarkGUCPrefixReserved(c"vchordrq".as_ptr()); + } } pub fn probes() -> Vec { @@ -70,3 +86,24 @@ pub fn max_scan_tuples() -> Option { let x = MAX_SCAN_TUPLES.get(); if x < 0 { None } else { Some(x as u32) } } + +pub fn prewarm_dim() -> Vec { + if let Some(prewarm_dim) = PREWARM_DIM.get() { + if let Ok(prewarm_dim) = prewarm_dim.to_str() { + let mut result = Vec::new(); + for dim in prewarm_dim.split(',') { + if let Ok(dim) = dim.trim().parse::() { + result.push(dim); + } else { + pgrx::warning!("{dim:?} is not a valid integer"); + } + } + result + } else { + pgrx::warning!("vchordrq.prewarm_dim is not a valid UTF-8 string"); + Vec::new() + } + } else { + Vec::new() + } +} diff --git a/src/index/mod.rs b/src/index/mod.rs index 5203e4f..511a4f5 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -1,12 +1,14 @@ pub mod am; -pub mod am_options; -pub mod am_scan; pub mod functions; +pub mod gucs; pub mod opclass; -pub mod utils; +pub mod projection; +pub mod storage; -pub unsafe fn init() { - unsafe { - am::init(); +pub fn init() { + am::init(); + gucs::init(); + for x in gucs::prewarm_dim() { + projection::prewarm(x as _); } } diff --git a/src/index/opclass.rs b/src/index/opclass.rs index a2dc861..63a6be5 100644 --- a/src/index/opclass.rs +++ b/src/index/opclass.rs @@ -1,3 +1,13 @@ +use crate::datatype::memory_halfvec::{HalfvecInput, HalfvecOutput}; +use crate::datatype::memory_vector::{VectorInput, VectorOutput}; +use algorithm::types::*; +use distance::Distance; +use pgrx::datum::FromDatum; +use pgrx::heap_tuple::PgHeapTuple; +use pgrx::pg_sys::Datum; +use std::num::NonZero; +use vector::VectorBorrowed; + #[pgrx::pg_extern(immutable, strict, parallel_safe)] fn _vchordrq_support_vector_l2_ops() -> String { "vector_l2_ops".to_string() @@ -27,3 +37,139 @@ fn _vchordrq_support_halfvec_ip_ops() -> String { fn _vchordrq_support_halfvec_cosine_ops() -> String { "halfvec_cosine_ops".to_string() } + +#[repr(u8)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum PostgresDistanceKind { + L2, + Ip, + Cosine, +} + +pub struct Sphere { + pub center: T, + pub radius: f32, +} + +#[derive(Debug, Clone, Copy)] +pub struct Opfamily { + vector: VectorKind, + postgres_distance: PostgresDistanceKind, +} + +impl Opfamily { + fn input(self, vector: BorrowedVector<'_>) -> OwnedVector { + use {BorrowedVector as B, OwnedVector as O, PostgresDistanceKind as D}; + match (vector, self.postgres_distance) { + (B::Vecf32(x), D::L2) => O::Vecf32(x.own()), + (B::Vecf32(x), D::Ip) => O::Vecf32(x.own()), + (B::Vecf32(x), D::Cosine) => O::Vecf32(x.function_normalize()), + (B::Vecf16(x), D::L2) => O::Vecf16(x.own()), + (B::Vecf16(x), D::Ip) => O::Vecf16(x.own()), + (B::Vecf16(x), D::Cosine) => O::Vecf16(x.function_normalize()), + } + } + pub unsafe fn input_vector(self, datum: Datum, is_null: bool) -> Option { + if is_null || datum.is_null() { + return None; + } + let vector = match self.vector { + VectorKind::Vecf32 => { + let vector = unsafe { VectorInput::from_datum(datum, false).unwrap() }; + self.input(BorrowedVector::Vecf32(vector.as_borrowed())) + } + VectorKind::Vecf16 => { + let vector = unsafe { HalfvecInput::from_datum(datum, false).unwrap() }; + self.input(BorrowedVector::Vecf16(vector.as_borrowed())) + } + }; + Some(vector) + } + pub unsafe fn input_sphere(self, datum: Datum, is_null: bool) -> Option> { + if is_null || datum.is_null() { + return None; + } + let attno_1 = NonZero::new(1_usize).unwrap(); + let attno_2 = NonZero::new(2_usize).unwrap(); + let tuple = unsafe { PgHeapTuple::from_composite_datum(datum) }; + let center = match self.vector { + VectorKind::Vecf32 => { + let vector = tuple.get_by_index::(attno_1).unwrap()?; + self.input(BorrowedVector::Vecf32(vector.as_borrowed())) + } + VectorKind::Vecf16 => { + let vector = tuple.get_by_index::(attno_1).unwrap()?; + self.input(BorrowedVector::Vecf16(vector.as_borrowed())) + } + }; + let radius = tuple.get_by_index::(attno_2).unwrap()?; + Some(Sphere { center, radius }) + } + pub fn output(self, x: Distance) -> f32 { + match self.postgres_distance { + PostgresDistanceKind::Cosine => x.to_f32() + 1.0f32, + PostgresDistanceKind::L2 => x.to_f32().sqrt(), + PostgresDistanceKind::Ip => x.to_f32(), + } + } + pub const fn distance_kind(self) -> DistanceKind { + match self.postgres_distance { + PostgresDistanceKind::L2 => DistanceKind::L2, + PostgresDistanceKind::Ip | PostgresDistanceKind::Cosine => DistanceKind::Dot, + } + } + pub const fn vector_kind(self) -> VectorKind { + self.vector + } +} + +pub unsafe fn opfamily(index_relation: pgrx::pg_sys::Relation) -> Opfamily { + use pgrx::pg_sys::Oid; + + let proc = unsafe { pgrx::pg_sys::index_getprocid(index_relation, 1, 1) }; + + if proc == Oid::INVALID { + pgrx::error!("support function 1 is not found"); + } + + let mut flinfo = pgrx::pg_sys::FmgrInfo::default(); + + unsafe { + pgrx::pg_sys::fmgr_info(proc, &mut flinfo); + } + + let fn_addr = flinfo.fn_addr.expect("null function pointer"); + + let mut fcinfo = unsafe { std::mem::zeroed::() }; + fcinfo.flinfo = &mut flinfo; + fcinfo.fncollation = pgrx::pg_sys::DEFAULT_COLLATION_OID; + fcinfo.context = std::ptr::null_mut(); + fcinfo.resultinfo = std::ptr::null_mut(); + fcinfo.isnull = true; + fcinfo.nargs = 0; + + let result_datum = unsafe { pgrx::pg_sys::ffi::pg_guard_ffi_boundary(|| fn_addr(&mut fcinfo)) }; + + let result_option = unsafe { String::from_datum(result_datum, fcinfo.isnull) }; + + let result_string = result_option.expect("null return value"); + + let (vector, postgres_distance) = match result_string.as_str() { + "vector_l2_ops" => (VectorKind::Vecf32, PostgresDistanceKind::L2), + "vector_ip_ops" => (VectorKind::Vecf32, PostgresDistanceKind::Ip), + "vector_cosine_ops" => (VectorKind::Vecf32, PostgresDistanceKind::Cosine), + "halfvec_l2_ops" => (VectorKind::Vecf16, PostgresDistanceKind::L2), + "halfvec_ip_ops" => (VectorKind::Vecf16, PostgresDistanceKind::Ip), + "halfvec_cosine_ops" => (VectorKind::Vecf16, PostgresDistanceKind::Cosine), + _ => pgrx::error!("unknown operator class"), + }; + + unsafe { + pgrx::pg_sys::pfree(result_datum.cast_mut_ptr()); + } + + Opfamily { + vector, + postgres_distance, + } +} diff --git a/src/projection.rs b/src/index/projection.rs similarity index 51% rename from src/projection.rs rename to src/index/projection.rs index fbcaeff..ca07e24 100644 --- a/src/projection.rs +++ b/src/index/projection.rs @@ -1,5 +1,7 @@ +use half::f16; use random_orthogonal_matrix::random_orthogonal_matrix; use std::sync::OnceLock; +use vector::vect::{VectBorrowed, VectOwned}; fn matrix(n: usize) -> Option<&'static Vec>> { static MATRIXS: [OnceLock>>; 1 + 60000] = [const { OnceLock::new() }; 1 + 60000]; @@ -20,3 +22,25 @@ pub fn project(vector: &[f32]) -> Vec { .map(|i| f32::reduce_sum_of_xy(vector, &matrix[i])) .collect() } + +pub trait RandomProject { + type Output; + fn project(self) -> Self::Output; +} + +impl RandomProject for VectBorrowed<'_, f32> { + type Output = VectOwned; + fn project(self) -> VectOwned { + VectOwned::new(project(self.slice())) + } +} + +impl RandomProject for VectBorrowed<'_, f16> { + type Output = VectOwned; + fn project(self) -> VectOwned { + use simd::Floating; + VectOwned::new(f16::vector_from_f32(&project(&f16::vector_to_f32( + self.slice(), + )))) + } +} diff --git a/src/postgres.rs b/src/index/storage.rs similarity index 91% rename from src/postgres.rs rename to src/index/storage.rs index f68d0fa..a190ac0 100644 --- a/src/postgres.rs +++ b/src/index/storage.rs @@ -1,4 +1,4 @@ -use crate::algorithm::{Opaque, Page, PageGuard, RelationRead, RelationWrite}; +use algorithm::{Opaque, Page, PageGuard, RelationRead, RelationWrite}; use std::mem::{MaybeUninit, offset_of}; use std::ops::{Deref, DerefMut}; use std::ptr::NonNull; @@ -42,31 +42,6 @@ impl PostgresPage { assert_eq!(offset_of!(Self, opaque), this.header.pd_special as usize); this } - #[allow(dead_code)] - fn clone_into_boxed(&self) -> Box { - let mut result = Box::new_uninit(); - unsafe { - std::ptr::copy(self as *const Self, result.as_mut_ptr(), 1); - result.assume_init() - } - } - #[allow(dead_code)] - fn reconstruct(&mut self, removes: &[u16]) { - let mut removes = removes.to_vec(); - removes.sort(); - removes.dedup(); - let n = removes.len(); - if n > 0 { - assert!(removes[n - 1] <= self.len()); - unsafe { - pgrx::pg_sys::PageIndexMultiDelete( - (self as *mut Self).cast(), - removes.as_ptr().cast_mut(), - removes.len() as _, - ); - } - } - } } impl Page for PostgresPage { @@ -257,16 +232,6 @@ impl PostgresRelation { pub unsafe fn new(raw: pgrx::pg_sys::Relation) -> Self { Self { raw } } - - #[allow(dead_code)] - pub fn len(&self) -> u32 { - unsafe { - pgrx::pg_sys::RelationGetNumberOfBlocksInFork( - self.raw, - pgrx::pg_sys::ForkNumber::MAIN_FORKNUM, - ) - } - } } impl RelationRead for PostgresRelation { diff --git a/src/index/utils.rs b/src/index/utils.rs deleted file mode 100644 index 18234ac..0000000 --- a/src/index/utils.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::num::NonZeroU64; - -pub const fn pointer_to_ctid(pointer: NonZeroU64) -> pgrx::pg_sys::ItemPointerData { - let value = pointer.get(); - pgrx::pg_sys::ItemPointerData { - ip_blkid: pgrx::pg_sys::BlockIdData { - bi_hi: ((value >> 32) & 0xffff) as u16, - bi_lo: ((value >> 16) & 0xffff) as u16, - }, - ip_posid: (value & 0xffff) as u16, - } -} - -pub const fn ctid_to_pointer(ctid: pgrx::pg_sys::ItemPointerData) -> NonZeroU64 { - let mut value = 0; - value |= (ctid.ip_blkid.bi_hi as u64) << 32; - value |= (ctid.ip_blkid.bi_lo as u64) << 16; - value |= ctid.ip_posid as u64; - NonZeroU64::new(value).expect("invalid pointer") -} - -#[allow(dead_code)] -const fn soundness_check(a: pgrx::pg_sys::ItemPointerData) { - let b = ctid_to_pointer(a); - let c = pointer_to_ctid(b); - assert!(a.ip_blkid.bi_hi == c.ip_blkid.bi_hi); - assert!(a.ip_blkid.bi_lo == c.ip_blkid.bi_lo); - assert!(a.ip_posid == c.ip_posid); -} - -const _: () = soundness_check(pgrx::pg_sys::ItemPointerData { - ip_blkid: pgrx::pg_sys::BlockIdData { bi_hi: 1, bi_lo: 2 }, - ip_posid: 3, -}); diff --git a/src/lib.rs b/src/lib.rs index 187e3cd..20ce183 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,32 +1,22 @@ -#![feature(vec_pop_if)] #![allow(clippy::collapsible_else_if)] -#![allow(clippy::infallible_destructuring_match)] #![allow(clippy::too_many_arguments)] -#![allow(clippy::type_complexity)] +#![allow(unsafe_code)] -mod algorithm; mod datatype; -mod gucs; mod index; -mod postgres; -mod projection; -mod types; mod upgrade; -mod utils; pgrx::pg_module_magic!(); pgrx::extension_sql_file!("./sql/bootstrap.sql", bootstrap); pgrx::extension_sql_file!("./sql/finalize.sql", finalize); #[pgrx::pg_guard] -unsafe extern "C" fn _PG_init() { +extern "C" fn _PG_init() { if unsafe { pgrx::pg_sys::IsUnderPostmaster } { pgrx::error!("vchord must be loaded via shared_preload_libraries."); } + index::init(); unsafe { - index::init(); - gucs::init(); - #[cfg(any(feature = "pg13", feature = "pg14"))] pgrx::pg_sys::EmitWarningsOnPlaceholders(c"vchord".as_ptr()); #[cfg(any(feature = "pg15", feature = "pg16", feature = "pg17"))] diff --git a/src/upgrade/symbols.rs b/src/upgrade.rs similarity index 100% rename from src/upgrade/symbols.rs rename to src/upgrade.rs diff --git a/src/upgrade/mod.rs b/src/upgrade/mod.rs deleted file mode 100644 index 6eb441d..0000000 --- a/src/upgrade/mod.rs +++ /dev/null @@ -1 +0,0 @@ -mod symbols; diff --git a/src/utils/mod.rs b/src/utils/mod.rs deleted file mode 100644 index 85a84e0..0000000 --- a/src/utils/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod k_means; -pub mod parallelism; -pub mod pipe; diff --git a/src/utils/parallelism.rs b/src/utils/parallelism.rs deleted file mode 100644 index b960b56..0000000 --- a/src/utils/parallelism.rs +++ /dev/null @@ -1,62 +0,0 @@ -use std::any::Any; -use std::panic::AssertUnwindSafe; -use std::sync::Arc; - -pub use rayon::iter::ParallelIterator; - -pub trait Parallelism: Send + Sync { - fn check(&self); - - fn rayon_into_par_iter(&self, x: I) -> I::Iter; -} - -struct ParallelismCheckPanic(Box); - -pub struct RayonParallelism { - stop: Arc, -} - -impl RayonParallelism { - pub fn scoped( - num_threads: usize, - stop: Arc, - f: impl FnOnce(&Self) -> R, - ) -> Result { - match std::panic::catch_unwind(AssertUnwindSafe(|| { - rayon::ThreadPoolBuilder::new() - .num_threads(num_threads) - .panic_handler(|e| { - if e.downcast_ref::().is_some() { - return; - } - log::error!("Asynchronous task panickied."); - }) - .build_scoped( - |thread| thread.run(), - |_| { - let pool = Self { stop: stop.clone() }; - f(&pool) - }, - ) - })) { - Ok(x) => x, - Err(e) => match e.downcast::() { - Ok(payload) => std::panic::resume_unwind((*payload).0), - Err(e) => std::panic::resume_unwind(e), - }, - } - } -} - -impl Parallelism for RayonParallelism { - fn check(&self) { - match std::panic::catch_unwind(AssertUnwindSafe(|| (self.stop)())) { - Ok(()) => (), - Err(payload) => std::panic::panic_any(ParallelismCheckPanic(payload)), - } - } - - fn rayon_into_par_iter(&self, x: I) -> I::Iter { - x.into_par_iter() - } -}