Skip to content

feat: support seismic #552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 100 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

71 changes: 71 additions & 0 deletions crates/base/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,18 @@ impl IndexOptions {
));
}
}
IndexingOptions::Seismic(_) => {
if !matches!(self.vector.d, DistanceKind::Dot) {
return Err(ValidationError::new(
"seismic is not support for distance that is not negative dot product",
));
}
if !matches!(self.vector.v, VectorKind::SVecf32) {
return Err(ValidationError::new(
"seismic is not support for vectors that are not sparse vectors",
));
}
}
}
Ok(())
}
Expand Down Expand Up @@ -294,6 +306,7 @@ pub enum IndexingOptions {
Hnsw(HnswIndexingOptions),
InvertedIndex(InvertedIndexingOptions),
Rabitq(RabitqIndexingOptions),
Seismic(SeismicIndexingOptions),
}

impl IndexingOptions {
Expand Down Expand Up @@ -321,6 +334,12 @@ impl IndexingOptions {
};
x
}
/// Consumes the options and returns the inner [`SeismicIndexingOptions`].
///
/// # Panics
///
/// Reaches `unreachable!()` when `self` is any variant other than
/// `IndexingOptions::Seismic`; callers are expected to have checked the
/// variant beforehand.
pub fn unwrap_seismic(self) -> SeismicIndexingOptions {
    match self {
        IndexingOptions::Seismic(options) => options,
        _ => unreachable!(),
    }
}
}

impl Default for IndexingOptions {
Expand All @@ -337,6 +356,7 @@ impl Validate for IndexingOptions {
Self::Hnsw(x) => x.validate(),
Self::InvertedIndex(x) => x.validate(),
Self::Rabitq(x) => x.validate(),
Self::Seismic(x) => x.validate(),
}
}
}
Expand Down Expand Up @@ -462,6 +482,51 @@ impl Default for RabitqIndexingOptions {
}
}

/// Build-time options for the seismic index (the `IndexingOptions::Seismic` variant).
///
/// Every field has a serde default, so a field omitted from the serialized
/// form falls back to the matching `default_*` function, while unknown fields
/// are rejected by `deny_unknown_fields`. In addition to the per-field range
/// checks, the schema-level `validate_self` function is run on the whole
/// struct.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[validate(schema(function = "Self::validate_self"))]
#[serde(deny_unknown_fields)]
pub struct SeismicIndexingOptions {
/// Validated to lie in `100..=100_000`.
// NOTE(review): presumably the number of postings kept per posting list —
// confirm against the seismic crate.
#[serde(default = "SeismicIndexingOptions::default_n_postings")]
#[validate(range(min = 100, max = 100_000))]
pub n_postings: u32,
/// Validated to lie in `0.01..=1.0`. `validate_self` multiplies this by
/// `n_postings` and treats the product as the number of centroids.
#[serde(default = "SeismicIndexingOptions::default_centroid_fraction")]
#[validate(range(min = 0.01, max = 1.))]
pub centroid_fraction: f32,
/// Validated to lie in `0.0..=1.0`.
// NOTE(review): presumably the fraction of mass retained in block
// summaries — confirm against the seismic crate.
#[serde(default = "SeismicIndexingOptions::default_summary_energy")]
#[validate(range(min = 0., max = 1.))]
pub summary_energy: f32,
}

impl SeismicIndexingOptions {
    /// Value used for `n_postings` when the field is not supplied.
    fn default_n_postings() -> u32 {
        4_000
    }

    /// Value used for `centroid_fraction` when the field is not supplied.
    fn default_centroid_fraction() -> f32 {
        0.1_f32
    }

    /// Value used for `summary_energy` when the field is not supplied.
    fn default_summary_energy() -> f32 {
        0.4_f32
    }

    /// Schema-level validation across fields.
    ///
    /// The centroid count is taken as `n_postings * centroid_fraction`,
    /// truncated to an integer, and must not be larger than 65535.
    ///
    /// # Errors
    ///
    /// Returns a `ValidationError` when the computed centroid count exceeds
    /// 65535.
    fn validate_self(&self) -> Result<(), ValidationError> {
        // Float-to-int `as` truncates toward zero, so only counts of 65536 or
        // more are rejected.
        let centroids = (self.n_postings as f32 * self.centroid_fraction) as u32;
        if centroids <= 65535 {
            return Ok(());
        }
        Err(ValidationError::new(
            "centroids number cannot exceed 65535 in seismic indexing",
        ))
    }
}

impl Default for SeismicIndexingOptions {
fn default() -> Self {
Self {
n_postings: Self::default_n_postings(),
centroid_fraction: Self::default_centroid_fraction(),
summary_energy: Self::default_summary_energy(),
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "snake_case")]
Expand Down Expand Up @@ -583,6 +648,12 @@ pub struct SearchOptions {
pub rabitq_fast_scan: bool,
#[validate(range(min = 1, max = 65535))]
pub diskann_ef_search: u32,
#[validate(range(min = 1, max = 65535))]
pub seismic_heap_size: u32,
#[validate(range(min = 1, max = 100_000))]
pub seismic_q_cut: u32,
#[validate(range(min = 0.01, max = 1.))]
pub seismic_heap_factor: f32,
}

#[derive(Debug, Serialize, Deserialize)]
Expand Down
3 changes: 3 additions & 0 deletions crates/cli/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ impl QueryArguments {
ivf_pq_fast_scan: false,
rabitq_fast_scan: true,
rabitq_nprobe: self.probe,
seismic_heap_size: 100,
seismic_q_cut: 3,
seismic_heap_factor: 1.0,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions crates/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ base = { path = "../base" }

log.workspace = true
memmap2.workspace = true
parking_lot.workspace = true
rand.workspace = true
rustix.workspace = true
serde.workspace = true
Expand Down
4 changes: 4 additions & 0 deletions crates/common/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
#![cfg_attr(target_arch = "aarch64", feature(stdarch_aarch64_prefetch))]

pub mod always_equal;
pub mod clean;
pub mod dir_ops;
pub mod file_atomic;
pub mod json;
pub mod mmap_array;
pub mod prefetch;
pub mod rand;
pub mod remap;
pub mod sample;
pub mod variants;
pub mod vec2;
pub mod visited;
26 changes: 26 additions & 0 deletions crates/common/src/prefetch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/// Hints the CPU to prefetch the cache line containing `ptr` for reading.
///
/// * On `x86` / `x86_64` this issues `_mm_prefetch` with `_MM_HINT_NTA`.
/// * On `aarch64` this issues `_prefetch` with `_PREFETCH_READ` and
///   `_PREFETCH_LOCALITY0`.
/// * On every other architecture the call is a no-op.
///
/// The function returns nothing and never reads through `ptr` itself.
// The pointer is only forwarded to a prefetch intrinsic and is never
// dereferenced by this function, which is why it is exposed as a safe fn.
#[allow(clippy::not_unsafe_ptr_arg_deref)]
// The name deliberately mirrors the `NTA` hint it issues.
#[allow(non_snake_case)]
#[inline]
pub fn prefetch_read_NTA(ptr: *const i8) {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::{_mm_prefetch, _MM_HINT_NTA};

        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::{_mm_prefetch, _MM_HINT_NTA};

        // SAFETY: a prefetch is only a cache hint; the pointer is not
        // dereferenced by this code and no memory is written.
        unsafe {
            _mm_prefetch(ptr, _MM_HINT_NTA);
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        use core::arch::aarch64::{_prefetch, _PREFETCH_LOCALITY0, _PREFETCH_READ};

        // SAFETY: a prefetch is only a cache hint; the pointer is not
        // dereferenced by this code and no memory is written.
        unsafe {
            _prefetch(ptr, _PREFETCH_READ, _PREFETCH_LOCALITY0);
        }
    }

    // No prefetch intrinsic is used on other targets; consume the argument so
    // the build does not emit an `unused_variables` warning there.
    #[cfg(not(any(
        target_arch = "x86",
        target_arch = "x86_64",
        target_arch = "aarch64"
    )))]
    {
        let _ = ptr;
    }
}
File renamed without changes.
1 change: 0 additions & 1 deletion crates/graph/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ version.workspace = true
edition.workspace = true

[dependencies]
parking_lot.workspace = true
rand.workspace = true

base = { path = "../base" }
Expand Down
1 change: 0 additions & 1 deletion crates/graph/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@

pub mod prune;
pub mod search;
pub mod visited;
4 changes: 2 additions & 2 deletions crates/graph/src/search.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use crate::visited::VisitedGuard;
use crate::visited::VisitedPool;
use base::scalar::F32;
use base::search::Element;
use base::search::GraphReranker;
use base::search::Payload;
use common::visited::VisitedGuard;
use common::visited::VisitedPool;
use std::cmp::Reverse;
use std::collections::BinaryHeap;

Expand Down
2 changes: 1 addition & 1 deletion crates/hnsw/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use base::vector::VectorBorrowed;
use common::json::Json;
use common::mmap_array::MmapArray;
use common::remap::RemappedCollection;
use graph::visited::VisitedPool;
use common::visited::VisitedPool;
use num_traits::Float;
use parking_lot::RwLock;
use quantization::operator::OperatorQuantization;
Expand Down
1 change: 1 addition & 0 deletions crates/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ hnsw = { path = "../hnsw" }
inverted = { path = "../inverted" }
ivf = { path = "../ivf" }
rabitq = { path = "../rabitq" }
seismic = { path = "../seismic" }

[lints]
workspace = true
Loading
Loading