Skip to content

Commit

Permalink
Redesign unique
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanUkhov committed May 3, 2024
1 parent c129cc2 commit ed5d102
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@
//!
//! [1]: https://fennel.ai/blog/vector-search-in-200-lines-of-rust/
-use std::collections::BTreeSet;
+use std::collections::{BTreeMap, BTreeSet};

/// An index.
pub struct Index<const N: usize> {
@@ -64,7 +64,7 @@ impl<const N: usize> Index<N> {
debug_assert!(forest_size >= 1);
debug_assert!(leaf_size >= 1);
let mut source = random::default(seed);
-let indices = unique(vectors);
+let indices = unique(vectors).into_keys().collect::<Vec<_>>();
let roots = (0..forest_size)
.map(|_| Node::build(vectors, &indices, leaf_size, &mut source))
.collect();
@@ -202,22 +202,24 @@ fn subtract<const N: usize>(one: &Vector<N>, other: &Vector<N>) -> Vector<N> {
.unwrap()
}

-fn unique<const N: usize>(vectors: &[Vector<N>]) -> Vec<usize> {
-let mut indices = Vec::with_capacity(vectors.len());
-let mut seen = BTreeSet::default();
+fn unique<const N: usize>(vectors: &[Vector<N>]) -> BTreeMap<usize, Vec<usize>> {
+let mut mapping = BTreeMap::<usize, Vec<usize>>::default();
+let mut seen = BTreeMap::default();
for (index, vector) in vectors.iter().enumerate() {
let key: [u32; N] = vector
.iter()
.map(|value| value.to_bits())
.collect::<Vec<_>>()
.try_into()
.unwrap();
-if !seen.contains(&key) {
-seen.insert(key);
-indices.push(index);
+if let Some(first) = seen.get(&key) {
+mapping.entry(*first).or_default().push(index);
+} else {
+mapping.insert(index, Default::default());
+seen.insert(key, index);
}
}
-indices
+mapping
}

#[cfg(test)]

0 comments on commit ed5d102

Please sign in to comment.