diff --git a/rust/blockstore/src/arrow/blockfile.rs b/rust/blockstore/src/arrow/blockfile.rs index 243cc818116..d4a91921eeb 100644 --- a/rust/blockstore/src/arrow/blockfile.rs +++ b/rust/blockstore/src/arrow/blockfile.rs @@ -471,6 +471,15 @@ impl<'me, K: ArrowReadableKey<'me> + Into, V: ArrowReadableValue<'me self.load_blocks(&target_block_ids).await; } + /* Loads the blocks selected for the given prefixes: collects `prefixes`, passes them to `get_block_ids_for_prefixes` on the sparse index of `self.root`, then awaits `load_blocks` on the returned block ids. */ pub(crate) async fn load_blocks_for_prefixes(&self, prefixes: impl IntoIterator) { + let prefix_vec = prefixes.into_iter().collect(); + let target_block_ids = self + .root + .sparse_index + .get_block_ids_for_prefixes(prefix_vec); + self.load_blocks(&target_block_ids).await; + } + pub(crate) async fn get( &'me self, prefix: &str, diff --git a/rust/blockstore/src/arrow/sparse_index.rs b/rust/blockstore/src/arrow/sparse_index.rs index 83ee4a32ef7..2b204e2f05a 100644 --- a/rust/blockstore/src/arrow/sparse_index.rs +++ b/rust/blockstore/src/arrow/sparse_index.rs @@ -362,7 +362,47 @@ impl SparseIndexReader { result_uuids } - pub(super) fn get_block_ids_range<'prefix, 'referred_data, PrefixRange>( + /* Returns the ids of the blocks selected for `prefixes`. The prefixes are sorted, then the entries of `self.data.forward` are walked once, pairing each entry with the prefix of the following entry delimiter (`None` after the last entry). An entry is selected when the smallest prefix not yet discarded is <= that following prefix, or when there is no following entry. Panics via `unreachable!` if a `Start` delimiter is seen on any entry after the first. NOTE(review): this presumably relies on `forward` iterating in ascending delimiter order - confirm. NOTE(review): the return type is shown as a bare `Vec`; its element type appears to have been lost in extraction - confirm against the real file. */ pub(super) fn get_block_ids_for_prefixes(&self, mut prefixes: Vec<&str>) -> Vec { + prefixes.sort(); + let mut result_uuids = Vec::new(); + let block_start = self.data.forward.iter(); + /* End bound of each entry: the prefix of the next entry delimiter, with a trailing None for the last entry. */ let block_end = block_start + .clone() + .skip(1) + .map(|(delim, _)| match delim { + SparseIndexDelimiter::Start => { + unreachable!("The start delimiter should only appear in the first block") + } + SparseIndexDelimiter::Key(composite_key) => Some(composite_key.prefix.as_str()), + }) + .chain([None]); + let mut prefix_iter = prefixes.into_iter().peekable(); + for ((start_delim, block), end_prefix) in block_start.zip(block_end) { + if let SparseIndexDelimiter::Key(CompositeKey { + prefix: start_prefix, + key: _, + }) = start_delim + { + /* Discard prefixes that sort strictly before the start prefix of this entry. */ while let Some(&prefix) = prefix_iter.peek() { + if start_prefix.as_str() <= prefix { + break; + } + prefix_iter.next(); + } + } + /* Select this entry if the smallest remaining prefix does not sort after its end bound; stop the walk once no prefixes remain. */ if let Some(&prefix) = prefix_iter.peek() { + if end_prefix.is_none() || 
end_prefix.is_some_and(|end_prefix| prefix <= end_prefix) + { + result_uuids.push(block.id); + } + } else { + break; + } + } + result_uuids + } + + pub(super) fn get_block_ids_range<'prefix, PrefixRange>( &self, prefix_range: PrefixRange, ) -> Vec diff --git a/rust/blockstore/src/types/reader.rs b/rust/blockstore/src/types/reader.rs index 63a8f1e9205..b04b9de795f 100644 --- a/rust/blockstore/src/types/reader.rs +++ b/rust/blockstore/src/types/reader.rs @@ -125,6 +125,18 @@ impl< } } + /* Loads the blocks selected for `prefixes` by forwarding to `load_blocks_for_prefixes` on the Arrow reader variant and awaiting it. The in-memory reader variant is not supported here and hits `unimplemented!()`, which panics. */ pub async fn load_blocks_for_prefixes<'prefix>( + &self, + prefixes: impl IntoIterator, + ) { + match self { + BlockfileReader::MemoryBlockfileReader(_reader) => unimplemented!(), + BlockfileReader::ArrowBlockfileReader(reader) => { + reader.load_blocks_for_prefixes(prefixes).await + } + } + } + pub async fn rank( &'referred_data self, prefix: &'referred_data str,