From 0ef1a114ec987858dbc8ddf4667c40ad6387413d Mon Sep 17 00:00:00 2001 From: Darnell Andries Date: Sun, 12 Feb 2023 01:16:48 -0800 Subject: [PATCH 1/2] Add associated functions/methods for exposing/loading bitmap data in BloomFilter --- src/filters/bloomfilter.rs | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/filters/bloomfilter.rs b/src/filters/bloomfilter.rs index 1b0da46..62220ed 100644 --- a/src/filters/bloomfilter.rs +++ b/src/filters/bloomfilter.rs @@ -179,6 +179,20 @@ where Self::with_params_and_hash(m, k, bh) } + /// Create BloomFilter with internal parameters and existing bitmap. + /// + /// - `k` is the number of hash functions + /// - `m` is the number of bits used to store state + /// - `bitmap` is the bitmap from an existing bloom filter + pub fn with_existing_filter>( + m: usize, + k: usize, + bitmap: I, + ) -> Self { + let bh = BuildHasherDefault::::default(); + Self::with_existing_filter_and_hash(m, k, bitmap, bh) + } + /// Create new, empty BloomFilter with given properties. /// /// - `n` number of unique elements the BloomFilter is expected to hold, must be `> 0` @@ -207,6 +221,21 @@ where } } + /// Same as `with_existing_filter` but with specific `BuildHasher`. + pub fn with_existing_filter_and_hash>( + m: usize, + k: usize, + bitmap: I, + buildhasher: B, + ) -> Self { + Self { + bs: FixedBitSet::with_capacity_and_blocks(m, bitmap), + k, + builder: HashIterBuilder::new(m, k, buildhasher), + phantom: PhantomData, + } + } + /// Same as `with_properties` but with specific `BuildHasher`. pub fn with_properties_and_hash(n: usize, p: f64, buildhasher: B) -> Self { assert!(n > 0, "n must be greater than 0"); @@ -237,6 +266,11 @@ where pub fn buildhasher(&self) -> &B { self.builder.buildhasher() } + + /// Get bitmap data. + pub fn bitmap(&self) -> &[u32] { + self.bs.as_slice() + } } impl Filter for BloomFilter @@ -529,4 +563,22 @@ mod tests { let bf = BloomFilter::::with_params(100, 2); assert_send(&bf); } + + #[test] + fn bitmap_save_load() { + let mut bf = BloomFilter::with_params(100, 2); + + assert!(bf.insert(&1).unwrap()); + assert!(bf.insert(&7).unwrap()); + assert!(bf.insert(&52).unwrap()); + + let bitmap = bf.bitmap().to_vec(); + + let loaded_bf = BloomFilter::with_existing_filter(100, 2, bitmap); + + assert!(loaded_bf.query(&1)); + assert!(loaded_bf.query(&7)); + assert!(loaded_bf.query(&52)); + assert!(!loaded_bf.query(&15)); + } } From e489fd5f5263146f45ad0b73690cfc7ea9fa44d0 Mon Sep 17 00:00:00 2001 From: Darnell Andries Date: Sun, 12 Feb 2023 15:38:45 -0800 Subject: [PATCH 2/2] Add associated functions/methods for exposing/loading table data in CuckooFilter --- src/filters/cuckoofilter.rs | 121 +++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/src/filters/cuckoofilter.rs b/src/filters/cuckoofilter.rs index bb20a44..91af03d 100644 --- a/src/filters/cuckoofilter.rs +++ b/src/filters/cuckoofilter.rs @@ -5,7 +5,7 @@ use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; use std::marker::PhantomData; use rand::Rng; -use succinct::{IntVec, IntVecMut, IntVector}; +use succinct::{BitVec, BitVecMut, IntVec, IntVecMut, IntVector}; use crate::filters::Filter; use crate::helpers::all_zero_intvector; @@ -186,6 +186,36 @@ where Self::with_params_and_hash(rng, bucketsize, n_buckets, l_fingerprint, bh) } + /// Create CuckooFilter with existing filter table data: + /// + /// - `rng`: random number generator used for certain random actions + /// - `bucketsize`: number of elements per bucket, must be at least 2 + /// - `n_buckets`: number of buckets, must be a power of 2 and at least 2 + /// - `l_fingerprint`: size of the fingerprint in bits + /// - `n_elements`: number of elements in existing filter + /// - `table_succinct_blocks`: filter table block data + /// + /// The BuildHasher is set to the `DefaultHasher`. + pub fn with_existing_filter>( + rng: R, + bucketsize: usize, + n_buckets: usize, + l_fingerprint: usize, + n_elements: usize, + table_succinct_blocks: I, + ) -> Self { + let bh = BuildHasherDefault::::default(); + Self::with_existing_filter_and_hash( + rng, + bucketsize, + n_buckets, + l_fingerprint, + n_elements, + table_succinct_blocks, + bh, + ) + } + /// Construct new `bucketsize=4`-cuckoofilter with properties: /// /// - `false_positive_rate`: false positive lookup rate @@ -260,6 +290,28 @@ where } } + /// Same as `with_existing_filter` but with specific `BuildHasher`. + pub fn with_existing_filter_and_hash>( + rng: R, + bucketsize: usize, + n_buckets: usize, + l_fingerprint: usize, + n_elements: usize, + table_succinct_blocks: I, + bh: B, + ) -> Self { + let mut filter = Self::with_params_and_hash(rng, bucketsize, n_buckets, l_fingerprint, bh); + for (i, block) in table_succinct_blocks.into_iter().enumerate() { + assert!( + i < filter.table.block_len(), + "existing input table block length must not exceed filter table block length" + ); + filter.table.set_block(i, block); + } + filter.n_elements = n_elements; + filter + } + /// Construct new `bucketsize=4`-cuckoofilter with properties: /// /// - `false_positive_rate`: false positive lookup rate @@ -481,6 +533,35 @@ where self.table.set(pos as u64, data); } } + + /// Clear and load filter table with individual filter table elements + /// and existing element count. + pub fn load_table>(&mut self, table: I, n_elements: usize) { + self.clear(); + for (i, value) in table.into_iter().enumerate() { + let i = i as u64; + assert!( + i < self.table.len(), + "input table length must not exceed filter table length" + ); + self.table.set(i, value); + } + self.n_elements = n_elements; + } + + /// Return the individual filter table elements. + pub fn table(&self) -> Vec { + self.table.iter().collect() + } + + /// Return the filter table succinct block data. + pub fn table_succinct_blocks(&self) -> Vec { + let mut result = Vec::with_capacity(self.table.block_len()); + for i in 0..self.table.block_len() { + result.push(self.table.get_block(i)); + } + result + } } impl Filter for CuckooFilter @@ -949,4 +1030,42 @@ mod tests { let cf = CuckooFilter::::with_params(ChaChaRng::from_seed([0; 32]), 2, 16, 8); assert_send(&cf); } + + #[test] + fn succinct_table_save_load() { + let mut cf = CuckooFilter::with_params(ChaChaRng::from_seed([0; 32]), 2, 16, 8); + assert!(cf.insert(&10).unwrap()); + assert!(cf.insert(&51).unwrap()); + assert_eq!(cf.len(), 2); + + let loaded_cf = CuckooFilter::with_existing_filter( + ChaChaRng::from_seed([0; 32]), + 2, + 16, + 8, + cf.len(), + cf.table_succinct_blocks(), + ); + + assert!(loaded_cf.query(&10)); + assert!(loaded_cf.query(&51)); + assert!(!loaded_cf.query(&33)); + assert_eq!(loaded_cf.len(), 2); + } + + #[test] + fn table_save_load() { + let mut cf = CuckooFilter::with_params(ChaChaRng::from_seed([0; 32]), 2, 16, 8); + assert!(cf.insert(&10).unwrap()); + assert!(cf.insert(&51).unwrap()); + assert_eq!(cf.len(), 2); + + let mut loaded_cf = CuckooFilter::with_params(ChaChaRng::from_seed([0; 32]), 2, 16, 8); + loaded_cf.load_table(cf.table(), cf.len()); + + assert!(loaded_cf.query(&10)); + assert!(loaded_cf.query(&51)); + assert!(!loaded_cf.query(&33)); + assert_eq!(loaded_cf.len(), 2); + } }