Skip to content

Commit

Permalink
Introduce the Statistics data structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Oct 8, 2024
1 parent 6bba84b commit 8ff028e
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 0 deletions.
2 changes: 2 additions & 0 deletions roaring/src/bitmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod container;
mod fmt;
mod multiops;
mod proptests;
mod statistics;
mod store;
mod util;

Expand All @@ -22,6 +23,7 @@ pub(crate) mod serialization;
use self::cmp::Pairs;
pub use self::iter::IntoIter;
pub use self::iter::Iter;
pub use self::statistics::Statistics;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
Expand Down
106 changes: 106 additions & 0 deletions roaring/src/bitmap/statistics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
use core::mem;

use crate::bitmap::container::Container;
use crate::RoaringBitmap;

use super::store::Store;

/// Detailed statistics on the composition of a bitmap.
///
/// A value of this type is produced by [`RoaringBitmap::statistics`]. All
/// fields are plain integers, so the struct is cheap to copy and supports
/// total equality (`Eq`).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Statistics {
    /// Total number of containers in the bitmap (all kinds together).
    pub n_containers: u32,
    /// Number of array containers in the bitmap.
    pub n_array_containers: u32,
    /// Number of run containers in the bitmap.
    ///
    /// `RoaringBitmap::statistics` currently always reports `0` here.
    pub n_run_containers: u32,
    /// Number of bitset containers in the bitmap.
    pub n_bitset_containers: u32,
    /// Number of values stored in array containers.
    pub n_values_array_containers: u32,
    /// Number of values stored in run containers.
    ///
    /// `RoaringBitmap::statistics` currently always reports `0` here.
    pub n_values_run_containers: u32,
    /// Number of values stored in bitset containers.
    pub n_values_bitset_containers: u32,
    /// Number of bytes used by array containers, derived from the capacity
    /// of each container rather than from its length.
    pub n_bytes_array_containers: u64,
    /// Number of bytes used by run containers.
    ///
    /// `RoaringBitmap::statistics` currently always reports `0` here.
    pub n_bytes_run_containers: u64,
    /// Number of bytes used by bitset containers.
    pub n_bytes_bitset_containers: u64,
    /// Maximum value stored in the bitmap, if any.
    pub max_value: Option<u32>,
    /// Minimum value stored in the bitmap, if any.
    pub min_value: Option<u32>,
    /// Number of values stored in the bitmap (over all containers).
    pub cardinality: u64,
}

impl RoaringBitmap {
    /// Returns statistics about the composition of a roaring bitmap.
    ///
    /// Every container is visited once. Byte counts are derived from the
    /// *capacity* of each container, not from the number of values it
    /// currently holds. Run containers are not produced by this
    /// implementation, so every `*_run_containers` field is reported as `0`.
    ///
    /// `n_values_bitset_containers` is a `u32` and therefore saturates at
    /// `u32::MAX` for a completely full bitmap (2^32 values); `cardinality`
    /// is a `u64` and always holds the exact count.
    ///
    /// # Examples
    ///
    /// ```
    /// use roaring::RoaringBitmap;
    ///
    /// let bitmap: RoaringBitmap = (1..100).collect();
    /// let statistics = bitmap.statistics();
    ///
    /// assert_eq!(statistics.n_containers, 1);
    /// assert_eq!(statistics.n_array_containers, 1);
    /// assert_eq!(statistics.n_run_containers, 0);
    /// assert_eq!(statistics.n_bitset_containers, 0);
    /// assert_eq!(statistics.n_values_array_containers, 99);
    /// assert_eq!(statistics.n_values_run_containers, 0);
    /// assert_eq!(statistics.n_values_bitset_containers, 0);
    /// assert_eq!(statistics.n_bytes_array_containers, 256);
    /// assert_eq!(statistics.n_bytes_run_containers, 0);
    /// assert_eq!(statistics.n_bytes_bitset_containers, 0);
    /// assert_eq!(statistics.max_value, Some(99));
    /// assert_eq!(statistics.min_value, Some(1));
    /// assert_eq!(statistics.cardinality, 99);
    /// ```
    pub fn statistics(&self) -> Statistics {
        let mut n_containers: u32 = 0;
        let mut n_array_containers: u32 = 0;
        let mut n_bitset_containers: u32 = 0;
        let mut n_values_array_containers: u32 = 0;
        let mut n_values_bitset_containers: u32 = 0;
        let mut n_bytes_array_containers: u64 = 0;
        let mut n_bytes_bitset_containers: u64 = 0;
        let mut cardinality: u64 = 0;

        for Container { key: _, store } in &self.containers {
            match store {
                Store::Array(array) => {
                    cardinality += array.len();
                    n_values_array_containers += array.len() as u32;
                    // Array containers hold 16-bit values, so every slot of
                    // capacity costs `size_of::<u16>()` bytes (not `u32`).
                    n_bytes_array_containers +=
                        (array.capacity() * mem::size_of::<u16>()) as u64;
                    n_array_containers += 1;
                }
                Store::Bitmap(bitmap) => {
                    cardinality += bitmap.len();
                    // A completely full bitmap stores 2^32 values, one more
                    // than `u32::MAX`: saturate instead of overflowing.
                    n_values_bitset_containers =
                        n_values_bitset_containers.saturating_add(bitmap.len() as u32);
                    // `BitmapStore::capacity` is expressed in bits; convert
                    // it to bytes before accumulating.
                    n_bytes_bitset_containers +=
                        (bitmap.capacity() / u8::BITS as usize) as u64;
                    n_bitset_containers += 1;
                }
            }
            n_containers += 1;
        }

        Statistics {
            n_containers,
            n_array_containers,
            n_run_containers: 0,
            n_bitset_containers,
            n_values_array_containers,
            n_values_run_containers: 0,
            n_values_bitset_containers,
            n_bytes_array_containers,
            n_bytes_run_containers: 0,
            n_bytes_bitset_containers,
            max_value: self.max(),
            min_value: self.min(),
            cardinality,
        }
    }
}
4 changes: 4 additions & 0 deletions roaring/src/bitmap/store/array_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ impl ArrayStore {
ArrayStore { vec: Vec::with_capacity(capacity) }
}

/// Reports how many elements the backing vector can hold without
/// reallocating (an element count, not a byte count).
pub fn capacity(&self) -> usize {
    Vec::capacity(&self.vec)
}

///
/// Create a new SortedU16Vec from a given vec
/// It is up to the caller to ensure the vec is sorted and deduplicated
Expand Down
4 changes: 4 additions & 0 deletions roaring/src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ impl BitmapStore {
BitmapStore { len: (BITMAP_LENGTH as u64) * 64, bits: Box::new([u64::MAX; BITMAP_LENGTH]) }
}

/// Reports the size of the fixed bit array in **bits**: `BITMAP_LENGTH`
/// words of `u64::BITS` bits each. The result does not depend on how many
/// bits are currently set.
pub fn capacity(&self) -> usize {
    let bits_per_word = u64::BITS as usize;
    bits_per_word * BITMAP_LENGTH
}

pub fn try_from(len: u64, bits: Box<[u64; BITMAP_LENGTH]>) -> Result<BitmapStore, Error> {
let actual_len = bits.iter().map(|v| v.count_ones() as u64).sum();
if len != actual_len {
Expand Down

0 comments on commit 8ff028e

Please sign in to comment.