ModelarData
diff --git a/‎crates/modelardb_compression/src/compression.rs
Lines changed: 743 additions & 0 deletions b/‎crates/modelardb_compression/src/compression.rs
Lines changed: 743 additions & 0 deletions
diff --git a/‎crates/modelardb_compression/src/lib.rs
Lines changed: 53 additions & 1310 deletions b/‎crates/modelardb_compression/src/lib.rs
Lines changed: 53 additions & 1310 deletions
diff --git a/‎crates/modelardb_compression/src/merge.rs
Lines changed: 439 additions & 0 deletions b/‎crates/modelardb_compression/src/merge.rs
Lines changed: 439 additions & 0 deletions
diff --git a/‎crates/modelardb_compression/src/models/bits.rs
Lines changed: 2 additions & 1 deletion b/‎crates/modelardb_compression/src/models/bits.rs
Lines changed: 2 additions & 1 deletion
diff --git a/‎crates/modelardb_compression/src/models/gorilla.rs
Lines changed: 92 additions & 59 deletions b/‎crates/modelardb_compression/src/models/gorilla.rs
Lines changed: 92 additions & 59 deletions
@@ -143,7 +143,8 @@ impl BitVecBuilder {
     }
 
     /// Return the number of bytes required to store the appended bits.
-    pub fn len(&self) -> usize {
+    #[cfg(test)]
+    fn len(&self) -> usize {
         self.bytes.len() + (self.remaining_bits != 8) as usize
     }
 
 
@@ -22,7 +22,6 @@
 //!
 //! [Gorilla paper]: https://www.vldb.org/pvldb/vol8/p1816-teller.pdf
 
-use modelardb_common::schemas::COMPRESSED_METADATA_SIZE_IN_BYTES;
 use modelardb_common::types::{Timestamp, UnivariateId, UnivariateIdBuilder, Value, ValueBuilder};
 
 use crate::models;
@@ -34,6 +33,10 @@ use crate::models::ErrorBound;
 pub struct Gorilla {
     /// Maximum relative error for the value of each data point.
     error_bound: ErrorBound,
+    /// Min value compressed and added to `compressed_values`.
+    min_value: Value,
+    /// Max value compressed and added to `compressed_values`.
+    max_value: Value,
     /// Last value compressed and added to `compressed_values`.
     last_value: Value,
     /// Number of leading zero bits for the last value that was compressed by
@@ -52,6 +55,8 @@ impl Gorilla {
     pub fn new(error_bound: ErrorBound) -> Self {
         Self {
             error_bound,
+            min_value: Value::NAN,
+            max_value: Value::NAN,
             last_value: 0.0,
             last_leading_zero_bits: u8::MAX,
             last_trailing_zero_bits: 0,
@@ -60,9 +65,33 @@ impl Gorilla {
         }
     }
 
-    /// Compress `value` using XOR and a variable length binary encoding and
-    /// append the compressed value to an internal buffer in [`Gorilla`].
-    pub fn compress_value(&mut self, value: Value) {
+    /// Store the first value in full if this instance of [`Gorilla`] is empty and then compress the
+    /// remaining `values` using XOR and a variable length binary encoding before storing them.
+    pub fn compress_values(&mut self, values: &[Value]) {
+        for value in values {
+            if self.compressed_values.is_empty() {
+                // Store the first value uncompressed using size_of::<Value> bits.
+                self.compressed_values
+                    .append_bits(value.to_bits(), models::VALUE_SIZE_IN_BITS);
+
+                self.update_min_max_and_last_value(*value);
+            } else {
+                self.compress_value_xor_last_value(*value);
+            };
+        }
+    }
+
+    /// Assume `last_value` is stored fully elsewhere, set it as the current last value, and then
+    /// compress each of the values in `values` using XOR and a variable length binary encoding.
+    pub fn compress_values_without_first(&mut self, values: &[Value], model_last_value: Value) {
+        self.last_value = model_last_value;
+        for value in values {
+            self.compress_value_xor_last_value(*value);
+        }
+    }
+
+    /// Compress `value` using XOR and a variable length binary encoding and then store it.
+    fn compress_value_xor_last_value(&mut self, value: Value) {
         // The best case for Gorilla is storing duplicate values.
         let value = if models::is_value_within_error_bound(self.error_bound, value, self.last_value)
         {
@@ -75,11 +104,7 @@ impl Gorilla {
         let last_value_as_integer = self.last_value.to_bits();
         let value_xor_last_value = value_as_integer ^ last_value_as_integer;
 
-        if self.compressed_values.is_empty() {
-            // Store the first value uncompressed using size_of::<Value> bits.
-            self.compressed_values
-                .append_bits(value_as_integer, models::VALUE_SIZE_IN_BITS);
-        } else if value_xor_last_value == 0 {
+        if value_xor_last_value == 0 {
             // Store each repeated value as a single zero bit.
             self.compressed_values.append_a_zero_bit();
         } else {
@@ -119,45 +144,49 @@ impl Gorilla {
                 self.last_trailing_zero_bits = trailing_zero_bits;
             }
         }
-        self.last_value = value;
-        self.length += 1;
-    }
 
-    /// Return the number of values currently compressed using XOR and a
-    /// variable length binary encoding.
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self) -> usize {
-        self.length
+        self.update_min_max_and_last_value(value);
     }
 
-    /// Return the number of bytes currently used per data point on average.
-    pub fn bytes_per_value(&self) -> f32 {
-        // Gorilla does not use metadata for encoding values, only the data in compressed_values.
-        (COMPRESSED_METADATA_SIZE_IN_BYTES.to_owned() + self.compressed_values.len()) as f32
-            / self.length as f32
+    /// Update the current minimum, maximum, and last value based on `value`.
+    fn update_min_max_and_last_value(&mut self, value: Value) {
+        self.min_value = Value::min(self.min_value, value);
+        self.max_value = Value::max(self.max_value, value);
+        self.last_value = value;
+        self.length += 1;
     }
 
     /// Return the values compressed using XOR and a variable length binary
-    /// encoding.
-    pub fn compressed_values(self) -> Vec<u8> {
-        self.compressed_values.finish()
+    /// encoding, the compressed minimum value, and the compressed maximum value.
+    pub fn model(self) -> (Vec<u8>, Value, Value) {
+        (
+            self.compressed_values.finish(),
+            self.min_value,
+            self.max_value,
+        )
     }
 }
 
-/// Compute the sum of the values for a time series segment whose values are
-/// compressed using Gorilla's compression method for floating-point values.
-pub fn sum(start_time: Timestamp, end_time: Timestamp, timestamps: &[u8], values: &[u8]) -> Value {
+/// Compute the sum of the values for a time series segment whose values are compressed using
+/// Gorilla's compression method for floating-point values. If `maybe_model_last_value` is provided,
+/// it is assumed the first value in `values` is compressed against it instead of being stored in
+/// full, i.e., uncompressed.
+pub fn sum(length: usize, values: &[u8], maybe_model_last_value: Option<Value>) -> Value {
     // This function replicates code from gorilla::grid() as it isn't necessary
     // to store the univariate ids, timestamps, and values in arrays for a sum.
     // So any changes to the decompression must be mirrored in gorilla::grid().
-    let length = models::len(start_time, end_time, timestamps);
     let mut bits = BitReader::try_new(values).unwrap();
     let mut leading_zeros = u8::MAX;
     let mut trailing_zeros: u8 = 0;
-    let mut last_value = bits.read_bits(models::VALUE_SIZE_IN_BITS);
 
-    // The first value is stored uncompressed using size_of::<Value> bits.
-    let mut sum = Value::from_bits(last_value);
+    let (mut last_value, mut sum) = if let Some(model_last_value) = maybe_model_last_value {
+        // The first value is stored compressed against model_last_value.
+        (model_last_value.to_bits(), 0.0)
+    } else {
+        // The first value is stored uncompressed using size_of::<Value> bits.
+        let first_value = bits.read_bits(models::VALUE_SIZE_IN_BITS);
+        (first_value, Value::from_bits(first_value))
+    };
 
     // Then values are stored using XOR and a variable length binary encoding.
     for _ in 0..length - 1 {
@@ -182,29 +211,38 @@ pub fn sum(start_time: Timestamp, end_time: Timestamp, timestamps: &[u8], values
     sum
 }
 
-/// Decompress the values in `values` for the `timestamps` without matching
-/// values in `value_builder`. The values in `values` are compressed using
-/// Gorilla's compression method for floating-point values. `univariate_ids`
-/// and `values` are appended to `univariate_id_builder` and `value_builder`.
+/// Decompress all of the values in `values` for the `timestamps` without matching values in
+/// `value_builder`. The values in `values` are compressed using Gorilla's compression method for
+/// floating-point values. `univariate_ids` and `values` are appended to `univariate_id_builder` and
+/// `value_builder`. If `maybe_model_last_value` is provided, it is assumed the first value in
+/// `values` is compressed against it instead of being stored in full, i.e., uncompressed.
 pub fn grid(
     univariate_id: UnivariateId,
     values: &[u8],
     univariate_id_builder: &mut UnivariateIdBuilder,
     timestamps: &[Timestamp],
     value_builder: &mut ValueBuilder,
+    maybe_model_last_value: Option<Value>,
 ) {
     // Changes to the decompression must be mirrored in gorilla::sum().
     let mut bits = BitReader::try_new(values).unwrap();
     let mut leading_zeros = u8::MAX;
     let mut trailing_zeros: u8 = 0;
-    let mut last_value = bits.read_bits(models::VALUE_SIZE_IN_BITS);
 
-    // The first value is stored uncompressed using size_of::<Value> bits.
-    univariate_id_builder.append_value(univariate_id);
-    value_builder.append_value(Value::from_bits(last_value));
+    let mut last_value = if let Some(model_last_value) = maybe_model_last_value {
+        // The first value is stored compressed against model_last_value.
+        model_last_value.to_bits()
+    } else {
+        // The first value is stored uncompressed using size_of::<Value> bits.
+        let first_value = bits.read_bits(models::VALUE_SIZE_IN_BITS);
+        univariate_id_builder.append_value(univariate_id);
+        value_builder.append_value(Value::from_bits(first_value));
+        first_value
+    };
 
-    // Then values are stored using XOR and a variable length binary encoding.
-    for _ in 0..timestamps.len() - 1 {
+    // Then values are stored using XOR and a variable length binary encoding. If last_value was
+    // provided by the model, the first value has not been read from values so all must be read now.
+    for _ in 0..timestamps.len() - maybe_model_last_value.is_none() as usize {
         if bits.read_bit() {
             if bits.read_bit() {
                 // New leading and trailing zeros.
@@ -241,7 +279,7 @@ mod tests {
     #[test]
     fn test_empty_sequence() {
         let error_bound = ErrorBound::try_new(0.0).unwrap();
-        assert!(Gorilla::new(error_bound).compressed_values().is_empty());
+        assert!(Gorilla::new(error_bound).model().0.is_empty());
     }
 
     proptest! {
@@ -250,7 +288,7 @@ mod tests {
         let error_bound = ErrorBound::try_new(0.0).unwrap();
         let mut model_type = Gorilla::new(error_bound);
 
-        model_type.compress_value(value);
+        model_type.compress_values(&[value]);
 
         prop_assert!(models::equal_or_nan(value as f64, model_type.last_value as f64));
         prop_assert_eq!(model_type.last_leading_zero_bits, u8::MAX);
@@ -262,8 +300,7 @@ mod tests {
         let error_bound = ErrorBound::try_new(0.0).unwrap();
         let mut model_type = Gorilla::new(error_bound);
 
-        model_type.compress_value(value);
-        model_type.compress_value(value);
+        model_type.compress_values(&[value, value]);
 
         prop_assert!(models::equal_or_nan(value as f64, model_type.last_value as f64));
         prop_assert_eq!(model_type.last_leading_zero_bits, u8::MAX);
@@ -276,8 +313,7 @@ mod tests {
         let error_bound = ErrorBound::try_new(0.0).unwrap();
         let mut model_type = Gorilla::new(error_bound);
 
-        model_type.compress_value(37.0);
-        model_type.compress_value(73.0);
+        model_type.compress_values(&[37.0, 73.0]);
 
         assert!(models::equal_or_nan(73.0, model_type.last_value as f64));
         assert_eq!(model_type.last_leading_zero_bits, 8);
@@ -289,9 +325,7 @@ mod tests {
         let error_bound = ErrorBound::try_new(0.0).unwrap();
         let mut model_type = Gorilla::new(error_bound);
 
-        model_type.compress_value(37.0);
-        model_type.compress_value(71.0);
-        model_type.compress_value(73.0);
+        model_type.compress_values(&[37.0, 71.0, 73.0]);
 
         assert!(models::equal_or_nan(73.0, model_type.last_value as f64));
         assert_eq!(model_type.last_leading_zero_bits, 8);
@@ -303,12 +337,12 @@ mod tests {
         let error_bound = ErrorBound::try_new(10.0).unwrap();
         let mut model_type = Gorilla::new(error_bound);
 
-        model_type.compress_value(10.0);
+        model_type.compress_values(&[10.0]);
         let before_last_value = model_type.last_value;
         let before_last_leading_zero_bits = model_type.last_leading_zero_bits;
         let before_last_trailing_zero_bits = model_type.last_trailing_zero_bits;
 
-        model_type.compress_value(11.0);
+        model_type.compress_values(&[11.0]);
 
         // State should be unchanged when the value is within the error bound.
         assert_eq!(before_last_value, model_type.last_value);
@@ -328,7 +362,7 @@ mod tests {
     fn test_sum(values in collection::vec(ProptestValue::ANY, 0..50)) {
         prop_assume!(!values.is_empty());
         let compressed_values = compress_values_using_gorilla(&values);
-        let sum = sum(1, values.len() as i64, &values.len().to_be_bytes(), &compressed_values);
+        let sum = sum(values.len(), &compressed_values, None);
         let expected_sum = aggregate::sum(&ValueArray::from_iter_values(values)).unwrap();
         prop_assert!(models::equal_or_nan(expected_sum as f64, sum as f64));
     }
@@ -349,7 +383,8 @@ mod tests {
             &compressed_values,
             &mut univariate_id_builder,
             &timestamps,
-            &mut value_builder
+            &mut value_builder,
+            None,
         );
 
         let univariate_ids_array = univariate_id_builder.finish();
@@ -371,10 +406,8 @@ mod tests {
     fn compress_values_using_gorilla(values: &[Value]) -> Vec<u8> {
         let error_bound = ErrorBound::try_new(0.0).unwrap();
         let mut model_type = Gorilla::new(error_bound);
-        for value in values {
-            model_type.compress_value(*value);
-        }
-        model_type.compressed_values()
+        model_type.compress_values(values);
+        model_type.compressed_values.finish()
     }
 
     fn slice_of_value_equal(values_one: &[Value], values_two: &[Value]) -> bool {
Original file line number	Diff line number	Diff line change
`@@ -143,7 +143,8 @@ impl BitVecBuilder {`
`143`	`143`	`}`
`144`	`144`
`145`	`145`	`/// Return the number of bytes required to store the appended bits.`
`146`		`- pub fn len(&self) -> usize {`
	`146`	`+ #[cfg(test)]`
	`147`	`+ fn len(&self) -> usize {`
`147`	`148`	`self.bytes.len() + (self.remaining_bits != 8) as usize`
`148`	`149`	`}`
`149`	`150`