Skip to content

Commit 0dd969d

Browse files
authored
fix: ALP encoding handles infinity, negative zero and nan values (#2555)
By comparing values with NativePType::is_eq and relying on Rust's float -> int cast semantics, we can avoid explicitly checking whether a value is one of the unencodable variants (infinity, negative zero, NaN). See https://doc.rust-lang.org/stable/reference/expressions/operator-expr.html#numeric-cast for reference.
1 parent 459dec3 commit 0dd969d

File tree

6 files changed

+30
-15
lines changed

6 files changed

+30
-15
lines changed

Cargo.lock

-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

encodings/alp/src/alp/compress.rs

+18
Original file line numberDiff line numberDiff line change
@@ -268,4 +268,22 @@ mod tests {
268268
Scalar::null_typed::<f64>()
269269
);
270270
}
271+
272+
#[test]
273+
fn non_finite_numbers() {
274+
let original = PrimitiveArray::new(
275+
buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
276+
Validity::NonNullable,
277+
);
278+
let encoded = alp_encode(&original).unwrap();
279+
let decoded = encoded.to_primitive().unwrap();
280+
for idx in 0..original.len() {
281+
let decoded_val = decoded.as_slice::<f32>()[idx];
282+
let original_val = original.as_slice::<f32>()[idx];
283+
assert!(
284+
decoded_val.is_eq(original_val),
285+
"Expected {original_val} but got {decoded_val}"
286+
);
287+
}
288+
}
271289
}

encodings/alp/src/alp/compute/compare.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ mod tests {
245245
assert_eq!(r_gt, vec![true; 10]);
246246

247247
let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gt).unwrap();
248-
assert_eq!(r_gte, vec![false; 10]);
248+
assert_eq!(r_gte, vec![true; 10]);
249249

250250
let r_lte = test_alp_compare(&encoded, 0.06051_f32, Operator::Lte).unwrap();
251251
assert_eq!(r_lte, vec![true; 10]);

encodings/alp/src/alp/mod.rs

+11-11
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ mod serde;
1313
pub use array::*;
1414
pub use compress::*;
1515
use vortex_buffer::{Buffer, BufferMut};
16+
use vortex_dtype::NativePType;
1617

1718
const SAMPLE_SIZE: usize = 32;
1819

@@ -35,7 +36,7 @@ mod private {
3536
impl Sealed for f64 {}
3637
}
3738

38-
pub trait ALPFloat: private::Sealed + Float + Display + 'static {
39+
pub trait ALPFloat: private::Sealed + Float + Display + NativePType {
3940
type ALPInt: PrimInt + Display + ToPrimitive + Copy;
4041

4142
const FRACTIONAL_BITS: u8;
@@ -148,9 +149,9 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
148149

149150
#[inline]
150151
fn encode_single(value: Self, exponents: Exponents) -> Option<Self::ALPInt> {
151-
let encoded = unsafe { Self::encode_single_unchecked(value, exponents) };
152+
let encoded = Self::encode_single_unchecked(value, exponents);
152153
let decoded = Self::decode_single(encoded, exponents);
153-
if decoded == value {
154+
if decoded.is_eq(value) {
154155
return Some(encoded);
155156
}
156157
None
@@ -185,11 +186,10 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
185186
Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
186187
}
187188

188-
/// # Safety
189-
///
190-
/// The returned value may not decode back to the original value.
189+
/// Encode a single float value. The returned value might decode to a different value than the one passed in.
190+
/// Consider using [`Self::encode_single`] if you want the checked version of this function.
191191
#[inline(always)]
192-
unsafe fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
192+
fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
193193
(value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
194194
.fast_round()
195195
.as_int()
@@ -212,10 +212,10 @@ fn encode_chunk_unchecked<T: ALPFloat>(
212212

213213
// encode the chunk, counting the number of patches
214214
let mut chunk_patch_count = 0;
215-
encoded_output.extend(chunk.iter().map(|v| {
216-
let encoded = unsafe { T::encode_single_unchecked(*v, exp) };
215+
encoded_output.extend(chunk.iter().map(|&v| {
216+
let encoded = T::encode_single_unchecked(v, exp);
217217
let decoded = T::decode_single(encoded, exp);
218-
let neq = (decoded != *v) as usize;
218+
let neq = !decoded.is_eq(v) as usize;
219219
chunk_patch_count += neq;
220220
encoded
221221
}));
@@ -237,7 +237,7 @@ fn encode_chunk_unchecked<T: ALPFloat>(
237237
// write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
238238
patch_indices_mut[chunk_patch_index].write(i as u64);
239239
patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
240-
chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize;
240+
chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize;
241241
}
242242
assert_eq!(chunk_patch_index, chunk_patch_count);
243243
unsafe {

encodings/fastlanes/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ itertools = { workspace = true }
2424
num-traits = { workspace = true }
2525
rand = { workspace = true, optional = true }
2626
rkyv = { workspace = true }
27-
serde = { workspace = true }
2827
vortex-array = { workspace = true }
2928
vortex-buffer = { workspace = true }
3029
vortex-dtype = { workspace = true }

wasm-test/Cargo.lock

-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)