Skip to content

Commit

Permalink
fix: read array<float> columns correctly (#112)
Browse files Browse the repository at this point in the history
* fix: panic when reading a column of type array<float>

* ci: add tests for nested column.
  • Loading branch information
youngsofun authored Jul 2, 2024
1 parent f102a23 commit d4c5377
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 27 deletions.
4 changes: 2 additions & 2 deletions src/array_decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ pub fn array_decoder_factory(
}
);
let iter = stripe.stream_map().get(column, Kind::Data);
let iter = Box::new(FloatIter::new(iter, stripe.number_of_rows()));
let iter = Box::new(FloatIter::new(iter));
let present = get_present_vec(column, stripe)?
.map(|iter| Box::new(iter.into_iter()) as Box<dyn Iterator<Item = bool> + Send>);
Box::new(Float32ArrayDecoder::new(iter, present))
Expand All @@ -418,7 +418,7 @@ pub fn array_decoder_factory(
}
);
let iter = stripe.stream_map().get(column, Kind::Data);
let iter = Box::new(FloatIter::new(iter, stripe.number_of_rows()));
let iter = Box::new(FloatIter::new(iter));
let present = get_present_vec(column, stripe)?
.map(|iter| Box::new(iter.into_iter()) as Box<dyn Iterator<Item = bool> + Send>);
Box::new(Float64ArrayDecoder::new(iter, present))
Expand Down
39 changes: 14 additions & 25 deletions src/reader/decode/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,53 +32,42 @@ impl Float for f64 {
/// An iterator that decodes floating-point values of type `T` from the raw
/// bytes supplied by the reader `R`.
///
/// Every item is a `Result<T>`: values are rebuilt from little-endian byte
/// chunks, and read failures are surfaced as errors.
pub struct FloatIter<T: Float, R: std::io::Read> {
reader: R,
remaining: usize,
// `T` is never stored; the marker only ties the iterator to its item type.
phantom: std::marker::PhantomData<T>,
}

impl<T: Float, R: std::io::Read> FloatIter<T, R> {
/// Returns a new [`FloatIter`] that decodes its values from `reader`.
#[inline]
pub fn new(reader: R, length: usize) -> Self {
pub fn new(reader: R) -> Self {
Self {
reader,
remaining: length,
phantom: Default::default(),
}
}

/// Returns the number of items that have not been yielded yet.
#[inline]
pub fn len(&self) -> usize {
self.remaining
}
}

impl<T: Float, R: std::io::Read> Iterator for FloatIter<T, R> {
type Item = Result<T>;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.remaining == 0 {
return None;
}
let mut chunk: T::OBytes = Default::default();
if let Err(err) = self
match self
.reader
.read_exact(chunk.as_mut())
.read(chunk.as_mut())
.context(error::DecodeFloatSnafu)
{
return Some(Err(err));
Err(err) => {
return Some(Err(err));
}
Ok(n) => {
if n == 0 {
return None;
}
}
};
self.remaining -= 1;
Some(Ok(T::from_le_bytes(chunk)))
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.len();
(remaining, Some(remaining))
}
}

#[cfg(test)]
Expand All @@ -100,7 +89,7 @@ mod tests {
let bytes = float_to_bytes(&input);
let bytes = Cursor::new(bytes);

let iter = FloatIter::<F, _>::new(bytes, input.len());
let iter = FloatIter::<F, _>::new(bytes);
let actual = iter.collect::<Result<Vec<_>>>().unwrap();

assert_eq!(input, actual);
Expand Down Expand Up @@ -131,7 +120,7 @@ mod tests {
let bytes = float_to_bytes(&[f32::NAN]);
let bytes = Cursor::new(bytes);

let iter = FloatIter::<f32, _>::new(bytes, 1);
let iter = FloatIter::<f32, _>::new(bytes);
let actual = iter.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(actual.len(), 1);
assert!(actual[0].is_nan());
Expand All @@ -142,7 +131,7 @@ mod tests {
let bytes = float_to_bytes(&[f64::NAN]);
let bytes = Cursor::new(bytes);

let iter = FloatIter::<f64, _>::new(bytes, 1);
let iter = FloatIter::<f64, _>::new(bytes);
let actual = iter.collect::<Result<Vec<_>>>().unwrap();
assert_eq!(actual.len(), 1);
assert!(actual[0].is_nan());
Expand Down
Binary file added tests/basic/data/nested_array_float.orc
Binary file not shown.
Binary file added tests/basic/data/nested_array_struct.orc
Binary file not shown.
Binary file added tests/basic/data/nested_map_struct.orc
Binary file not shown.
30 changes: 30 additions & 0 deletions tests/basic/data/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def _write(

_write("struct<nest:struct<a:float,b:boolean>>", nested_struct, "nested_struct.orc")


nested_array = {
"value": [
[1, None, 3, 43, 5],
Expand All @@ -110,6 +111,25 @@ def _write(

_write("struct<value:array<int>>", nested_array, "nested_array.orc")


# Two rows of array<float>; the second row starts with a null element.
nested_array_float = dict(
    value=[
        [1.0, 3.0],
        [None, 2.0],
    ],
)

_write(
    "struct<value:array<float>>",
    nested_array_float,
    "nested_array_float.orc",
)

# Two rows of array<struct<a, b, c>>; the second row starts with a null struct.
nested_array_struct = dict(
    value=[
        [(1.0, 1, "01"), (2.0, 2, "02")],
        [None, (3.0, 3, "03")],
    ],
)

_write(
    "struct<value:array<struct<a:float,b:int,c:string>>>",
    nested_array_struct,
    "nested_array_struct.orc",
)

nested_map = {
"map": [
{"zero": 0, "one": 1},
Expand All @@ -121,6 +141,16 @@ def _write(

_write("struct<map:map<string,int>>", nested_map, "nested_map.orc")

# Three rows of map<string, struct<a, b, c>>; the middle row is a null map.
# The column is keyed as "value" so the data agrees with the field name in the
# schema string below.
# NOTE(review): this assumes `_write` does not rely on the previous "map" key -
# confirm against its definition before regenerating the fixture.
nested_map_struct = {
    "value": [
        {"01": (1.0, 1, "01"), "02": (2.0, 1, "02")},
        None,
        {"03": (3.0, 3, "03"), "04": (4.0, 4, "04")},
    ],
}

_write(
    "struct<value:map<string,struct<a:float,b:int,c:string>>>",
    nested_map_struct,
    "nested_map_struct.orc",
)


_write(
infer_schema(data),
Expand Down
52 changes: 52 additions & 0 deletions tests/basic/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,58 @@ pub fn basic_test_nested_array() {
assert_batches_eq(&batch, &expected);
}

/// Reads `nested_array_float.orc` through the Arrow reader and compares the
/// collected batches with the expected rendered table.
#[test]
pub fn basic_test_nested_array_float() {
    let expected_table = [
        "+------------+",
        "| value |",
        "+------------+",
        "| [1.0, 3.0] |",
        "| [, 2.0] |",
        "+------------+",
    ];

    let orc_file = basic_path("nested_array_float.orc");
    let batches = new_arrow_reader_root(&orc_file)
        .collect::<Result<Vec<_>, _>>()
        .unwrap();

    assert_batches_eq(&batches, &expected_table);
}

/// Reads `nested_array_struct.orc` through the Arrow reader and compares the
/// collected batches with the expected rendered table.
#[test]
pub fn basic_test_nested_array_struct() {
    let expected_table = [
        "+------------------------------------------------+",
        "| value |",
        "+------------------------------------------------+",
        "| [{a: 1.0, b: 1, c: 01}, {a: 2.0, b: 2, c: 02}] |",
        "| [, {a: 3.0, b: 3, c: 03}] |",
        "+------------------------------------------------+",
    ];

    let orc_file = basic_path("nested_array_struct.orc");
    let batches = new_arrow_reader_root(&orc_file)
        .collect::<Result<Vec<_>, _>>()
        .unwrap();

    assert_batches_eq(&batches, &expected_table);
}

/// Reads `nested_map_struct.orc` through the Arrow reader and compares the
/// collected batches with the expected rendered table.
#[test]
pub fn basic_test_nested_map_struct() {
    let expected_table = [
        "+--------------------------------------------------------+",
        "| value |",
        "+--------------------------------------------------------+",
        "| {01: {a: 1.0, b: 1, c: 01}, 02: {a: 2.0, b: 1, c: 02}} |",
        "| |",
        "| {03: {a: 3.0, b: 3, c: 03}, 04: {a: 4.0, b: 4, c: 04}} |",
        "+--------------------------------------------------------+",
    ];

    let orc_file = basic_path("nested_map_struct.orc");
    let batches = new_arrow_reader_root(&orc_file)
        .collect::<Result<Vec<_>, _>>()
        .unwrap();

    assert_batches_eq(&batches, &expected_table);
}

#[test]
pub fn basic_test_nested_map() {
let path = basic_path("nested_map.orc");
Expand Down

0 comments on commit d4c5377

Please sign in to comment.