Skip to content

Commit 7367b59

Browse files
Jefffrey, progval
authored and committed
Add roundtrip test case for null buffer test
1 parent a3a7bea commit 7367b59

File tree

4 files changed

+60
-76
lines changed

4 files changed

+60
-76
lines changed

src/arrow_writer.rs

+60-1
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,13 @@ mod tests {
268268
Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, RecordBatchReader,
269269
StringArray,
270270
},
271+
buffer::NullBuffer,
271272
compute::concat_batches,
272273
datatypes::{DataType as ArrowDataType, Field, Schema},
273274
};
274275
use bytes::Bytes;
275276

276-
use crate::ArrowReaderBuilder;
277+
use crate::{stripe::Stripe, ArrowReaderBuilder};
277278

278279
use super::*;
279280

@@ -474,4 +475,62 @@ mod tests {
474475
let rows = roundtrip(&[batch1, batch2]);
475476
assert_eq!(expected_batch, rows[0]);
476477
}
478+
479+
#[test]
480+
fn test_empty_null_buffers() {
481+
// Create an ORC file with present streams, but which have no nulls.
482+
// When this file is read then the resulting Arrow arrays should have
483+
// NO null buffer, even though there is a present stream.
484+
let schema = Arc::new(Schema::new(vec![Field::new(
485+
"int64",
486+
ArrowDataType::Int64,
487+
true,
488+
)]));
489+
490+
// Array with null buffer but has no nulls
491+
let array_empty_nulls = Arc::new(Int64Array::from_iter_values_with_nulls(
492+
vec![1],
493+
Some(NullBuffer::from_iter(vec![true])),
494+
));
495+
assert!(array_empty_nulls.nulls().is_some());
496+
assert!(array_empty_nulls.null_count() == 0);
497+
498+
let batch = RecordBatch::try_new(schema, vec![array_empty_nulls]).unwrap();
499+
500+
// Encoding to bytes
501+
let mut f = vec![];
502+
let mut writer = ArrowWriterBuilder::new(&mut f, batch.schema())
503+
.try_build()
504+
.unwrap();
505+
writer.write(&batch).unwrap();
506+
writer.close().unwrap();
507+
let mut f = Bytes::from(f);
508+
let builder = ArrowReaderBuilder::try_new(f.clone()).unwrap();
509+
510+
// Ensure the ORC file we wrote indeed has a present stream
511+
let stripe = Stripe::new(
512+
&mut f,
513+
&builder.file_metadata,
514+
builder.file_metadata().root_data_type(),
515+
&builder.file_metadata().stripe_metadatas()[0],
516+
)
517+
.unwrap();
518+
assert_eq!(stripe.columns().len(), 1);
519+
// Make sure we're getting the right column
520+
assert_eq!(stripe.columns()[0].name(), "int64");
521+
// Then check present stream
522+
let present_stream = stripe
523+
.stream_map()
524+
.get_opt(&stripe.columns()[0], proto::stream::Kind::Present);
525+
assert!(present_stream.is_some());
526+
527+
// Decoding from bytes
528+
let reader = builder.build();
529+
let rows = reader.collect::<Result<Vec<_>, _>>().unwrap();
530+
531+
assert_eq!(rows.len(), 1);
532+
assert_eq!(rows[0].num_columns(), 1);
533+
// Ensure read array has no null buffer
534+
assert!(rows[0].column(0).nulls().is_none());
535+
}
477536
}

tests/basic/data/no_nulls.orc

-355 Bytes
Binary file not shown.

tests/basic/data/no_nulls.py

-32
This file was deleted.

tests/basic/main.rs

-43
Original file line numberDiff line numberDiff line change
@@ -625,46 +625,3 @@ pub fn assert_batches_eq(batches: &[RecordBatch], expected_lines: &[&str]) {
625625
expected_lines, actual_lines
626626
);
627627
}
628-
629-
/// Tests a file with a 'present' stream for each column, but no actual nulls in it
630-
#[test]
631-
pub fn no_nulls_test() {
632-
let path = basic_path("no_nulls.orc");
633-
let reader = new_arrow_reader_root(&path);
634-
let schema = reader.schema();
635-
let batches = reader.collect::<Result<Vec<_>, _>>().unwrap();
636-
637-
let expected = [
638-
"+------+-------+",
639-
"| col0 | col1 |",
640-
"+------+-------+",
641-
"| 1 | row 1 |",
642-
"| 2 | row 2 |",
643-
"+------+-------+",
644-
];
645-
assert_batches_eq(&batches, &expected);
646-
647-
let expected_file_schema = Arc::new(Schema::new(vec![
648-
Field::new(
649-
"col0",
650-
DataType::Int32,
651-
true, // this shouldn't change unless no_nulls.orc was incorrectly regenerated
652-
),
653-
Field::new(
654-
"col1",
655-
DataType::Utf8,
656-
true, // this shouldn't change unless no_nulls.orc was incorrectly regenerated
657-
),
658-
]));
659-
assert_eq!(schema, expected_file_schema);
660-
661-
let expected_batch_schema = Arc::new(Schema::new(vec![
662-
Field::new("col0", DataType::Int32, false),
663-
Field::new("col1", DataType::Utf8, false),
664-
]));
665-
for batch in &batches {
666-
assert_eq!(batch.schema(), expected_batch_schema);
667-
assert!(batch.column_by_name("col0").unwrap().nulls().is_none());
668-
assert!(batch.column_by_name("col1").unwrap().nulls().is_none());
669-
}
670-
}

0 commit comments

Comments
 (0)