Skip to content

Commit 966cda9

Browse files
committed
Clean up unused code for view types in offset buffer (#6040)
* clean up unused view types in offset buffer
* make tests happy
1 parent 5b6bc55 commit 966cda9

File tree

2 files changed

+14
-200
lines changed

2 files changed

+14
-200
lines changed

parquet/src/arrow/array_reader/byte_array.rs

+1 -113
Original file line number | Diff line number | Diff line change
@@ -599,7 +599,7 @@ mod tests {
599599
use super::*;
600600
use crate::arrow::array_reader::test_util::{byte_array_all_encodings, utf8_column};
601601
use crate::arrow::record_reader::buffer::ValuesBuffer;
602-
use arrow_array::{Array, StringArray, StringViewArray};
602+
use arrow_array::{Array, StringArray};
603603
use arrow_buffer::Buffer;
604604

605605
#[test]
@@ -657,64 +657,6 @@ mod tests {
657657
}
658658
}
659659

660-
#[test]
661-
fn test_byte_array_string_view_decoder() {
662-
let (pages, encoded_dictionary) =
663-
byte_array_all_encodings(vec!["hello", "world", "large payload over 12 bytes", "b"]);
664-
665-
let column_desc = utf8_column();
666-
let mut decoder = ByteArrayColumnValueDecoder::new(&column_desc);
667-
668-
decoder
669-
.set_dict(encoded_dictionary, 4, Encoding::RLE_DICTIONARY, false)
670-
.unwrap();
671-
672-
for (encoding, page) in pages {
673-
let mut output = OffsetBuffer::<i32>::default();
674-
decoder.set_data(encoding, page, 4, Some(4)).unwrap();
675-
676-
assert_eq!(decoder.read(&mut output, 1).unwrap(), 1);
677-
678-
assert_eq!(output.values.as_slice(), "hello".as_bytes());
679-
assert_eq!(output.offsets.as_slice(), &[0, 5]);
680-
681-
assert_eq!(decoder.read(&mut output, 1).unwrap(), 1);
682-
assert_eq!(output.values.as_slice(), "helloworld".as_bytes());
683-
assert_eq!(output.offsets.as_slice(), &[0, 5, 10]);
684-
685-
assert_eq!(decoder.read(&mut output, 2).unwrap(), 2);
686-
assert_eq!(
687-
output.values.as_slice(),
688-
"helloworldlarge payload over 12 bytesb".as_bytes()
689-
);
690-
assert_eq!(output.offsets.as_slice(), &[0, 5, 10, 37, 38]);
691-
692-
assert_eq!(decoder.read(&mut output, 4).unwrap(), 0);
693-
694-
let valid = [false, false, true, true, false, true, true, false, false];
695-
let valid_buffer = Buffer::from_iter(valid.iter().cloned());
696-
697-
output.pad_nulls(0, 4, valid.len(), valid_buffer.as_slice());
698-
let array = output.into_array(Some(valid_buffer), ArrowType::Utf8View);
699-
let strings = array.as_any().downcast_ref::<StringViewArray>().unwrap();
700-
701-
assert_eq!(
702-
strings.iter().collect::<Vec<_>>(),
703-
vec![
704-
None,
705-
None,
706-
Some("hello"),
707-
Some("world"),
708-
None,
709-
Some("large payload over 12 bytes"),
710-
Some("b"),
711-
None,
712-
None,
713-
]
714-
);
715-
}
716-
}
717-
718660
#[test]
719661
fn test_byte_array_decoder_skip() {
720662
let (pages, encoded_dictionary) =
@@ -759,60 +701,6 @@ mod tests {
759701
}
760702
}
761703

762-
#[test]
763-
fn test_byte_array_string_view_decoder_skip() {
764-
let (pages, encoded_dictionary) =
765-
byte_array_all_encodings(vec!["hello", "world", "a", "large payload over 12 bytes"]);
766-
767-
let column_desc = utf8_column();
768-
let mut decoder = ByteArrayColumnValueDecoder::new(&column_desc);
769-
770-
decoder
771-
.set_dict(encoded_dictionary, 4, Encoding::RLE_DICTIONARY, false)
772-
.unwrap();
773-
774-
for (encoding, page) in pages {
775-
let mut output = OffsetBuffer::<i32>::default();
776-
decoder.set_data(encoding, page, 4, Some(4)).unwrap();
777-
778-
assert_eq!(decoder.read(&mut output, 1).unwrap(), 1);
779-
780-
assert_eq!(output.values.as_slice(), "hello".as_bytes());
781-
assert_eq!(output.offsets.as_slice(), &[0, 5]);
782-
783-
assert_eq!(decoder.skip_values(1).unwrap(), 1);
784-
assert_eq!(decoder.skip_values(1).unwrap(), 1);
785-
786-
assert_eq!(decoder.read(&mut output, 1).unwrap(), 1);
787-
assert_eq!(
788-
output.values.as_slice(),
789-
"hellolarge payload over 12 bytes".as_bytes()
790-
);
791-
assert_eq!(output.offsets.as_slice(), &[0, 5, 32]);
792-
793-
assert_eq!(decoder.read(&mut output, 4).unwrap(), 0);
794-
795-
let valid = [false, false, true, true, false, false];
796-
let valid_buffer = Buffer::from_iter(valid.iter().cloned());
797-
798-
output.pad_nulls(0, 2, valid.len(), valid_buffer.as_slice());
799-
let array = output.into_array(Some(valid_buffer), ArrowType::Utf8View);
800-
let strings = array.as_any().downcast_ref::<StringViewArray>().unwrap();
801-
802-
assert_eq!(
803-
strings.iter().collect::<Vec<_>>(),
804-
vec![
805-
None,
806-
None,
807-
Some("hello"),
808-
Some("large payload over 12 bytes"),
809-
None,
810-
None,
811-
]
812-
);
813-
}
814-
}
815-
816704
#[test]
817705
fn test_byte_array_decoder_nulls() {
818706
let (pages, encoded_dictionary) = byte_array_all_encodings(Vec::<&str>::new());

parquet/src/arrow/buffer/offset_buffer.rs

+13 -87
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,10 @@
1818
use crate::arrow::buffer::bit_util::iter_set_bits_rev;
1919
use crate::arrow::record_reader::buffer::ValuesBuffer;
2020
use crate::errors::{ParquetError, Result};
21-
use arrow_array::builder::GenericByteViewBuilder;
22-
use arrow_array::types::BinaryViewType;
2321
use arrow_array::{make_array, ArrayRef, OffsetSizeTrait};
2422
use arrow_buffer::{ArrowNativeType, Buffer};
2523
use arrow_data::ArrayDataBuilder;
2624
use arrow_schema::DataType as ArrowType;
27-
use std::sync::Arc;
2825

2926
/// A buffer of variable-sized byte arrays that can be converted into
3027
/// a corresponding [`ArrayRef`]
@@ -128,51 +125,18 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {
128125

129126
/// Converts this into an [`ArrayRef`] with the provided `data_type` and `null_buffer`
130127
pub fn into_array(self, null_buffer: Option<Buffer>, data_type: ArrowType) -> ArrayRef {
131-
match data_type {
132-
ArrowType::Utf8View => {
133-
let mut builder = self.build_generic_byte_view();
134-
Arc::new(builder.finish().to_string_view().unwrap())
135-
}
136-
ArrowType::BinaryView => {
137-
let mut builder = self.build_generic_byte_view();
138-
Arc::new(builder.finish())
139-
}
140-
_ => {
141-
let array_data_builder = ArrayDataBuilder::new(data_type)
142-
.len(self.len())
143-
.add_buffer(Buffer::from_vec(self.offsets))
144-
.add_buffer(Buffer::from_vec(self.values))
145-
.null_bit_buffer(null_buffer);
146-
147-
let data = match cfg!(debug_assertions) {
148-
true => array_data_builder.build().unwrap(),
149-
false => unsafe { array_data_builder.build_unchecked() },
150-
};
151-
152-
make_array(data)
153-
}
154-
}
155-
}
156-
157-
fn build_generic_byte_view(self) -> GenericByteViewBuilder<BinaryViewType> {
158-
let mut builder = GenericByteViewBuilder::<BinaryViewType>::with_capacity(self.len());
159-
let buffer = Buffer::from_vec(self.values);
160-
let block = builder.append_block(buffer);
161-
for window in self.offsets.windows(2) {
162-
let start = window[0];
163-
let end = window[1];
164-
let len = (end - start).to_usize().unwrap();
165-
166-
if len != 0 {
167-
// Safety: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType
168-
unsafe {
169-
builder.append_view_unchecked(block, start.as_usize() as u32, len as u32);
170-
}
171-
} else {
172-
builder.append_null();
173-
}
174-
}
175-
builder
128+
let array_data_builder = ArrayDataBuilder::new(data_type)
129+
.len(self.len())
130+
.add_buffer(Buffer::from_vec(self.offsets))
131+
.add_buffer(Buffer::from_vec(self.values))
132+
.null_bit_buffer(null_buffer);
133+
134+
let data = match cfg!(debug_assertions) {
135+
true => array_data_builder.build().unwrap(),
136+
false => unsafe { array_data_builder.build_unchecked() },
137+
};
138+
139+
make_array(data)
176140
}
177141
}
178142

@@ -229,7 +193,7 @@ impl<I: OffsetSizeTrait> ValuesBuffer for OffsetBuffer<I> {
229193
#[cfg(test)]
230194
mod tests {
231195
use super::*;
232-
use arrow_array::{Array, LargeStringArray, StringArray, StringViewArray};
196+
use arrow_array::{Array, LargeStringArray, StringArray};
233197

234198
#[test]
235199
fn test_offset_buffer_empty() {
@@ -280,44 +244,6 @@ mod tests {
280244
);
281245
}
282246

283-
#[test]
284-
fn test_string_view() {
285-
let mut buffer = OffsetBuffer::<i32>::default();
286-
for v in [
287-
"hello",
288-
"world",
289-
"large payload over 12 bytes",
290-
"a",
291-
"b",
292-
"c",
293-
] {
294-
buffer.try_push(v.as_bytes(), false).unwrap()
295-
}
296-
let split = std::mem::take(&mut buffer);
297-
298-
let array = split.into_array(None, ArrowType::Utf8View);
299-
let strings = array.as_any().downcast_ref::<StringViewArray>().unwrap();
300-
assert_eq!(
301-
strings.iter().map(|x| x.unwrap()).collect::<Vec<_>>(),
302-
vec![
303-
"hello",
304-
"world",
305-
"large payload over 12 bytes",
306-
"a",
307-
"b",
308-
"c"
309-
]
310-
);
311-
312-
buffer.try_push("test".as_bytes(), false).unwrap();
313-
let array = buffer.into_array(None, ArrowType::Utf8View);
314-
let strings = array.as_any().downcast_ref::<StringViewArray>().unwrap();
315-
assert_eq!(
316-
strings.iter().map(|x| x.unwrap()).collect::<Vec<_>>(),
317-
vec!["test"]
318-
);
319-
}
320-
321247
#[test]
322248
fn test_offset_buffer_pad_nulls() {
323249
let mut buffer = OffsetBuffer::<i32>::default();

0 commit comments

Comments
 (0)