diff --git a/src/arrow_reader/decoder/mod.rs b/src/arrow_reader/decoder/mod.rs index 0febbdb0..1cb960f7 100644 --- a/src/arrow_reader/decoder/mod.rs +++ b/src/arrow_reader/decoder/mod.rs @@ -52,10 +52,10 @@ impl PrimitiveArrayDecoder { ) -> Result> { let present = derive_present_vec(&mut self.present, parent_present, batch_size); - match &present { + match present { Some(present) => { let mut builder = PrimitiveBuilder::::with_capacity(batch_size); - for &is_present in present { + for is_present in present { if is_present { // TODO: return as error instead let val = self @@ -128,10 +128,10 @@ impl ArrayBatchDecoder for BooleanArrayDecoder { ) -> Result { let present = derive_present_vec(&mut self.present, parent_present, batch_size); - match &present { + match present { Some(present) => { let mut builder = BooleanBuilder::with_capacity(batch_size); - for &is_present in present { + for is_present in present { if is_present { // TODO: return as error instead let val = self diff --git a/src/arrow_reader/decoder/string.rs b/src/arrow_reader/decoder/string.rs index 5af2417d..0c4d3e0d 100644 --- a/src/arrow_reader/decoder/string.rs +++ b/src/arrow_reader/decoder/string.rs @@ -173,7 +173,13 @@ impl ArrayBatchDecoder for DictionaryStringArrayDecoder { let keys = self .indexes .next_primitive_batch(batch_size, parent_present)?; + // TODO: ORC spec states: For dictionary encodings the dictionary is sorted + // (in lexicographical order of bytes in the UTF-8 encodings). + // So we can set the is_ordered property here? let array = DictionaryArray::try_new(keys, self.dictionary.clone()).context(ArrowSnafu)?; + // Cast back to StringArray to ensure all stripes have consistent datatype + // TODO: Is there any way to preserve the dictionary encoding? + // This costs performance. let array = cast(&array, &DataType::Utf8).context(ArrowSnafu)?; let array = Arc::new(array);