Skip to content

Commit

Permalink
Switch ByteRleReader to emit i8 instead of u8
Browse files Browse the repository at this point in the history
Most cases we want i8 (For Int8Array, Union types) so we have to cast
from u8 to i8 which isn't ergonomic. Only cases we need u8 is for
boolean, but that can probably be refactored later.
  • Loading branch information
Jefffrey committed Sep 21, 2024
1 parent e5b0645 commit d87734b
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 15 deletions.
3 changes: 1 addition & 2 deletions src/array_decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,7 @@ pub fn array_decoder_factory(
}
);
let iter = stripe.stream_map().get(column, Kind::Data);
let iter =
Box::new(ByteRleReader::new(iter).map(|value| value.map(|value| value as i8)));
let iter = Box::new(ByteRleReader::new(iter));
let present = get_present_vec(column, stripe)?
.map(|iter| Box::new(iter.into_iter()) as Box<dyn Iterator<Item = bool> + Send>);
Box::new(Int8ArrayDecoder::new(iter, present))
Expand Down
2 changes: 1 addition & 1 deletion src/array_decoder/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub struct UnionArrayDecoder {
// TODO: encode this assumption into types
fields: UnionFields,
variants: Vec<Box<dyn ArrayBatchDecoder>>,
tags: Box<dyn Iterator<Item = Result<u8>> + Send>,
tags: Box<dyn Iterator<Item = Result<i8>> + Send>,
present: Option<Box<dyn Iterator<Item = bool> + Send>>,
}

Expand Down
2 changes: 1 addition & 1 deletion src/encoding/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl<R: Read> Iterator for BooleanIter<R> {
if self.bits_in_data == 0 {
match self.iter.next() {
Some(Ok(data)) => {
self.data = data;
self.data = data as u8;
self.bits_in_data = 8;
Some(Ok(self.value()))
}
Expand Down
21 changes: 10 additions & 11 deletions src/encoding/byte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl<R: Read> ByteRleReader<R> {
}

impl<R: Read> Iterator for ByteRleReader<R> {
type Item = Result<u8>;
type Item = Result<i8>;

fn next(&mut self) -> Option<Self::Item> {
if self.used == self.num_literals {
Expand All @@ -241,7 +241,7 @@ impl<R: Read> Iterator for ByteRleReader<R> {
self.literals[self.used]
};
self.used += 1;
Some(Ok(result))
Some(Ok(result as i8))
}
}

Expand Down Expand Up @@ -277,10 +277,9 @@ mod tests {
assert_eq!(iter, vec![0x44, 0x45]);
}

fn roundtrip_byte_rle_helper(values: &[u8]) -> Result<Vec<u8>> {
fn roundtrip_byte_rle_helper(values: &[i8]) -> Result<Vec<i8>> {
let mut writer = ByteRleWriter::new();
let values = values.iter().map(|&b| b as i8).collect::<Vec<_>>();
writer.write_slice(&values);
writer.write_slice(values);
writer.flush();

let buf = writer.take_inner();
Expand All @@ -291,19 +290,19 @@ mod tests {

#[derive(Debug, Clone)]
enum ByteSequence {
Run(u8, usize),
Literals(Vec<u8>),
Run(i8, usize),
Literals(Vec<i8>),
}

fn byte_sequence_strategy() -> impl Strategy<Value = ByteSequence> {
// We limit the max length of the sequences to 140 to try get more interleaving
prop_oneof![
(any::<u8>(), 1..140_usize).prop_map(|(a, b)| ByteSequence::Run(a, b)),
prop::collection::vec(any::<u8>(), 1..140).prop_map(ByteSequence::Literals)
(any::<i8>(), 1..140_usize).prop_map(|(a, b)| ByteSequence::Run(a, b)),
prop::collection::vec(any::<i8>(), 1..140).prop_map(ByteSequence::Literals)
]
}

fn generate_bytes_from_sequences(sequences: Vec<ByteSequence>) -> Vec<u8> {
fn generate_bytes_from_sequences(sequences: Vec<ByteSequence>) -> Vec<i8> {
let mut bytes = vec![];
for sequence in sequences {
match sequence {
Expand All @@ -318,7 +317,7 @@ mod tests {

proptest! {
#[test]
fn roundtrip_byte_rle_pure_random(values: Vec<u8>) {
fn roundtrip_byte_rle_pure_random(values: Vec<i8>) {
// Biased towards literal sequences due to purely random values
let out = roundtrip_byte_rle_helper(&values).unwrap();
prop_assert_eq!(out, values);
Expand Down

0 comments on commit d87734b

Please sign in to comment.