Skip to content

Commit

Permalink
feat: map tinyint to i8
Browse files Browse the repository at this point in the history
  • Loading branch information
WenyXu committed Nov 5, 2023
1 parent cf13577 commit 3378874
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 16 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Read [Apache ORC](https://orc.apache.org/) in Rust.
| Float, Double || | f32, f64 | Float32, Float64 |
| String, Char, and VarChar || | string | Utf8 |
| Boolean || | bool | Boolean |
| TinyInt || | u8 | Uint8 |
| TinyInt || | i8 | Int8 |
| Binary || | Vec\<u8\> | Binary |
| Decimal || | | |
| Date || | chrono::NaiveDate | Date32 |
Expand Down
10 changes: 5 additions & 5 deletions src/arrow_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use arrow::record_batch::{RecordBatch, RecordBatchReader};
use chrono::{Datelike, NaiveDate, NaiveDateTime};
use snafu::{OptionExt, ResultExt};

use self::column::tinyint::new_u8_iter;
use self::column::tinyint::new_i8_iter;
use self::column::Column;
use crate::arrow_reader::column::binary::new_binary_iterator;
use crate::arrow_reader::column::boolean::new_boolean_iter;
Expand Down Expand Up @@ -118,7 +118,7 @@ pub enum Decoder {
Int64(NullableIterator<i64>),
Int32(NullableIterator<i64>),
Int16(NullableIterator<i64>),
Uint8(NullableIterator<u8>),
Int8(NullableIterator<i8>),
Boolean(NullableIterator<bool>),
Float32(NullableIterator<f32>),
Float64(NullableIterator<f64>),
Expand Down Expand Up @@ -209,7 +209,7 @@ macro_rules! impl_decode_next_batch_cast {
impl_decode_next_batch_cast!(i64, Int64Type);
impl_decode_next_batch_cast!(i32, Int32Type);
impl_decode_next_batch_cast!(i16, Int16Type);
impl_decode_next_batch!(u8);
impl_decode_next_batch!(i8);
impl_decode_next_batch!(f32);
impl_decode_next_batch!(f64);

Expand Down Expand Up @@ -241,7 +241,7 @@ impl NaiveStripeDecoder {
Some(array) => fields.push(array),
None => break,
},
Decoder::Uint8(decoder) => match decode_next_batch_u8(decoder, chunk)? {
Decoder::Int8(decoder) => match decode_next_batch_i8(decoder, chunk)? {
Some(array) => fields.push(array),
None => break,

Check warning on line 246 in src/arrow_reader.rs

View check run for this annotation

Codecov / codecov/patch

src/arrow_reader.rs#L246

Added line #L246 was not covered by tests
},
Expand Down Expand Up @@ -335,7 +335,7 @@ impl NaiveStripeDecoder {
for col in &stripe.columns {
let decoder = match col.kind() {
crate::proto::r#type::Kind::Boolean => Decoder::Boolean(new_boolean_iter(col)?),
crate::proto::r#type::Kind::Byte => Decoder::Uint8(new_u8_iter(col)?),
crate::proto::r#type::Kind::Byte => Decoder::Int8(new_i8_iter(col)?),
crate::proto::r#type::Kind::Short => Decoder::Int16(new_i64_iter(col)?),
crate::proto::r#type::Kind::Int => Decoder::Int32(new_i64_iter(col)?),
crate::proto::r#type::Kind::Long => Decoder::Int64(new_i64_iter(col)?),
Expand Down
7 changes: 5 additions & 2 deletions src/arrow_reader/column/tinyint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@ use crate::error::{InvalidColumnSnafu, Result};
use crate::proto::stream::Kind;
use crate::reader::decode::byte_rle::ByteRleIter;

pub fn new_u8_iter(column: &Column) -> Result<NullableIterator<u8>> {
pub fn new_i8_iter(column: &Column) -> Result<NullableIterator<i8>> {
let present = new_present_iter(column)?.collect::<Result<Vec<_>>>()?;
let rows: usize = present.iter().filter(|&p| *p).count();

let iter = column
.stream(Kind::Data)
.transpose()?
.map(|reader| Box::new(ByteRleIter::new(reader, rows)) as _)
.map(|reader| {
Box::new(ByteRleIter::new(reader, rows).map(|value| value.map(|value| value as i8)))
as _
})
.context(InvalidColumnSnafu { name: &column.name })?;

Ok(NullableIterator {
Expand Down
Binary file modified tests/basic/data/test.orc
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/basic/data/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"utf8_decrease": ["eeeee", "dddd", "ccc", "bb", "a"],
"timestamp_simple": [datetime.datetime(2023, 4, 1, 20, 15, 30, 2000), datetime.datetime.fromtimestamp(int('1629617204525777000')/1000000000), datetime.datetime(2023, 1, 1), datetime.datetime(2023, 2, 1), datetime.datetime(2023, 3, 1)],
"date_simple": [datetime.date(2023, 4, 1), datetime.date(2023, 3, 1), datetime.date(2023, 1, 1), datetime.date(2023, 2, 1), datetime.date(2023, 3, 1)],
"tinyint_simple": [0, None, 1, 128, 255]
"tinyint_simple": [-1, None, 1, 127, -127]
}

def infer_schema(data):
Expand Down
13 changes: 6 additions & 7 deletions tests/basic/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,11 @@ pub fn basic_test_0() {
let expected = r#"+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+----------------+
| a | b | str_direct | d | e | f | int_short_repeated | int_neg_short_repeated | int_delta | int_neg_delta | int_direct | int_neg_direct | bigint_direct | bigint_neg_direct | bigint_other | utf8_increase | utf8_decrease | timestamp_simple | date_simple | tinyint_simple |
+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+----------------+
| 1.0 | true | a | a | ddd | aaaaa | 5 | -5 | 1 | 5 | 1 | -1 | 1 | -1 | 5 | a | eeeee | 2023-04-01T20:15:30.002 | 2023-04-01 | 0 |
| 1.0 | true | a | a | ddd | aaaaa | 5 | -5 | 1 | 5 | 1 | -1 | 1 | -1 | 5 | a | eeeee | 2023-04-01T20:15:30.002 | 2023-04-01 | -1 |
| 2.0 | false | cccccc | bb | cc | bbbbb | 5 | -5 | 2 | 4 | 6 | -6 | 6 | -6 | -5 | bb | dddd | 2021-08-22T07:26:44.525777 | 2023-03-01 | |
| | | | | | | | | | | | | | | 1 | ccc | ccc | 2023-01-01T00:00:00 | 2023-01-01 | 1 |
| 4.0 | true | ddd | ccc | bb | ccccc | 5 | -5 | 4 | 2 | 3 | -3 | 3 | -3 | 5 | dddd | bb | 2023-02-01T00:00:00 | 2023-02-01 | 128 |
| 5.0 | false | ee | ddd | a | ddddd | 5 | -5 | 5 | 1 | 2 | -2 | 2 | -2 | 5 | eeeee | a | 2023-03-01T00:00:00 | 2023-03-01 | 255 |
| 4.0 | true | ddd | ccc | bb | ccccc | 5 | -5 | 4 | 2 | 3 | -3 | 3 | -3 | 5 | dddd | bb | 2023-02-01T00:00:00 | 2023-02-01 | 127 |
| 5.0 | false | ee | ddd | a | ddddd | 5 | -5 | 5 | 1 | 2 | -2 | 2 | -2 | 5 | eeeee | a | 2023-03-01T00:00:00 | 2023-03-01 | -127 |
+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+----------------+"#;
assert_eq!(
expected,
Expand All @@ -253,13 +253,12 @@ pub async fn async_basic_test_0() {
let expected = r#"+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+----------------+
| a | b | str_direct | d | e | f | int_short_repeated | int_neg_short_repeated | int_delta | int_neg_delta | int_direct | int_neg_direct | bigint_direct | bigint_neg_direct | bigint_other | utf8_increase | utf8_decrease | timestamp_simple | date_simple | tinyint_simple |
+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+----------------+
| 1.0 | true | a | a | ddd | aaaaa | 5 | -5 | 1 | 5 | 1 | -1 | 1 | -1 | 5 | a | eeeee | 2023-04-01T20:15:30.002 | 2023-04-01 | 0 |
| 1.0 | true | a | a | ddd | aaaaa | 5 | -5 | 1 | 5 | 1 | -1 | 1 | -1 | 5 | a | eeeee | 2023-04-01T20:15:30.002 | 2023-04-01 | -1 |
| 2.0 | false | cccccc | bb | cc | bbbbb | 5 | -5 | 2 | 4 | 6 | -6 | 6 | -6 | -5 | bb | dddd | 2021-08-22T07:26:44.525777 | 2023-03-01 | |
| | | | | | | | | | | | | | | 1 | ccc | ccc | 2023-01-01T00:00:00 | 2023-01-01 | 1 |
| 4.0 | true | ddd | ccc | bb | ccccc | 5 | -5 | 4 | 2 | 3 | -3 | 3 | -3 | 5 | dddd | bb | 2023-02-01T00:00:00 | 2023-02-01 | 128 |
| 5.0 | false | ee | ddd | a | ddddd | 5 | -5 | 5 | 1 | 2 | -2 | 2 | -2 | 5 | eeeee | a | 2023-03-01T00:00:00 | 2023-03-01 | 255 |
| 4.0 | true | ddd | ccc | bb | ccccc | 5 | -5 | 4 | 2 | 3 | -3 | 3 | -3 | 5 | dddd | bb | 2023-02-01T00:00:00 | 2023-02-01 | 127 |
| 5.0 | false | ee | ddd | a | ddddd | 5 | -5 | 5 | 1 | 2 | -2 | 2 | -2 | 5 | eeeee | a | 2023-03-01T00:00:00 | 2023-03-01 | -127 |
+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+----------------+"#;

assert_eq!(
expected,
pretty::pretty_format_batches(&batch).unwrap().to_string()
Expand Down

0 comments on commit 3378874

Please sign in to comment.