From 6e9f95747bb3a12265cfe7e71420a30ec833f907 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 22 Sep 2024 22:27:38 +1000 Subject: [PATCH] Simplify retrieval of timestamp decoder --- src/array_decoder/timestamp.rs | 74 +++++++++++++--------------------- 1 file changed, 29 insertions(+), 45 deletions(-) diff --git a/src/array_decoder/timestamp.rs b/src/array_decoder/timestamp.rs index dae7f436..8062719d 100644 --- a/src/array_decoder/timestamp.rs +++ b/src/array_decoder/timestamp.rs @@ -44,12 +44,11 @@ const NANOSECOND_DIGITS: i8 = 9; /// point for all timestamp values, to the UNIX epoch of 1 January 1970. const ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH: i64 = 1_420_070_400; -fn get_timestamp_decoder( +fn get_inner_timestamp_decoder( column: &Column, stripe: &Stripe, seconds_since_unix_epoch: i64, - is_instant: bool, -) -> Result> { +) -> Result> { let data = stripe.stream_map().get(column, Kind::Data); let data = get_rle_reader(column, data)?; @@ -64,17 +63,31 @@ fn get_timestamp_decoder( data, secondary, )); - let inner = PrimitiveArrayDecoder::::new(iter, present); - if is_instant { - Ok(Box::new(TimestampInstantArrayDecoder(inner))) - } else { - match stripe.writer_tz() { - Some(writer_tz) => Ok(Box::new(TimestampOffsetArrayDecoder { inner, writer_tz })), - None => Ok(Box::new(inner)), - } + Ok(PrimitiveArrayDecoder::::new(iter, present)) +} + +fn get_timestamp_decoder( + column: &Column, + stripe: &Stripe, + seconds_since_unix_epoch: i64, +) -> Result> { + let inner = get_inner_timestamp_decoder::(column, stripe, seconds_since_unix_epoch)?; + match stripe.writer_tz() { + Some(writer_tz) => Ok(Box::new(TimestampOffsetArrayDecoder { inner, writer_tz })), + None => Ok(Box::new(inner)), } } +fn get_timestamp_instant_decoder( + column: &Column, + stripe: &Stripe, +) -> Result> { + // TIMESTAMP_INSTANT is encoded as UTC so we don't check writer timezone in stripe + let inner = + get_inner_timestamp_decoder::(column, stripe, ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH)?; + Ok(Box::new(TimestampInstantArrayDecoder(inner))) +} + fn decimal128_decoder( column: &Column, stripe: &Stripe, @@ -151,19 +164,13 @@ pub fn new_timestamp_decoder( match field_type { ArrowDataType::Timestamp(TimeUnit::Second, None) => { - get_timestamp_decoder::( - column, - stripe, - seconds_since_unix_epoch, - false, - ) + get_timestamp_decoder::(column, stripe, seconds_since_unix_epoch) } ArrowDataType::Timestamp(TimeUnit::Millisecond, None) => { get_timestamp_decoder::( column, stripe, seconds_since_unix_epoch, - false, ) } ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => { @@ -171,7 +178,6 @@ pub fn new_timestamp_decoder( column, stripe, seconds_since_unix_epoch, - false, ) } ArrowDataType::Timestamp(TimeUnit::Nanosecond, None) => { @@ -179,7 +185,6 @@ pub fn new_timestamp_decoder( column, stripe, seconds_since_unix_epoch, - false, ) } ArrowDataType::Decimal128(Decimal128Type::MAX_PRECISION, NANOSECOND_DIGITS) => { @@ -206,38 +211,17 @@ pub fn new_timestamp_instant_decoder( stripe: &Stripe, ) -> Result> { match field_type { - // TIMESTAMP_INSTANT is encoded as UTC so we don't check writer timezone in stripe ArrowDataType::Timestamp(TimeUnit::Second, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_decoder::( - column, - stripe, - ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH, - true, - ) + get_timestamp_instant_decoder::(column, stripe) } ArrowDataType::Timestamp(TimeUnit::Millisecond, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_decoder::( - column, - stripe, - ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH, - true, - ) + get_timestamp_instant_decoder::(column, stripe) } ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_decoder::( - column, - stripe, - ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH, - true, - ) + get_timestamp_instant_decoder::(column, stripe) } ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) if tz.as_ref() == "UTC" => { - get_timestamp_decoder::( - column, - stripe, - ORC_EPOCH_UTC_SECONDS_SINCE_UNIX_EPOCH, - true, - ) + get_timestamp_instant_decoder::(column, stripe) } ArrowDataType::Timestamp(_, Some(_)) => UnsupportedTypeVariantSnafu { msg: "Non-UTC Arrow timestamps",