-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(types): support ns timestamp #19827
Open
xxhZs
wants to merge
9
commits into
main
Choose a base branch
from
xxh/support-timestamp-ns-1
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+226
−36
Open
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
efcb97b
support
xxhZs 4aad830
fix comm
xxhZs 9bf25a9
Merge branch 'main' into xxh/support-timestamp-ns-1
wcy-fdu a6acfc2
add ci
xxhZs 026e08a
fix comm
xxhZs f1cb8ab
fix fmt
xxhZs d09d463
add doc + fix ci
xxhZs 2c87c76
fix ci
xxhZs d975bf0
fix ci
xxhZs File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
statement ok | ||
SET RW_IMPLICIT_FLUSH TO true; | ||
|
||
statement ok | ||
create table t1(v1 int, v2 timestamp); | ||
|
||
statement ok | ||
insert into t1 values(1,'2013-01-01 01:01:01.123456789'),(2,'2012-01-01 01:01:01.123456'),(3,'0000-01-01 01:01:01.123456789'),(4,'2213-01-01 01:01:01.123456789'),(5,null); | ||
|
||
query T rowsort | ||
select * from t1; | ||
---- | ||
1 2013-01-01 01:01:01.123456789 | ||
2 2012-01-01 01:01:01.123456 | ||
3 0001-01-01 01:01:01.123456789 BC | ||
4 2213-01-01 01:01:01.123456789 | ||
5 NULL | ||
|
||
query T | ||
select * from t1 where v2 is null; | ||
---- | ||
5 NULL | ||
|
||
query T rowsort | ||
select v1, v2, | ||
case | ||
when extract(year from v2) < 2000 then 'Before 2000' | ||
when extract(year from v2) >= 2000 and extract(year from v2) < 2100 then '21st Century' | ||
else 'Future' | ||
end as time_period | ||
from t1; | ||
---- | ||
1 2013-01-01 01:01:01.123456789 21st Century | ||
2 2012-01-01 01:01:01.123456 21st Century | ||
3 0001-01-01 01:01:01.123456789 BC Before 2000 | ||
4 2213-01-01 01:01:01.123456789 Future | ||
5 NULL Future | ||
|
||
query T rowsort | ||
select v1, v2, coalesce(v2, '1900-01-01 00:00:00') as coalesce_v2 from t1; | ||
---- | ||
1 2013-01-01 01:01:01.123456789 2013-01-01 01:01:01.123456789 | ||
2 2012-01-01 01:01:01.123456 2012-01-01 01:01:01.123456 | ||
3 0001-01-01 01:01:01.123456789 BC 0001-01-01 01:01:01.123456789 BC | ||
4 2213-01-01 01:01:01.123456789 2213-01-01 01:01:01.123456789 | ||
5 NULL 1900-01-01 00:00:00 | ||
|
||
query T | ||
select count(v2) as total_rows from t1; | ||
---- | ||
4 | ||
|
||
query T rowsort | ||
select * from t1 order by v2; | ||
---- | ||
1 2013-01-01 01:01:01.123456789 | ||
2 2012-01-01 01:01:01.123456 | ||
3 0001-01-01 01:01:01.123456789 BC | ||
4 2213-01-01 01:01:01.123456789 | ||
5 NULL | ||
|
||
query T rowsort | ||
select * from t1 where v2 >= '2012-01-01 01:01:01.123456'; | ||
---- | ||
1 2013-01-01 01:01:01.123456789 | ||
2 2012-01-01 01:01:01.123456 | ||
4 2213-01-01 01:01:01.123456789 | ||
|
||
query T rowsort | ||
select v1, cast(v2 as date) as date_v2, cast(v2 as timestamp with time zone) as timestamptz_v2 from t1; | ||
---- | ||
1 2013-01-01 2013-01-01 01:01:01.123456+00:00 | ||
2 2012-01-01 2012-01-01 01:01:01.123456+00:00 | ||
3 0001-01-01 BC 0001-01-01 01:01:01.123456+00:00 BC | ||
4 2213-01-01 2213-01-01 01:01:01.123456+00:00 | ||
5 NULL NULL | ||
|
||
query T rowsort | ||
select v1, date_trunc('day', v2) AS truncated_v2 from t1; | ||
---- | ||
1 2013-01-01 00:00:00 | ||
2 2012-01-01 00:00:00 | ||
3 0001-01-01 00:00:00 BC | ||
4 2213-01-01 00:00:00 | ||
5 NULL | ||
|
||
query T rowsort | ||
select v1, v2 at time zone 'UTC' as v2_utc from t1; | ||
---- | ||
1 2013-01-01 01:01:01.123456+00:00 | ||
2 2012-01-01 01:01:01.123456+00:00 | ||
3 0001-01-01 01:01:01.123456+00:00 BC | ||
4 2213-01-01 01:01:01.123456+00:00 | ||
5 NULL | ||
|
||
query T rowsort | ||
select v1, to_char(v2, 'YYYY-MM-DD HH24:MI:SS.NS') as formatted_v2 from t1; | ||
---- | ||
1 2013-01-01 01:01:01.123456789 | ||
2 2012-01-01 01:01:01.123456000 | ||
3 0000-01-01 01:01:01.123456789 | ||
4 2213-01-01 01:01:01.123456789 | ||
5 NULL | ||
|
||
query T rowsort | ||
select generate_series('2013-01-01 01:01:01.123456789'::timestamp,'2013-01-01 01:01:05.123456790'::timestamp, '1 s'); | ||
---- | ||
2013-01-01 01:01:01.123456789 | ||
2013-01-01 01:01:02.123456789 | ||
2013-01-01 01:01:03.123456789 | ||
2013-01-01 01:01:04.123456789 | ||
2013-01-01 01:01:05.123456789 | ||
|
||
statement ok | ||
drop table t1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,29 +168,18 @@ impl FromStr for Timestamp { | |
type Err = InvalidParamsError; | ||
|
||
fn from_str(s: &str) -> Result<Self> { | ||
if let Ok(res) = speedate::DateTime::parse_str_rfc3339(s) { | ||
if res.time.tz_offset.is_some() { | ||
return Err(ErrorKind::ParseTimestamp.into()); | ||
} | ||
Ok(Date::from_ymd_uncheck( | ||
res.date.year as i32, | ||
res.date.month as u32, | ||
res.date.day as u32, | ||
) | ||
.and_hms_micro_uncheck( | ||
res.time.hour as u32, | ||
res.time.minute as u32, | ||
res.time.second as u32, | ||
res.time.microsecond, | ||
)) | ||
} else { | ||
let res = | ||
speedate::Date::parse_str_rfc3339(s).map_err(|_| ErrorKind::ParseTimestamp)?; | ||
Ok( | ||
Date::from_ymd_uncheck(res.year as i32, res.month as u32, res.day as u32) | ||
.and_hms_micro_uncheck(0, 0, 0, 0), | ||
) | ||
} | ||
let dt = s | ||
.parse::<jiff::civil::DateTime>() | ||
.map_err(|_| ErrorKind::ParseTimestamp)?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Error message of |
||
Ok( | ||
Date::from_ymd_uncheck(dt.year() as i32, dt.month() as u32, dt.day() as u32) | ||
.and_hms_nano_uncheck( | ||
dt.hour() as u32, | ||
dt.minute() as u32, | ||
dt.second() as u32, | ||
dt.subsec_nanosecond() as u32, | ||
), | ||
) | ||
} | ||
} | ||
|
||
|
@@ -422,6 +411,13 @@ impl Date { | |
.and_time(Time::from_hms_micro_uncheck(hour, min, sec, micro).0), | ||
) | ||
} | ||
|
||
pub fn and_hms_nano_uncheck(self, hour: u32, min: u32, sec: u32, nano: u32) -> Timestamp { | ||
Timestamp::new( | ||
self.0 | ||
.and_time(Time::from_hms_nano_uncheck(hour, min, sec, nano).0), | ||
) | ||
} | ||
} | ||
|
||
impl Time { | ||
|
@@ -485,6 +481,41 @@ impl Time { | |
} | ||
} | ||
|
||
// The first 64 bits of protobuf encoding for `Timestamp` type has 2 possible meanings. | ||
// * When the highest 2 bits are `11` or `00` (i.e. values ranging from `0b1100...00` to `0b0011..11`), | ||
// it is *microseconds* since 1970-01-01 midnight. 2^62 microseconds covers 146235 years. | ||
// * When the highest 2 bits are `10` or `01`, we flip the second bit to get values from `0b1100...00` to `0b0011..11` again. | ||
// It is *seconds* since 1970-01-01 midnight. It is then followed by another 32 bits as nanoseconds within a second. | ||
enum FirstI64 { | ||
V0 { usecs: i64 }, | ||
V1 { secs: i64 }, | ||
} | ||
impl FirstI64 { | ||
pub fn to_protobuf(&self) -> i64 { | ||
match self { | ||
FirstI64::V0 { usecs } => *usecs, | ||
FirstI64::V1 { secs } => secs ^ (0b01 << 62), | ||
} | ||
} | ||
|
||
pub fn from_protobuf(cur: &mut Cursor<&[u8]>) -> ArrayResult<FirstI64> { | ||
let value = cur | ||
.read_i64::<BigEndian>() | ||
.context("failed to read i64 from Time buffer")?; | ||
if Self::is_v1_format_state(value) { | ||
let secs = value ^ (0b01 << 62); | ||
Ok(FirstI64::V1 { secs }) | ||
} else { | ||
Ok(FirstI64::V0 { usecs: value }) | ||
} | ||
} | ||
|
||
fn is_v1_format_state(value: i64) -> bool { | ||
let state = (value >> 62) & 0b11; | ||
state == 0b10 || state == 0b01 | ||
} | ||
} | ||
|
||
impl Timestamp { | ||
pub fn with_secs_nsecs(secs: i64, nsecs: u32) -> Result<Self> { | ||
Ok(Timestamp::new({ | ||
|
@@ -495,18 +526,34 @@ impl Timestamp { | |
} | ||
|
||
pub fn from_protobuf(cur: &mut Cursor<&[u8]>) -> ArrayResult<Timestamp> { | ||
let micros = cur | ||
.read_i64::<BigEndian>() | ||
.context("failed to read i64 from Timestamp buffer")?; | ||
|
||
Ok(Timestamp::with_micros(micros)?) | ||
match FirstI64::from_protobuf(cur)? { | ||
FirstI64::V0 { usecs } => Ok(Timestamp::with_micros(usecs)?), | ||
FirstI64::V1 { secs } => { | ||
let nsecs = cur | ||
.read_u32::<BigEndian>() | ||
.context("failed to read u32 from Time buffer")?; | ||
Ok(Timestamp::with_secs_nsecs(secs, nsecs)?) | ||
} | ||
} | ||
} | ||
|
||
/// Although `Timestamp` takes 12 bytes, we drop 4 bytes in protobuf encoding. | ||
// Since timestamp secs is much smaller than i64, we use the highest 2 bit to store the format information, which is compatible with the old format. | ||
// New format: secs(i64) + nsecs(u32) | ||
// Old format: micros(i64) | ||
pub fn to_protobuf<T: Write>(self, output: &mut T) -> ArrayResult<usize> { | ||
output | ||
.write(&(self.0.and_utc().timestamp_micros()).to_be_bytes()) | ||
.map_err(Into::into) | ||
let timestamp_size = output | ||
.write( | ||
&(FirstI64::V1 { | ||
secs: self.0.and_utc().timestamp(), | ||
} | ||
.to_protobuf()) | ||
.to_be_bytes(), | ||
) | ||
.map_err(Into::<ArrayError>::into)?; | ||
let timestamp_subsec_nanos_size = output | ||
.write(&(self.0.and_utc().timestamp_subsec_nanos()).to_be_bytes()) | ||
.map_err(Into::<ArrayError>::into)?; | ||
Ok(timestamp_subsec_nanos_size + timestamp_size) | ||
xxhZs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
pub fn get_timestamp_nanos(&self) -> i64 { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need to introduce yet another dependency in addition to
chrono
andspeedate
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because chrono can't be parsed, and speedate is only parsed at the us level