@@ -20,11 +20,7 @@ use arrow_array::ArrowPrimitiveType;
20
20
use arrow_schema:: ArrowError ;
21
21
use chrono:: prelude:: * ;
22
22
23
- /// Accepts a string in RFC3339 / ISO8601 standard format and some
24
- /// variants and converts it to a nanosecond precision timestamp.
25
- ///
26
- /// Implements the `to_timestamp` function to convert a string to a
27
- /// timestamp, following the model of spark SQL’s to_`timestamp`.
23
+ /// Accepts a string and parses it relative to the provided `timezone`
28
24
///
29
25
/// In addition to RFC3339 / ISO8601 standard timestamps, it also
30
26
/// accepts strings that use a space ` ` to separate the date and time
@@ -38,36 +34,6 @@ use chrono::prelude::*;
38
34
/// * `1997-01-31 09:26:56.123` # close to RFC3339 but uses a space and no timezone offset
39
35
/// * `1997-01-31 09:26:56` # close to RFC3339, no fractional seconds
40
36
/// * `1997-01-31` # close to RFC3339, only date no time
41
- //
42
- /// Internally, this function uses the `chrono` library for the
43
- /// datetime parsing
44
- ///
45
- /// We hope to extend this function in the future with a second
46
- /// parameter to specifying the format string.
47
- ///
48
- /// ## Timestamp Precision
49
- ///
50
- /// Function uses the maximum precision timestamps supported by
51
- /// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
52
- /// means the range of dates that timestamps can represent is ~1677 AD
53
- /// to 2262 AM
54
- ///
55
- ///
56
- /// ## Timezone / Offset Handling
57
- ///
58
- /// Numerical values of timestamps are stored compared to offset UTC.
59
- ///
60
- /// This function interprets strings without an explicit time zone as
61
- /// timestamps with offsets of the local time on the machine
62
- ///
63
- /// For example, `1997-01-31 09:26:56.123Z` is interpreted as UTC, as
64
- /// it has an explicit timezone specifier (“Z” for Zulu/UTC)
65
- ///
66
- /// `1997-01-31T09:26:56.123` is interpreted as a local timestamp in
67
- /// the timezone of the machine. For example, if
68
- /// the system timezone is set to Americas/New_York (UTC-5) the
69
- /// timestamp will be interpreted as though it were
70
- /// `1997-01-31T09:26:56.123-05:00`
71
37
///
72
38
/// Some formats supported by PostgreSQL <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-DATETIME-TIME-TABLE>
73
39
/// are still not supported by chrono, like
@@ -76,12 +42,14 @@ use chrono::prelude::*;
76
42
/// "2023-01-01 040506 +07:30:00",
77
43
/// "2023-01-01 04:05:06.789 PST",
78
44
/// "2023-01-01 04:05:06.789 -08",
79
- #[ inline]
80
- pub fn string_to_timestamp_nanos ( s : & str ) -> Result < i64 , ArrowError > {
45
+ pub fn string_to_datetime < T : TimeZone > (
46
+ timezone : & T ,
47
+ s : & str ,
48
+ ) -> Result < DateTime < T > , ArrowError > {
81
49
// Fast path: RFC3339 timestamp (with a T)
82
50
// Example: 2020-09-08T13:42:29.190855Z
83
51
if let Ok ( ts) = DateTime :: parse_from_rfc3339 ( s) {
84
- return Ok ( ts. timestamp_nanos ( ) ) ;
52
+ return Ok ( ts. with_timezone ( timezone ) ) ;
85
53
}
86
54
87
55
// Implement quasi-RFC3339 support by trying to parse the
@@ -96,14 +64,14 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
96
64
97
65
for f in supported_formats. iter ( ) {
98
66
if let Ok ( ts) = DateTime :: parse_from_str ( s, f) {
99
- return to_timestamp_nanos ( ts. naive_utc ( ) ) ;
67
+ return Ok ( ts. with_timezone ( timezone ) ) ;
100
68
}
101
69
}
102
70
103
71
// with an explicit Z, using ' ' as a separator
104
72
// Example: 2020-09-08 13:42:29Z
105
73
if let Ok ( ts) = Utc . datetime_from_str ( s, "%Y-%m-%d %H:%M:%S%.fZ" ) {
106
- return to_timestamp_nanos ( ts. naive_utc ( ) ) ;
74
+ return Ok ( ts. with_timezone ( timezone ) ) ;
107
75
}
108
76
109
77
// Support timestamps without an explicit timezone offset, again
@@ -112,34 +80,44 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
112
80
// without a timezone specifier as a local time, using T as a separator
113
81
// Example: 2020-09-08T13:42:29.190855
114
82
if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%dT%H:%M:%S%.f" ) {
115
- return to_timestamp_nanos ( ts) ;
83
+ if let Some ( offset) = timezone. offset_from_local_datetime ( & ts) . single ( ) {
84
+ return Ok ( DateTime :: from_local ( ts, offset) ) ;
85
+ }
116
86
}
117
87
118
88
// without a timezone specifier as a local time, using T as a
119
89
// separator, no fractional seconds
120
90
// Example: 2020-09-08T13:42:29
121
91
if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%dT%H:%M:%S" ) {
122
- return Ok ( ts. timestamp_nanos ( ) ) ;
92
+ if let Some ( offset) = timezone. offset_from_local_datetime ( & ts) . single ( ) {
93
+ return Ok ( DateTime :: from_local ( ts, offset) ) ;
94
+ }
123
95
}
124
96
125
97
// without a timezone specifier as a local time, using ' ' as a separator
126
98
// Example: 2020-09-08 13:42:29.190855
127
99
if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%d %H:%M:%S%.f" ) {
128
- return to_timestamp_nanos ( ts) ;
100
+ if let Some ( offset) = timezone. offset_from_local_datetime ( & ts) . single ( ) {
101
+ return Ok ( DateTime :: from_local ( ts, offset) ) ;
102
+ }
129
103
}
130
104
131
105
// without a timezone specifier as a local time, using ' ' as a
132
106
// separator, no fractional seconds
133
107
// Example: 2020-09-08 13:42:29
134
108
if let Ok ( ts) = NaiveDateTime :: parse_from_str ( s, "%Y-%m-%d %H:%M:%S" ) {
135
- return Ok ( ts. timestamp_nanos ( ) ) ;
109
+ if let Some ( offset) = timezone. offset_from_local_datetime ( & ts) . single ( ) {
110
+ return Ok ( DateTime :: from_local ( ts, offset) ) ;
111
+ }
136
112
}
137
113
138
114
// without a timezone specifier as a local time, only date
139
115
// Example: 2020-09-08
140
116
if let Ok ( dt) = NaiveDate :: parse_from_str ( s, "%Y-%m-%d" ) {
141
117
if let Some ( ts) = dt. and_hms_opt ( 0 , 0 , 0 ) {
142
- return Ok ( ts. timestamp_nanos ( ) ) ;
118
+ if let Some ( offset) = timezone. offset_from_local_datetime ( & ts) . single ( ) {
119
+ return Ok ( DateTime :: from_local ( ts, offset) ) ;
120
+ }
143
121
}
144
122
}
145
123
@@ -153,6 +131,42 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
153
131
) ) )
154
132
}
155
133
134
+ /// Accepts a string in RFC3339 / ISO8601 standard format and some
135
+ /// variants and converts it to a nanosecond precision timestamp.
136
+ ///
137
+ /// See [`string_to_datetime`] for the full set of supported formats
138
+ ///
139
+ /// Implements the `to_timestamp` function to convert a string to a
140
+ /// timestamp, following the model of Spark SQL’s `to_timestamp`.
141
+ ///
142
+ /// Internally, this function uses the `chrono` library for the
143
+ /// datetime parsing
144
+ ///
145
+ /// We hope to extend this function in the future with a second
146
+ /// parameter specifying the format string.
147
+ ///
148
+ /// ## Timestamp Precision
149
+ ///
150
+ /// Function uses the maximum precision timestamps supported by
151
+ /// Arrow (nanoseconds stored as a 64-bit integer) timestamps. This
152
+ /// means the range of dates that timestamps can represent is ~1677 AD
153
+ /// to 2262 AD
154
+ ///
155
+ /// ## Timezone / Offset Handling
156
+ ///
157
+ /// Numerical values of timestamps are stored compared to offset UTC.
158
+ ///
159
+ /// This function interprets strings without an explicit time zone as timestamps
160
+ /// relative to UTC, see [`string_to_datetime`] for alternative semantics
161
+ ///
162
+ /// For example, `1997-01-31 09:26:56.123Z`, `1997-01-31T09:26:56.123`,
163
+ /// and `1997-01-31T14:26:56.123+05:00` will be parsed as the same value
164
+ ///
165
+ #[ inline]
166
+ pub fn string_to_timestamp_nanos ( s : & str ) -> Result < i64 , ArrowError > {
167
+ to_timestamp_nanos ( string_to_datetime ( & Utc , s) ?. naive_utc ( ) )
168
+ }
169
+
156
170
/// Defensive check to prevent chrono-rs panics when nanosecond conversion happens on non-supported dates
157
171
#[ inline]
158
172
fn to_timestamp_nanos ( dt : NaiveDateTime ) -> Result < i64 , ArrowError > {
@@ -448,6 +462,7 @@ impl Parser for Date64Type {
448
462
#[ cfg( test) ]
449
463
mod tests {
450
464
use super :: * ;
465
+ use arrow_array:: timezone:: Tz ;
451
466
452
467
#[ test]
453
468
fn string_to_timestamp_timezone ( ) {
@@ -614,6 +629,34 @@ mod tests {
614
629
naive_datetime. timestamp_nanos( ) ,
615
630
parse_timestamp( "2020-09-08 13:42:29" ) . unwrap( )
616
631
) ;
632
+
633
+ let tz: Tz = "+02:00" . parse ( ) . unwrap ( ) ;
634
+ let date = string_to_datetime ( & tz, "2020-09-08 13:42:29" ) . unwrap ( ) ;
635
+ let utc = date. naive_utc ( ) . to_string ( ) ;
636
+ assert_eq ! ( utc, "2020-09-08 11:42:29" ) ;
637
+ let local = date. naive_local ( ) . to_string ( ) ;
638
+ assert_eq ! ( local, "2020-09-08 13:42:29" ) ;
639
+
640
+ let date = string_to_datetime ( & tz, "2020-09-08 13:42:29Z" ) . unwrap ( ) ;
641
+ let utc = date. naive_utc ( ) . to_string ( ) ;
642
+ assert_eq ! ( utc, "2020-09-08 13:42:29" ) ;
643
+ let local = date. naive_local ( ) . to_string ( ) ;
644
+ assert_eq ! ( local, "2020-09-08 15:42:29" ) ;
645
+
646
+ let dt =
647
+ NaiveDateTime :: parse_from_str ( "2020-09-08T13:42:29Z" , "%Y-%m-%dT%H:%M:%SZ" )
648
+ . unwrap ( ) ;
649
+ let local: Tz = "+08:00" . parse ( ) . unwrap ( ) ;
650
+
651
+ // Parsed as offset from UTC
652
+ let date = string_to_datetime ( & local, "2020-09-08T13:42:29Z" ) . unwrap ( ) ;
653
+ assert_eq ! ( dt, date. naive_utc( ) ) ;
654
+ assert_ne ! ( dt, date. naive_local( ) ) ;
655
+
656
+ // Parsed as offset from local
657
+ let date = string_to_datetime ( & local, "2020-09-08 13:42:29" ) . unwrap ( ) ;
658
+ assert_eq ! ( dt, date. naive_local( ) ) ;
659
+ assert_ne ! ( dt, date. naive_utc( ) ) ;
617
660
}
618
661
619
662
#[ test]
0 commit comments