uutils · yuankunzhang · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025
diff --git a/src/lib.rs b/src/lib.rs
@@ -13,6 +13,8 @@ use regex::Regex;
 use std::error::Error;
 use std::fmt::{self, Display};
 
+pub(crate) mod parse;
+
 // Expose parse_datetime
 mod parse_relative_time;
 mod parse_timestamp;
@@ -21,12 +23,12 @@ mod parse_time_only_str;
 mod parse_weekday;
 
 use chrono::{
-    DateTime, Datelike, Duration, FixedOffset, Local, LocalResult, MappedLocalTime, NaiveDate,
-    NaiveDateTime, TimeZone, Timelike,
+    DateTime, FixedOffset, Local, LocalResult, MappedLocalTime, NaiveDate, NaiveDateTime, TimeZone,
 };
 
 use parse_relative_time::parse_relative_time_at_date;
 use parse_timestamp::parse_timestamp;
+use parse_weekday::parse_weekday_at_date;
 
 #[derive(Debug, PartialEq)]
 pub enum ParseDateTimeError {
@@ -300,23 +302,8 @@ where
     }
 
     // parse weekday
-    if let Some(weekday) = parse_weekday::parse_weekday(s.as_ref()) {
-        let mut beginning_of_day = date
-            .with_hour(0)
-            .unwrap()
-            .with_minute(0)
-            .unwrap()
-            .with_second(0)
-            .unwrap()
-            .with_nanosecond(0)
-            .unwrap();
-
-        while beginning_of_day.weekday() != weekday {
-            beginning_of_day += Duration::days(1);
-        }
-
-        let dt = DateTime::<FixedOffset>::from(beginning_of_day);
-
+    if let Ok(dt) = parse_weekday_at_date(date, s.as_ref()) {
+        let dt = DateTime::<FixedOffset>::from(dt);
         return Some((dt, s.as_ref().len()));
     }
 

diff --git a/src/parse/mod.rs b/src/parse/mod.rs
@@ -0,0 +1,50 @@
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+use relative_time::relative_times;
+use timestamp::timestamp;
+use weekday::weekday;
+
+mod primitive;
+mod relative_time;
+mod timestamp;
+mod weekday;
+
+/// Opaque error returned by this module's `parse_*` functions. TODO: more specific errors?
+#[derive(Debug)]
+pub(crate) struct ParseError;
+
+pub(crate) use relative_time::RelativeTime;
+pub(crate) use relative_time::TimeUnit;
+pub(crate) use weekday::WeekdayItem;
+
+/// Runs the relative-time parser over `input` and returns the parsed `RelativeTime` items.
+/// Any parser failure is collapsed into the opaque `ParseError`.
+pub(crate) fn parse_relative_times(input: &str) -> Result<Vec<RelativeTime>, ParseError> {
+    let (_rest, times) = relative_times(input).map_err(|_| ParseError)?;
+    Ok(times)
+}
+
+/// Runs the timestamp parser over `input` and returns the value as an `f64` (the seconds
+/// since epoch). Any parser failure is collapsed into the opaque `ParseError`.
+pub(crate) fn parse_timestamp(input: &str) -> Result<f64, ParseError> {
+    let (_rest, seconds) = timestamp(input).map_err(|_| ParseError)?;
+    Ok(seconds)
+}
+
+/// Runs the weekday parser over `input` and returns the parsed `WeekdayItem`.
+/// Any parser failure is collapsed into the opaque `ParseError`.
+pub(crate) fn parse_weekday(input: &str) -> Result<WeekdayItem, ParseError> {
+    let (_rest, item) = weekday(input).map_err(|_| ParseError)?;
+    Ok(item)
+}
+
+/// Looks up `key` in `pairs`, comparing keys ASCII-case-insensitively.
+///
+/// Returns a clone of the value attached to the first matching key, or `None`
+/// when no key matches.
+fn find_in_pairs<T: Clone>(pairs: &[(&str, T)], key: &str) -> Option<T> {
+    pairs
+        .iter()
+        .find(|(name, _)| name.eq_ignore_ascii_case(key))
+        .map(|(_, value)| value.clone())
+}
diff --git a/src/parse/primitive.rs b/src/parse/primitive.rs
@@ -0,0 +1,144 @@
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+//! Module to parse primitive items: ordinals, signed integers and signs.
+//!
+//! Grammar definition:
+//!
+//! ```ebnf
+//! ordinal = "last" | "this" | "next"
+//!         | "first" | "third" | "fourth" | "fifth"
+//!         | "sixth" | "seventh" | "eighth" | "ninth"
+//!         | "tenth" | "eleventh" | "twelfth" ;
+//!
+//! integer = [ sign ] , digit , { digit } ;
+//!
+//! sign = { ("+" | "-") , { whitespace } } ;
+//!
+//! digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+//! ```
+
+use nom::{
+    bytes::complete::take_while1,
+    character::complete::{digit1, multispace0, one_of},
+    combinator::{map_res, opt},
+    multi::fold_many1,
+    sequence::terminated,
+    IResult, Parser,
+};
+
+use super::find_in_pairs;
+
+const ORDINALS: &[(&str, i64)] = &[
+    ("last", -1),
+    ("this", 0),
+    ("next", 1),
+    ("first", 1),
+    // No "second" entry: that keyword is overloaded, so it can't be used as an ordinal
+    ("third", 3),
+    ("fourth", 4),
+    ("fifth", 5),
+    ("sixth", 6),
+    ("seventh", 7),
+    ("eighth", 8),
+    ("ninth", 9),
+    ("tenth", 10),
+    ("eleventh", 11),
+    ("twelfth", 12),
+];
+
+/// Parses the leading alphabetic word as a case-insensitive keyword from `ORDINALS`.
+pub(super) fn ordinal(input: &str) -> IResult<&str, i64> {
+    let word = take_while1(|c: char| c.is_alphabetic());
+    let to_value = |s: &str| find_in_pairs(ORDINALS, s).ok_or("unknown ordinal");
+    map_res(word, to_value).parse(input)
+}
+
+/// Parses an optionally signed run of decimal digits into an `i64`.
+///
+/// The result is negated only when the sign parser yields `-`.
+pub(super) fn integer(input: &str) -> IResult<&str, i64> {
+    let (after_sign, sign_char) = opt(sign).parse(input)?;
+    let (tail, abs) = map_res(digit1, str::parse::<i64>).parse(after_sign)?;
+    let value = if sign_char == Some('-') { -abs } else { abs };
+    Ok((tail, value))
+}
+
+/// Parses a sign (either `+` or `-`) from the start of the input string.
+///
+/// The input must begin with a sign character, which may be followed by any
+/// number of further sign characters interleaved with whitespace. All but the
+/// last sign character are ignored, and the last one is returned. This quirky
+/// behavior is to stay consistent with GNU date.
+pub(super) fn sign(input: &str) -> IResult<&str, char> {
+    // One repetition: a single sign character plus any whitespace trailing it.
+    let signed_chunk = terminated(one_of("+-"), multispace0);
+    // `one_of("+-")` can only yield `+` or `-`, so the fold simply keeps the newest character.
+    let mut last_sign = fold_many1(signed_chunk, || '+', |_, newest| newest);
+    last_sign.parse(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ordinal() {
+        assert!(ordinal("").is_err());
+        assert!(ordinal("invalid").is_err());
+        assert!(ordinal(" last").is_err());
+
+        assert_eq!(ordinal("last"), Ok(("", -1)));
+        assert_eq!(ordinal("this"), Ok(("", 0)));
+        assert_eq!(ordinal("next"), Ok(("", 1)));
+        assert_eq!(ordinal("first"), Ok(("", 1)));
+        assert_eq!(ordinal("third"), Ok(("", 3)));
+        assert_eq!(ordinal("fourth"), Ok(("", 4)));
+        assert_eq!(ordinal("fifth"), Ok(("", 5)));
+        assert_eq!(ordinal("sixth"), Ok(("", 6)));
+        assert_eq!(ordinal("seventh"), Ok(("", 7)));
+        assert_eq!(ordinal("eighth"), Ok(("", 8)));
+        assert_eq!(ordinal("ninth"), Ok(("", 9)));
+        assert_eq!(ordinal("tenth"), Ok(("", 10)));
+        assert_eq!(ordinal("eleventh"), Ok(("", 11)));
+        assert_eq!(ordinal("twelfth"), Ok(("", 12)));
+
+        // Boundary: the keyword ends at the first non-alphabetic character
+        assert_eq!(ordinal("last123"), Ok(("123", -1)));
+        assert_eq!(ordinal("last abc"), Ok((" abc", -1)));
+        assert!(ordinal("lastabc").is_err());
+
+        // Keywords are matched case-insensitively
+        assert_eq!(ordinal("THIS"), Ok(("", 0)));
+        assert_eq!(ordinal("This"), Ok(("", 0)));
+    }
+
+    #[test]
+    fn test_integer() {
+        assert!(integer("").is_err());
+        assert!(integer("invalid").is_err());
+        assert!(integer(" 123").is_err());
+
+        assert_eq!(integer("123"), Ok(("", 123)));
+        assert_eq!(integer("+123"), Ok(("", 123)));
+        assert_eq!(integer("- 123"), Ok(("", -123)));
+
+        // Boundary: input after the digits is left unconsumed; only the last sign counts
+        assert_eq!(integer("- 123abc"), Ok(("abc", -123)));
+        assert_eq!(integer("- +- 123abc"), Ok(("abc", -123)));
+    }
+
+    #[test]
+    fn test_sign() {
+        assert!(sign("").is_err());
+        assert!(sign("invalid").is_err());
+        assert!(sign(" +").is_err());
+
+        assert_eq!(sign("+"), Ok(("", '+')));
+        assert_eq!(sign("-"), Ok(("", '-')));
+        assert_eq!(sign("- + - "), Ok(("", '-')));
+
+        // Boundary: input after the last sign and its trailing whitespace is left unconsumed
+        assert_eq!(sign("- + - abc"), Ok(("abc", '-')));
+    }
+}