Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parse-zoneinfo: replace rule parser with simple state machine #172

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
171 changes: 76 additions & 95 deletions parse-zoneinfo/src/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ use regex::{Captures, Regex};

pub struct LineParser {
rule_line: Regex,
hm_field: Regex,
hms_field: Regex,
zone_line: Regex,
continuation_line: Regex,
link_line: Regex,
Expand Down Expand Up @@ -150,24 +148,6 @@ impl Default for LineParser {
)
.unwrap(),

hm_field: Regex::new(
r##"(?x) ^
( ?P<sign> -? )
( ?P<hour> \d{1,2} ) : ( ?P<minute> \d{2} )
( ?P<flag> [wsugz] )?
$ "##,
)
.unwrap(),

hms_field: Regex::new(
r##"(?x) ^
( ?P<sign> -? )
( ?P<hour> \d{1,2} ) : ( ?P<minute> \d{2} ) : ( ?P<second> \d{2} )
( ?P<flag> [wsugz] )?
$ "##,
)
.unwrap(),

zone_line: Regex::new(
r##"(?x) ^
Zone \s+
Expand Down Expand Up @@ -619,6 +599,21 @@ impl TimeSpec {
TimeSpec::Zero => 0,
}
}

/// Combines this value with `timetype` into a `TimeSpecAndType`.
pub fn with_type(self, timetype: TimeType) -> TimeSpecAndType {
TimeSpecAndType(self, timetype)
}
}

impl FromStr for TimeSpec {
type Err = Error;

fn from_str(s: &str) -> Result<TimeSpec, Error> {
match TimeSpecAndType::from_str(s)? {
TimeSpecAndType(spec, TimeType::Wall) => Ok(spec),
TimeSpecAndType(_, _) => Err(Error::NonWallClockInTimeSpec(s.to_string())),
}
}
}

#[derive(PartialEq, Debug, Copy, Clone)]
Expand All @@ -628,12 +623,64 @@ pub enum TimeType {
UTC,
}

impl TimeType {
    /// Maps a one-character suffix to its `TimeType`.
    ///
    /// `w` is wall time, `s` is standard time, and `u`, `g` and `z` all mean UTC.
    /// Any other character yields `None`.
    fn from_char(c: char) -> Option<Self> {
        match c {
            'w' => Some(Self::Wall),
            's' => Some(Self::Standard),
            'u' | 'g' | 'z' => Some(Self::UTC),
            _ => None,
        }
    }
}

/// A `TimeSpec` bundled with a `TimeType`.
#[derive(PartialEq, Debug, Copy, Clone)]
pub struct TimeSpecAndType(pub TimeSpec, pub TimeType);

impl TimeSpec {
pub fn with_type(self, timetype: TimeType) -> TimeSpecAndType {
TimeSpecAndType(self, timetype)
impl FromStr for TimeSpecAndType {
type Err = Error;

fn from_str(input: &str) -> Result<Self, Error> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please split this method over the TimeSpec and TimeSpecAndType types? I am an not sure yet if anything but wall times is allowed zone lines, and if the existing code took a shortcut there that we want to fix.

if input == "-" {
return Ok(TimeSpecAndType(TimeSpec::Zero, TimeType::Wall));
} else if input.chars().all(|c| c == '-' || c.is_ascii_digit()) {
return Ok(TimeSpecAndType(
TimeSpec::Hours(input.parse().unwrap()),
TimeType::Wall,
));
}

let (input, ty) = match input.chars().last().and_then(TimeType::from_char) {
Some(ty) => (&input[..input.len() - 1], Some(ty)),
None => (input, None),
};

let neg = if input.starts_with('-') { -1 } else { 1 };
let mut state = TimeSpec::Zero;
for part in input.split(':') {
state = match (state, part) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Elegant you reused the TimeSpec enum as state.

(TimeSpec::Zero, hour) => TimeSpec::Hours(
i8::from_str(hour)
.map_err(|_| Error::InvalidTimeSpecAndType(input.to_string()))?,
),
(TimeSpec::Hours(hours), minutes) => TimeSpec::HoursMinutes(
hours,
i8::from_str(minutes)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex checked minutes and seconds had two digits to prevent parsing 1:1, 1:1:1 or 1:00000 as a valid time. Can you add a length check? (Maybe also worth a test)

.map_err(|_| Error::InvalidTimeSpecAndType(input.to_string()))?
* neg,
),
(TimeSpec::HoursMinutes(hours, minutes), seconds) => TimeSpec::HoursMinutesSeconds(
hours,
minutes,
i8::from_str(seconds)
.map_err(|_| Error::InvalidTimeSpecAndType(input.to_string()))?
* neg,
),
_ => return Err(Error::InvalidTimeSpecAndType(input.to_string())),
};
}

Ok(TimeSpecAndType(state, ty.unwrap_or(TimeType::Wall)))
}
}

Expand Down Expand Up @@ -877,77 +924,12 @@ pub enum Line<'a> {
Link(Link<'a>),
}

/// Maps a one-letter suffix to its `TimeType`.
///
/// `"w"` is wall time, `"s"` is standard time, and `"u"`, `"g"` and `"z"` all mean UTC.
/// Any other string yields `None`.
fn parse_time_type(c: &str) -> Option<TimeType> {
    match c {
        "w" => Some(TimeType::Wall),
        "s" => Some(TimeType::Standard),
        "u" | "g" | "z" => Some(TimeType::UTC),
        _ => None,
    }
}

impl LineParser {
#[deprecated]
pub fn new() -> Self {
Self::default()
}

fn parse_timespec_and_type(&self, input: &str) -> Result<TimeSpecAndType, Error> {
if input == "-" {
Ok(TimeSpecAndType(TimeSpec::Zero, TimeType::Wall))
} else if input.chars().all(|c| c == '-' || c.is_ascii_digit()) {
Ok(TimeSpecAndType(
TimeSpec::Hours(input.parse().unwrap()),
TimeType::Wall,
))
} else if let Some(caps) = self.hm_field.captures(input) {
let sign: i8 = if caps.name("sign").unwrap().as_str() == "-" {
-1
} else {
1
};
let hour: i8 = caps.name("hour").unwrap().as_str().parse().unwrap();
let minute: i8 = caps.name("minute").unwrap().as_str().parse().unwrap();
let flag = caps
.name("flag")
.and_then(|c| parse_time_type(&c.as_str()[0..1]))
.unwrap_or(TimeType::Wall);

Ok(TimeSpecAndType(
TimeSpec::HoursMinutes(hour * sign, minute * sign),
flag,
))
} else if let Some(caps) = self.hms_field.captures(input) {
let sign: i8 = if caps.name("sign").unwrap().as_str() == "-" {
-1
} else {
1
};
let hour: i8 = caps.name("hour").unwrap().as_str().parse().unwrap();
let minute: i8 = caps.name("minute").unwrap().as_str().parse().unwrap();
let second: i8 = caps.name("second").unwrap().as_str().parse().unwrap();
let flag = caps
.name("flag")
.and_then(|c| parse_time_type(&c.as_str()[0..1]))
.unwrap_or(TimeType::Wall);

Ok(TimeSpecAndType(
TimeSpec::HoursMinutesSeconds(hour * sign, minute * sign, second * sign),
flag,
))
} else {
Err(Error::InvalidTimeSpecAndType(input.to_string()))
}
}

fn parse_timespec(&self, input: &str) -> Result<TimeSpec, Error> {
match self.parse_timespec_and_type(input) {
Ok(TimeSpecAndType(spec, TimeType::Wall)) => Ok(spec),
Ok(TimeSpecAndType(_, _)) => Err(Error::NonWallClockInTimeSpec(input.to_string())),
Err(e) => Err(e),
}
}

fn parse_rule<'a>(&self, input: &'a str) -> Result<Rule<'a>, Error> {
if let Some(caps) = self.rule_line.captures(input) {
let name = caps.name("name").unwrap().as_str();
Expand All @@ -972,8 +954,8 @@ impl LineParser {

let month = caps.name("in").unwrap().as_str().parse()?;
let day = DaySpec::from_str(caps.name("on").unwrap().as_str())?;
let time = self.parse_timespec_and_type(caps.name("at").unwrap().as_str())?;
let time_to_add = self.parse_timespec(caps.name("save").unwrap().as_str())?;
let time = TimeSpecAndType::from_str(caps.name("at").unwrap().as_str())?;
let time_to_add = TimeSpec::from_str(caps.name("save").unwrap().as_str())?;
let letters = match caps.name("letters").unwrap().as_str() {
"-" => None,
l => Some(l),
Expand Down Expand Up @@ -1002,16 +984,15 @@ impl LineParser {
.all(|c| c == '-' || c == '_' || c.is_alphabetic())
{
Ok(Saving::Multiple(input))
} else if self.hm_field.is_match(input) {
let time = self.parse_timespec(input)?;
} else if let Ok(time) = TimeSpec::from_str(input) {
Ok(Saving::OneOff(time))
} else {
Err(Error::CouldNotParseSaving(input.to_string()))
}
}

fn zoneinfo_from_captures<'a>(&self, caps: Captures<'a>) -> Result<ZoneInfo<'a>, Error> {
let utc_offset = self.parse_timespec(caps.name("gmtoff").unwrap().as_str())?;
let utc_offset = TimeSpec::from_str(caps.name("gmtoff").unwrap().as_str())?;
let saving = self.saving_from_str(caps.name("rulessave").unwrap().as_str())?;
let format = caps.name("format").unwrap().as_str();

Expand All @@ -1028,7 +1009,7 @@ impl LineParser {
y.as_str().parse()?,
m.as_str().parse()?,
DaySpec::from_str(d.as_str())?,
self.parse_timespec_and_type(t.as_str())?,
TimeSpecAndType::from_str(t.as_str())?,
)),
(Some(y), Some(m), Some(d), _) => Some(ChangeTime::UntilDay(
y.as_str().parse()?,
Expand Down Expand Up @@ -1373,7 +1354,7 @@ mod tests {
test!(comment: "# this is a comment" => Ok(Line::Space));
test!(another_comment: " # so is this" => Ok(Line::Space));
test!(multiple_hash: " # so is this ## " => Ok(Line::Space));
test!(non_comment: " this is not a # comment" => Err(Error::InvalidTimeSpecAndType("this".to_string())));
test!(non_comment: " this is not a # comment" => Err(Error::InvalidTimeSpecAndType("thi".to_string())));

test!(comment_after: "Link Europe/Istanbul Asia/Istanbul #with a comment after" => Ok(Line::Link(Link {
existing: "Europe/Istanbul",
Expand Down