Skip to content

fix: invalid timezone should return err not panic #47

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion examples/russian.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ fn main() {
)
.unwrap()
.0,
NaiveDate::from_ymd_opt(2015, 9, 10).unwrap().and_hms_opt(10, 20, 0).unwrap()
NaiveDate::from_ymd_opt(2015, 9, 10)
.unwrap()
.and_hms_opt(10, 20, 0)
.unwrap()
);
}
94 changes: 62 additions & 32 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ pub fn parse_info(vec: Vec<Vec<&str>>) -> HashMap<String, usize> {
let mut m = HashMap::new();

if vec.len() == 1 {
for (i, val) in vec.get(0).unwrap().iter().enumerate() {
for (i, val) in vec.first().unwrap().iter().enumerate() {
m.insert(val.to_lowercase(), i);
}
} else {
Expand Down Expand Up @@ -305,13 +305,9 @@ impl ParserInfo {
}

fn ampm_index(&self, name: &str) -> Option<bool> {
if let Some(v) = self.ampm.get(&name.to_lowercase()) {
// Python technically uses numbers here, but given that the numbers are
// only 0 and 1, it's easier to use booleans
Some(*v == 1)
} else {
None
}
// Python technically uses numbers here, but given that the numbers are
// only 0 and 1, it's easier to use booleans
self.ampm.get(&name.to_lowercase()).map(|v| *v == 1)
}

fn pertain_index(&self, name: &str) -> bool {
Expand Down Expand Up @@ -394,6 +390,7 @@ enum YMDLabel {
Day,
}

#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Default)]
struct YMD {
_ymd: Vec<i32>, // TODO: This seems like a super weird way to store things
Expand All @@ -412,7 +409,7 @@ impl YMD {
if self.dstridx.is_some() {
false
} else if self.mstridx.is_none() {
(1 <= val) && (val <= 31)
(1..=31).contains(&val)
} else if self.ystridx.is_none() {
// UNWRAP: Earlier condition catches mstridx missing
let month = self._ymd[self.mstridx.unwrap()];
Expand Down Expand Up @@ -713,9 +710,12 @@ impl Parser {
ignoretz: bool,
tzinfos: &HashMap<String, i32>,
) -> ParseResult<(NaiveDateTime, Option<FixedOffset>, Option<Vec<String>>)> {
let default_date = default.unwrap_or(&Local::now().naive_local()).date();

let default_ts = NaiveDateTime::new(default_date, NaiveTime::from_hms_opt(0, 0, 0).unwrap());
// When no default is supplied, fall back to 1970-01-01 00:00:00 rather than the current local time.
let default_date = default
.unwrap_or(&NaiveDate::default().and_hms_opt(0, 0, 0).unwrap())
.date();
let default_ts =
NaiveDateTime::new(default_date, NaiveTime::from_hms_opt(0, 0, 0).unwrap());

let (res, tokens) =
self.parse_with_tokens(timestr, dayfirst, yearfirst, fuzzy, fuzzy_with_tokens)?;
Expand Down Expand Up @@ -758,7 +758,7 @@ impl Parser {

let mut res = ParsingResult::default();

let mut l = tokenize(&timestr);
let mut l = tokenize(timestr);
let mut skipped_idxs: Vec<usize> = Vec::new();

let mut ymd = YMD::default();
Expand Down Expand Up @@ -840,28 +840,55 @@ impl Parser {
}
} else if res.hour.is_some() && (l[i] == "+" || l[i] == "-") {
let signal = if l[i] == "+" { 1 } else { -1 };
let len_li = l[i].len();
// check next index's length
let timezone_len = l[i + 1].len();

let mut hour_offset: Option<i32> = None;
let mut min_offset: Option<i32> = None;

// TODO: check that l[i + 1] is integer?
if len_li == 4 {
if timezone_len == 4 {
// -0300
hour_offset = Some(l[i + 1][..2].parse::<i32>()?);
min_offset = Some(l[i + 1][2..4].parse::<i32>()?);
} else if i + 2 < len_l && l[i + 2] == ":" {
// -03:00
hour_offset = Some(l[i + 1].parse::<i32>()?);
min_offset = Some(l[i + 3].parse::<i32>()?);
let hour_offset_len = l[i + 1].len();
// An hour field longer than two digits (e.g. -003:00) is rejected with an error.
if hour_offset_len <= 2 {
let range_len = min(hour_offset_len, 2);
hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
} else {
return Err(ParseError::TimezoneUnsupported);
}

// A truncated offset such as "-03:" has no minutes token; return an error instead of panicking on the index below.
if i + 3 > l.len() - 1 {
return Err(ParseError::TimezoneUnsupported);
}

let min_offset_len = l[i + 3].len();
// A minute field longer than two digits (e.g. -09:003) is rejected with an error.
if min_offset_len <= 2 {
let range_len = min(min_offset_len, 2);
min_offset = Some(l[i + 3][..range_len].parse::<i32>()?);
} else {
return Err(ParseError::TimezoneUnsupported);
}

i += 2;
} else if len_li <= 2 {
} else if timezone_len <= 2 {
// -[0]3
let range_len = min(l[i + 1].len(), 2);
hour_offset = Some(l[i + 1][..range_len].parse::<i32>()?);
min_offset = Some(0);
}

// No branch above matched the offset's shape (e.g. +09123), so it is unsupported.
if hour_offset.is_none() && min_offset.is_none() {
return Err(ParseError::TimezoneUnsupported);
}

res.tzoffset =
Some(signal * (hour_offset.unwrap() * 3600 + min_offset.unwrap() * 60));

Expand Down Expand Up @@ -972,16 +999,16 @@ impl Parser {
days_in_month(y, m as i32)?,
),
)
.ok_or_else(|| ParseError::ImpossibleTimestamp("Invalid date range given"))?;
.ok_or(ParseError::ImpossibleTimestamp("Invalid date range given"))?;

let d = d + d_offset;

let hour = res.hour.unwrap_or(default.hour() as i32) as u32;
let minute = res.minute.unwrap_or(default.minute() as i32) as u32;
let second = res.second.unwrap_or(default.second() as i32) as u32;
let nanosecond = res
.nanosecond
.unwrap_or(default.timestamp_subsec_nanos() as i64) as u32;
let nanosecond =
res.nanosecond
.unwrap_or(default.and_utc().timestamp_subsec_nanos() as i64) as u32;
let t =
NaiveTime::from_hms_nano_opt(hour, minute, second, nanosecond).ok_or_else(|| {
if hour >= 24 {
Expand All @@ -1008,11 +1035,11 @@ impl Parser {
) -> ParseResult<Option<FixedOffset>> {
if let Some(offset) = res.tzoffset {
Ok(FixedOffset::east_opt(offset))
} else if res.tzoffset == None
} else if res.tzoffset.is_none()
&& (res.tzname == Some(" ".to_owned())
|| res.tzname == Some(".".to_owned())
|| res.tzname == Some("-".to_owned())
|| res.tzname == None)
|| res.tzname.is_none())
{
Ok(None)
} else if res.tzname.is_some() && tzinfos.contains_key(res.tzname.as_ref().unwrap()) {
Expand All @@ -1039,7 +1066,7 @@ impl Parser {
) -> ParseResult<usize> {
let mut idx = idx;
let value_repr = &tokens[idx];
let mut value = Decimal::from_str(&value_repr).unwrap();
let mut value = Decimal::from_str(value_repr).unwrap();

let len_li = value_repr.len();
let len_l = tokens.len();
Expand All @@ -1063,7 +1090,7 @@ impl Parser {
// YYMMDD or HHMMSS[.ss]
let s = &tokens[idx];

if ymd.len() == 0 && tokens[idx].find('.') == None {
if ymd.len() == 0 && tokens[idx].find('.').is_none() {
ymd.append(s[0..2].parse::<i32>()?, &s[0..2], None)?;
ymd.append(s[2..4].parse::<i32>()?, &s[2..4], None)?;
ymd.append(s[4..6].parse::<i32>()?, &s[4..6], None)?;
Expand All @@ -1076,7 +1103,7 @@ impl Parser {
res.second = Some(t.0);
res.nanosecond = Some(t.1);
}
} else if vec![8, 12, 14].contains(&len_li) {
} else if [8, 12, 14].contains(&len_li) {
// YYYYMMDD
let s = &tokens[idx];
ymd.append(s[..4].parse::<i32>()?, &s[..4], Some(YMDLabel::Year))?;
Expand Down Expand Up @@ -1122,7 +1149,7 @@ impl Parser {
{
// TODO: There's got to be a better way of handling the condition above
let sep = &tokens[idx + 1];
ymd.append(value_repr.parse::<i32>()?, &value_repr, None)?;
ymd.append(value_repr.parse::<i32>()?, value_repr, None)?;

if idx + 2 < len_l && !info.jump_index(&tokens[idx + 2]) {
if let Ok(val) = tokens[idx + 2].parse::<i32>() {
Expand Down Expand Up @@ -1155,8 +1182,11 @@ impl Parser {
idx += 1;
} else {
//let value = value.floor().to_i32().ok_or(Err(ParseError::InvalidNumeric()))
let value = value.floor().to_i32().ok_or_else(|| ParseError::InvalidNumeric(value_repr.to_owned()))?;
ymd.append(value, &value_repr, None)?;
let value = value
.floor()
.to_i32()
.ok_or_else(|| ParseError::InvalidNumeric(value_repr.to_owned()))?;
ymd.append(value, value_repr, None)?;
}

idx += 1;
Expand All @@ -1168,7 +1198,7 @@ impl Parser {
res.hour = Some(self.adjust_ampm(hour, info.ampm_index(&tokens[idx + 1]).unwrap()));
idx += 1;
} else if ymd.could_be_day(value.to_i64().unwrap() as i32) {
ymd.append(value.to_i64().unwrap() as i32, &value_repr, None)?;
ymd.append(value.to_i64().unwrap() as i32, value_repr, None)?;
} else if !fuzzy {
return Err(ParseError::UnrecognizedFormat);
}
Expand Down Expand Up @@ -1291,7 +1321,7 @@ impl Parser {
}

fn to_decimal(&self, value: &str) -> ParseResult<Decimal> {
Decimal::from_str(value).or_else(|_| Err(ParseError::InvalidNumeric(value.to_owned())))
Decimal::from_str(value).map_err(|_| ParseError::InvalidNumeric(value.to_owned()))
}

fn parse_min_sec(&self, value: Decimal) -> (i32, Option<i32>) {
Expand Down
85 changes: 80 additions & 5 deletions src/tests/fuzzing.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use base64::Engine;
use base64::engine::general_purpose::STANDARD;
use base64::Engine;
use chrono::NaiveDate;
use std::collections::HashMap;
use std::str;
Expand All @@ -21,7 +21,10 @@ fn test_fuzz() {
Err(ParseError::UnrecognizedFormat)
);

let default = NaiveDate::from_ymd_opt(2016, 6, 29).unwrap().and_hms_opt(0, 0, 0).unwrap();
let default = NaiveDate::from_ymd_opt(2016, 6, 29)
.unwrap()
.and_hms_opt(0, 0, 0)
.unwrap();
let p = Parser::default();
let res = p.parse(
"\x0D\x31",
Expand Down Expand Up @@ -66,12 +69,20 @@ fn empty_string() {

#[test]
fn github_33() {
assert_eq!(parse("66:'"), Err(ParseError::InvalidNumeric("'".to_owned())))
assert_eq!(
parse("66:'"),
Err(ParseError::InvalidNumeric("'".to_owned()))
)
}

#[test]
fn github_32() {
assert_eq!(parse("99999999999999999999999"), Err(ParseError::InvalidNumeric("99999999999999999999999".to_owned())))
assert_eq!(
parse("99999999999999999999999"),
Err(ParseError::InvalidNumeric(
"99999999999999999999999".to_owned()
))
)
}

#[test]
Expand All @@ -96,4 +107,68 @@ fn github_36() {
let parse_str = str::from_utf8(&parse_vec).unwrap();
let parse_result = parse(parse_str);
assert!(parse_result.is_err());
}
}

#[test]
fn github_46() {
    // Offsets that are truncated or carry too many digits must be reported
    // as `TimezoneUnsupported`.
    for malformed in [
        "2000-01-01 12:00:00+00:",
        "2000-01-01 12:00:00+09123",
        "2000-01-01 13:00:00+00:003",
        "2000-01-01 13:00:00+009:03",
    ] {
        assert_eq!(parse(malformed), Err(ParseError::TimezoneUnsupported));
    }

    // Non-numeric hour or minute fields are expected to surface as
    // `InvalidNumeric` carrying the integer-parse failure text.
    for non_numeric in ["2000-01-01 13:00:00+xx:03", "2000-01-01 13:00:00+00:yz"] {
        assert_eq!(
            parse(non_numeric),
            Err(ParseError::InvalidNumeric(
                "invalid digit found in string".to_owned()
            ))
        );
    }

    // A well-formed `+HH:MM` offset is accepted.
    let (dt, offset) = parse("2000-01-01 13:00:00+00:03").unwrap_or_else(|_| panic!());
    assert_eq!(format!("{:?}", dt), "2000-01-01T13:00:00");
    assert_eq!(format!("{:?}", offset), "Some(+00:03)");

    // A well-formed `+HHMM` offset is accepted.
    let (dt, offset) = parse("2000-01-01 12:00:00+0811").unwrap_or_else(|_| panic!());
    assert_eq!(format!("{:?}", dt), "2000-01-01T12:00:00");
    assert_eq!(format!("{:?}", offset), "Some(+08:11)");

    // A bare year is completed to January 1st at midnight, with no offset.
    let (dt, offset) = parse("2000").unwrap_or_else(|_| panic!());
    assert_eq!(format!("{:?}", dt), "2000-01-01T00:00:00");
    assert!(offset.is_none());
}
12 changes: 9 additions & 3 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@ mod fuzzing;
mod pycompat_parser;
mod pycompat_tokenizer;

use chrono::NaiveDate;
use crate::parse;
use chrono::NaiveDate;

#[test]
fn nanosecond_precision() {
assert_eq!(
parse("2008.12.29T08:09:10.123456789").unwrap(),
(NaiveDate::from_ymd_opt(2008, 12, 29).unwrap().and_hms_nano_opt(8, 9, 10, 123_456_789).unwrap(), None)
(
NaiveDate::from_ymd_opt(2008, 12, 29)
.unwrap()
.and_hms_nano_opt(8, 9, 10, 123_456_789)
.unwrap(),
None
)
)
}
}
Loading