diff --git a/internal/func_time_parser_test.go b/internal/func_time_parser_test.go new file mode 100644 index 0000000..10b6095 --- /dev/null +++ b/internal/func_time_parser_test.go @@ -0,0 +1,93 @@ +package internal + +import "testing" + +func TestTimeParser(t *testing.T) { + for _, test := range []struct { + name string + text []rune + minValue int64 + maxValue int64 + expectedProgress int + expectedResult int + expectedErr string + }{ + { + name: "single digit but multiple allowed; non-digit character terminates", + text: []rune{'2', '/'}, + minValue: 1, + maxValue: 12, + expectedResult: 2, + expectedProgress: 1, + }, + { + name: "multiple digits; non-digit character terminates", + text: []rune{'1', '2', '/'}, + minValue: 1, + maxValue: 12, + expectedResult: 12, + expectedProgress: 2, + }, + { + name: "leading zero digit but multiple allowed; non-digit character terminates", + text: []rune{'0', '2', '/'}, + minValue: 1, + maxValue: 12, + expectedResult: 2, + expectedProgress: 2, + }, + + { + name: "leading zero digit but multiple allowed; non-digit character terminates", + text: []rune{'0', '0', '2', '/'}, + minValue: 1, + maxValue: 9999, + expectedResult: 2, + expectedProgress: 3, + }, + { + name: "multiple digits but exceeds limit; non-digit character terminates", + text: []rune{'2', '2', '/'}, + minValue: 1, + maxValue: 12, + expectedErr: "part [22] is greater than maximum value [12]", + }, + { + name: "multiple digits but lower than start bound; non-digit character terminates", + text: []rune{'0', '0', '/'}, + minValue: 1, + maxValue: 12, + expectedErr: "part [0] is less than minimum value [1]", + }, + { + name: "multiple digits but lower than start bound; non-digit character terminates", + text: []rune{'4', '-'}, + minValue: 1, + maxValue: 12, + expectedResult: 4, + expectedProgress: 1, + }, + } { + + t.Run(test.name, func(t *testing.T) { + progress, result, err := parseDigitRespectingOptionalPlaces(test.text, test.minValue, test.maxValue) + if err != nil { + if test.expectedErr != err.Error() { + t.Fatalf("unexpected error message: expected [%s] but got [%s]", test.expectedErr, err.Error()) + } else { + // expected error occurred, consider test successful + return + } + } + + if progress != test.expectedProgress { + t.Fatalf("unexpected progress: expected [%d] but got [%d]", test.expectedProgress, progress) + } + + if result != int64(test.expectedResult) { + t.Fatalf("unexpected result: expected [%d] but got [%d]", test.expectedResult, result) + } + + }) + } +} diff --git a/internal/function_time_parser.go b/internal/function_time_parser.go index 973a72e..924466a 100644 --- a/internal/function_time_parser.go +++ b/internal/function_time_parser.go @@ -372,8 +372,8 @@ var formatPatternMap = map[rune]*FormatTimeInfo{ AvailableTypes: []TimeFormatType{ FormatTypeDate, FormatTypeDatetime, FormatTypeTimestamp, }, - Parse: centuryParser, - Format: centuryFormatter, + Parse: yearWithoutCenturyParser, + Format: yearWithoutCenturyFormatter, }, 'Z': &FormatTimeInfo{ AvailableTypes: []TimeFormatType{ @@ -518,6 +518,34 @@ func centuryFormatter(t *time.Time) ([]rune, error) { return []rune(fmt.Sprint(t.Year())[:2]), nil } +func yearWithoutCenturyParser(text []rune, t *time.Time) (int, error) { + progress, year, err := parseDigitRespectingOptionalPlaces(text, 0, 99) + if err != nil { + return 0, fmt.Errorf("could not parse year without century: %s", err) + } + if year >= 69 { + year += 1900 + } else { + year += 2000 + } + *t = time.Date( + int(year), + t.Month(), + int(t.Day()), + int(t.Hour()), + int(t.Minute()), + int(t.Second()), + int(t.Nanosecond()), + t.Location(), + ) + return progress, nil +} + +func yearWithoutCenturyFormatter(t *time.Time) ([]rune, error) { + year := t.Format("2006") + return []rune(year[len(year)-2:]), nil +} + func ansicParser(text []rune, t *time.Time) (int, error) { v, err := time.Parse("Mon Jan 02 15:04:05 2006", string(text)) if err != nil { @@ -584,28 +612,21 @@ func monthDayYearFormatter(t *time.Time) ([]rune, error) { } func dayParser(text []rune, t *time.Time) (int, error) { - const dayLen = 2 - if len(text) < dayLen { - return 0, fmt.Errorf("unexpected day number") - } - d, err := strconv.ParseInt(string(text[:dayLen]), 10, 64) + progress, days, err := parseDigitRespectingOptionalPlaces(text, 1, 31) if err != nil { - return 0, fmt.Errorf("unexpected day number") - } - if d < 0 { - return 0, fmt.Errorf("invalid day number %d", d) + return 0, fmt.Errorf("could not parse day number: %s", err) } *t = time.Date( int(t.Year()), t.Month(), - int(d), + int(days), int(t.Hour()), int(t.Minute()), int(t.Second()), int(t.Nanosecond()), t.Location(), ) - return dayLen, nil + return progress, nil } func dayFormatter(t *time.Time) ([]rune, error) { @@ -613,32 +634,32 @@ func dayFormatter(t *time.Time) ([]rune, error) { } func yearMonthDayParser(text []rune, t *time.Time) (int, error) { - fmtLen := len("2021-01-20") - if len(text) < fmtLen { - return 0, fmt.Errorf("unexpected year-month-day format") - } - splitted := strings.Split(string(text[:fmtLen]), "-") - if len(splitted) != 3 { - return 0, fmt.Errorf("unexpected year-month-day format") + const separator = '-' + progress, y, err := parseDigitRespectingOptionalPlaces(text, 1, 9999) + if err != nil { + return 0, fmt.Errorf("could not parse year number: %s", err) } - year := splitted[0] - month := splitted[1] - day := splitted[2] - if len(year) != 4 || len(month) != 2 || len(day) != 2 { - return 0, fmt.Errorf("unexpected year-month-day format") + if text[progress] != separator { + return 0, fmt.Errorf("could not parse year-month-day: [%c] not found after [%s]", separator, string(text[:progress])) } - y, err := strconv.ParseInt(year, 10, 64) + progress += 1 + + mProgress, m, err := parseDigitRespectingOptionalPlaces(text[progress:], 1, 12) if err != nil { - return 0, fmt.Errorf("unexpected year-month-day format: %w", err) + return 0, fmt.Errorf("could not parse month number: %s", err) } - m, err := strconv.ParseInt(month, 10, 64) - if err != nil { - return 0, fmt.Errorf("unexpected year-month-day format: %w", err) + progress += mProgress + if text[progress] != separator { + return 0, fmt.Errorf("could not parse year-month-day: [%c] not found after [%s]", separator, string(text[:progress])) } - d, err := strconv.ParseInt(day, 10, 64) + + progress += 1 + dProgress, d, err := parseDigitRespectingOptionalPlaces(text[progress:], 1, 31) if err != nil { - return 0, fmt.Errorf("unexpected year-month-day format: %w", err) + return 0, fmt.Errorf("could not parse day number: %s", err) } + progress += dProgress + *t = time.Date( int(y), time.Month(m), @@ -649,7 +670,7 @@ func yearMonthDayParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return fmtLen, nil + return progress, nil } func yearMonthDayFormatter(t *time.Time) ([]rune, error) { @@ -675,16 +696,9 @@ func centuryISOFormatter(t *time.Time) ([]rune, error) { } func hourParser(text []rune, t *time.Time) (int, error) { - const hourLen = 2 - if len(text) < hourLen { - return 0, fmt.Errorf("unexpected hour number") - } - h, err := strconv.ParseInt(string(text[:hourLen]), 10, 64) + progress, h, err := parseDigitRespectingOptionalPlaces(text, 0, 23) if err != nil { - return 0, fmt.Errorf("unexpected hour number") - } - if h < 0 || h > 24 { - return 0, fmt.Errorf("invalid hour number %d", h) + return 0, fmt.Errorf("could not parse hour number: %s", err) } *t = time.Date( int(t.Year()), @@ -696,7 +710,7 @@ func hourParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return hourLen, nil + return progress, nil } func hourFormatter(t *time.Time) ([]rune, error) { @@ -704,16 +718,9 @@ func hourFormatter(t *time.Time) ([]rune, error) { } func hour12Parser(text []rune, t *time.Time) (int, error) { - const hourLen = 2 - if len(text) < hourLen { - return 0, fmt.Errorf("unexpected hour number") - } - h, err := strconv.ParseInt(string(text[:hourLen]), 10, 64) + progress, h, err := parseDigitRespectingOptionalPlaces(text, 0, 12) if err != nil { - return 0, fmt.Errorf("unexpected hour number") - } - if h < 0 || h > 12 { - return 0, fmt.Errorf("invalid hour number %d", h) + return 0, fmt.Errorf("could not parse hour number: %s", err) } *t = time.Date( int(t.Year()), @@ -725,7 +732,7 @@ func hour12Parser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return hourLen, nil + return progress, nil } func hour12Formatter(t *time.Time) ([]rune, error) { @@ -741,16 +748,9 @@ func dayOfYearFormatter(t *time.Time) ([]rune, error) { } func minuteParser(text []rune, t *time.Time) (int, error) { - const minuteLen = 2 - if len(text) < minuteLen { - return 0, fmt.Errorf("unexpected minute number") - } - m, err := strconv.ParseInt(string(text[:minuteLen]), 10, 64) + progress, m, err := parseDigitRespectingOptionalPlaces(text, 0, 59) if err != nil { - return 0, fmt.Errorf("unexpected minute number") - } - if m < 0 || m > 59 { - return 0, fmt.Errorf("invalid minute number %d", m) + return 0, fmt.Errorf("unexpected minute number: %s", err) } *t = time.Date( int(t.Year()), @@ -762,28 +762,73 @@ func minuteParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return minuteLen, nil + return progress, nil } func minuteFormatter(t *time.Time) ([]rune, error) { return []rune(fmt.Sprintf("%02d", t.Minute())), nil } -func monthNumberParser(text []rune, t *time.Time) (int, error) { - const monthLen = 2 - if len(text) < monthLen { - return 0, fmt.Errorf("unexpected month number") +func parseDigitRespectingOptionalPlaces(text []rune, minNumber int64, maxNumber int64) (int, int64, error) { + // Given a target value of `minNumber` and `maxNumber`, parse the given text up to `maxNumber`'s places + // If a non-digit character is encountered, consider the digit parsed and move on + // e.g. ('3', 0, 99) == 3 ('03', 0, 99) == 3 ('04/', 0, 999) == 4 + + textLen := len(text) + places := len(fmt.Sprint(maxNumber)) + var parts []string + if textLen == 0 { + return 0, 0, fmt.Errorf("empty text") + } + + // Format tokens require at least 1 character most `places` characters + steps := places + if textLen < places { + steps = textLen } - m, err := strconv.ParseInt(string(text[:monthLen]), 10, 64) + + for i := 0; i < steps; i++ { + char := string(text[i]) + _, err := strconv.ParseInt(char, 10, 64) + + // If we have encountered an error, we have encountered a non-digit + if err != nil { + // If we have not parsed any digits yet, the input text cannot be parsed + if len(parts) == 0 { + return 0, 0, fmt.Errorf("leading character is not a digit") + } + // If we already have parsed some digits, we assume the character was part of the format string (eg - or /) + break + } + parts = append(parts, char) + } + + result, err := strconv.ParseInt(strings.Join(parts, ""), 10, 64) + + // These parts have already been parsed/formatted once, we don't expect this error to occur, but must handle anyway if err != nil { - return 0, fmt.Errorf("unexpected month number") + return 0, 0, fmt.Errorf("%s", err) + } + + if result > maxNumber { + return 0, 0, fmt.Errorf("part [%d] is greater than maximum value [%d]", result, maxNumber) } - if m < 0 { - return 0, fmt.Errorf("invalid month number %d", m) + + if result < minNumber { + return 0, 0, fmt.Errorf("part [%d] is less than minimum value [%d]", result, minNumber) + } + + return len(parts), result, nil +} + +func monthNumberParser(text []rune, t *time.Time) (int, error) { + progress, months, err := parseDigitRespectingOptionalPlaces(text, 1, 12) + if err != nil { + return 0, fmt.Errorf("could not parse month: %s", err) } *t = time.Date( t.Year(), - time.Month(m), + time.Month(months), int(t.Day()), int(t.Hour()), int(t.Minute()), @@ -791,7 +836,7 @@ func monthNumberParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return monthLen, nil + return progress, nil } func monthNumberFormatter(t *time.Time) ([]rune, error) { @@ -846,26 +891,17 @@ func quarterFormatter(t *time.Time) ([]rune, error) { } func hourMinuteParser(text []rune, t *time.Time) (int, error) { - fmtLen := len("00:00") - if len(text) < fmtLen { - return 0, fmt.Errorf("unexpected hour:minute format") - } - splitted := strings.Split(string(text[:fmtLen]), ":") - if len(splitted) != 2 { - return 0, fmt.Errorf("unexpected hour:minute format") - } - hour := splitted[0] - minute := splitted[1] - if len(hour) != 2 || len(minute) != 2 { - return 0, fmt.Errorf("unexpected hour:minute format") - } - h, err := strconv.ParseInt(hour, 10, 64) + hProgress, h, err := parseDigitRespectingOptionalPlaces(text, 0, 23) if err != nil { - return 0, fmt.Errorf("unexpected hour:minute format: %w", err) + return 0, fmt.Errorf("could not parse hour: %s", err) } - m, err := strconv.ParseInt(minute, 10, 64) + if text[hProgress] != ':' { + return 0, fmt.Errorf("could not parse hour:minute format: character after hour [%s] is not a `:`", string(text[:hProgress])) + } + hProgress += 1 + mProgress, m, err := parseDigitRespectingOptionalPlaces(text, 0, 59) if err != nil { - return 0, fmt.Errorf("unexpected hour:minute format: %w", err) + return 0, fmt.Errorf("could not parse minute: %s", err) } *t = time.Date( int(t.Year()), @@ -877,7 +913,7 @@ func hourMinuteParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return fmtLen, nil + return mProgress + hProgress, nil } func hourMinuteFormatter(t *time.Time) ([]rune, error) { @@ -885,16 +921,9 @@ func hourMinuteFormatter(t *time.Time) ([]rune, error) { } func secondParser(text []rune, t *time.Time) (int, error) { - const secondLen = 2 - if len(text) < secondLen { - return 0, fmt.Errorf("unexpected second number") - } - s, err := strconv.ParseInt(string(text[:secondLen]), 10, 64) + progress, s, err := parseDigitRespectingOptionalPlaces(text, 0, 59) if err != nil { - return 0, fmt.Errorf("unexpected second number") - } - if s < 0 || s > 59 { - return 0, fmt.Errorf("invalid second number %d", s) + return 0, fmt.Errorf("unexpected second number: %s", err) } *t = time.Date( int(t.Year()), @@ -906,7 +935,7 @@ func secondParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return secondLen, nil + return progress, nil } func secondFormatter(t *time.Time) ([]rune, error) { @@ -1022,16 +1051,9 @@ func weekNumberZeroBaseFormatter(t *time.Time) ([]rune, error) { } func yearParser(text []rune, t *time.Time) (int, error) { - const yearLen = 4 - if len(text) < yearLen { - return 0, fmt.Errorf("unexpected year number") - } - y, err := strconv.ParseInt(string(text[:yearLen]), 10, 64) + progress, y, err := parseDigitRespectingOptionalPlaces(text, 1, 9999) if err != nil { - return 0, fmt.Errorf("unexpected year number") - } - if y < 0 { - return 0, fmt.Errorf("invalid year number %d", y) + return 0, fmt.Errorf("could not parse year: %s", err) } *t = time.Date( int(y), @@ -1043,7 +1065,7 @@ func yearParser(text []rune, t *time.Time) (int, error) { int(t.Nanosecond()), t.Location(), ) - return yearLen, nil + return progress, nil } func yearFormatter(t *time.Time) ([]rune, error) { diff --git a/query_test.go b/query_test.go index db2ff7a..b0bcb37 100644 --- a/query_test.go +++ b/query_test.go @@ -3811,6 +3811,11 @@ SELECT date, EXTRACT(ISOYEAR FROM date), EXTRACT(YEAR FROM date), EXTRACT(MONTH query: `SELECT FORMAT_DATE("%x", DATE "2008-12-25")`, expectedRows: [][]interface{}{{"12/25/08"}}, }, + { + name: "format_date with %y", + query: `SELECT FORMAT_DATE("%y", DATE "2008-12-25"), FORMAT_DATE("%y", DATE "2012-12-25")`, + expectedRows: [][]interface{}{{"08", "12"}}, + }, { name: "format_date with %b-%d-%Y", query: `SELECT FORMAT_DATE("%b-%d-%Y", DATE "2008-12-25")`, @@ -3858,6 +3863,37 @@ SELECT date, EXTRACT(ISOYEAR FROM date), EXTRACT(YEAR FROM date), EXTRACT(MONTH query: `SELECT LAST_DAY(DATE '2008-11-10', WEEK(MONDAY)) AS last_day`, expectedRows: [][]interface{}{{"2008-11-16"}}, }, + // date parsing out of range values + { + name: "parse date exceeding month maximum", + query: `SELECT PARSE_DATE("%m", "14")`, + expectedErr: "could not parse month: part [14] is greater than maximum value [12]", + }, + { + name: "parse date beneath month minimum", + query: `SELECT PARSE_DATE("%m", "0")`, + expectedErr: "could not parse month: part [0] is less than minimum value [1]", + }, + { + name: "parse date exceeding day maximum", + query: `SELECT PARSE_DATE("%d", "32")`, + expectedErr: "could not parse day number: part [32] is greater than maximum value [31]", + }, + { + name: "parse date beneath day minimum", + query: `SELECT PARSE_DATE("%d", "0")`, + expectedErr: "could not parse day number: part [0] is less than minimum value [1]", + }, + { + name: "parse date with single-digit month %m", + query: `SELECT PARSE_DATE("%m", "03"), PARSE_DATE("%m", "3"), PARSE_DATE("%m%Y", "032024")`, + expectedRows: [][]interface{}{{"0001-03-01", "0001-03-01", "2024-03-01"}}, + }, + { + name: "parse_date with %y", + query: `SELECT PARSE_DATE("%y", '1'), PARSE_DATE("%y", '67'), PARSE_DATE("%y", '69')`, + expectedRows: [][]interface{}{{"2001-01-01", "2067-01-01", "1969-01-01"}}, + }, { name: "parse date with %A %b %e %Y", query: `SELECT PARSE_DATE("%A %b %e %Y", "Thursday Dec 25 2008")`, @@ -3881,7 +3917,7 @@ SELECT date, EXTRACT(ISOYEAR FROM date), EXTRACT(YEAR FROM date), EXTRACT(MONTH { name: "parse date ( the year element is in different locations )", query: `SELECT PARSE_DATE("%Y %A %b %e", "Thursday Dec 25 2008")`, - expectedErr: "unexpected year number", + expectedErr: "could not parse year: leading character is not a digit", }, { name: "safe parse date ( the year element is in different locations )", @@ -4026,13 +4062,18 @@ SELECT date, EXTRACT(ISOYEAR FROM date), EXTRACT(YEAR FROM date), EXTRACT(MONTH { name: "parse datetime ( the year element is in different locations )", query: `SELECT PARSE_DATETIME("%a %b %e %Y %I:%M:%S", "Thu Dec 25 07:30:00 2008")`, - expectedErr: "unexpected year number", + expectedErr: "could not parse hour number: part [30] is greater than maximum value [12]", }, { name: "parse datetime ( one of the year elements is missing )", query: `SELECT PARSE_DATETIME("%a %b %e %I:%M:%S", "Thu Dec 25 07:30:00 2008")`, expectedErr: `found unused format element [' ' '2' '0' '0' '8']`, }, + { + name: "parse datetime %F respectfully consuming digits", + query: `SELECT PARSE_DATETIME("%F", "03-1-1"), PARSE_DATETIME("%F", "003-01-1"), PARSE_DATETIME("%F", "0003-1-11")`, + expectedRows: [][]interface{}{{"0003-01-01T00:00:00", "0003-01-01T00:00:00", "0003-01-11T00:00:00"}}, + }, // time functions { @@ -4117,7 +4158,7 @@ SELECT date, EXTRACT(ISOYEAR FROM date), EXTRACT(YEAR FROM date), EXTRACT(MONTH { name: "parse time ( the seconds element is in different locations )", query: `SELECT PARSE_TIME("%S:%I:%M", "07:30:00")`, - expectedErr: "invalid hour number 30", + expectedErr: "could not parse hour number: part [30] is greater than maximum value [12]", }, { name: "parse time ( one of the seconds elements is missing )", @@ -4284,7 +4325,7 @@ SELECT date, EXTRACT(ISOYEAR FROM date), EXTRACT(YEAR FROM date), EXTRACT(MONTH { name: "parse timestamp ( the year element is in different locations )", query: `SELECT PARSE_TIMESTAMP("%a %b %e %Y %I:%M:%S", "Thu Dec 25 07:30:00 2008")`, - expectedErr: "unexpected year number", + expectedErr: "could not parse hour number: part [30] is greater than maximum value [12]", }, { name: "parse timestamp ( one of the year elements is missing )",