Skip to content

Commit

Permalink
[EN DateTimeV2] "the MM/DD[/YYYY]" resolution fix
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-gradinari committed May 8, 2023
1 parent 17de6bb commit 5b3fe87
Show file tree
Hide file tree
Showing 10 changed files with 446 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ public static class DateTimeDefinitions
public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}";
public static readonly string DateExtractor5 = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)";
public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b";
public static readonly string DateExtractor7L = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b";
public static readonly string DateExtractor7S = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b";
public static readonly string DateExtractor7L = $@"\b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b";
public static readonly string DateExtractor7S = $@"\b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b";
public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b";
public static readonly string DateExtractor9L = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b";
public static readonly string DateExtractor9S = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b";
Expand Down Expand Up @@ -926,4 +926,4 @@ public static class DateTimeDefinitions
public const string QuarterTypeRegex = @"(quarter(s|ly)?)$";
public const string YearTypeRegex = @"((years?|annual)(ly)?)$";
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -412,13 +412,13 @@ public class EnglishDateTime {
.replace("{StrictRelativeRegex}", StrictRelativeRegex)
.replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex);

public static final String DateExtractor7L = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b"
public static final String DateExtractor7L = "\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
.replace("{DayPrefix}", DayPrefix)
.replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex);

public static final String DateExtractor7S = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b"
public static final String DateExtractor7S = "\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b"
.replace("{MonthNumRegex}", MonthNumRegex)
.replace("{DayRegex}", DayRegex)
.replace("{DayPrefix}", DayPrefix)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ export namespace EnglishDateTime {
export const DateExtractor4 = `\\b${MonthNumRegex}\\s*[/\\\\\\-]\\s*${DayRegex}[\\.]?\\s*[/\\\\\\-]\\s*${DateYearRegex}`;
export const DateExtractor5 = `\\b(${DayPrefix}(\\s*,)?\\s+)?${DayRegex}\\s*[/\\\\\\-\\.]\\s*(${MonthNumRegex}|${MonthRegex})\\s*[/\\\\\\-\\.]\\s*${DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)`;
export const DateExtractor6 = `(?<=${DatePreposition}\\s+)(${StrictRelativeRegex}\\s+)?(${DayPrefix}\\s+)?${MonthNumRegex}[\\-\\.]${DayRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`;
export const DateExtractor7L = `\\b(${DayPrefix}(\\s*,)?\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}${DateExtractorYearTermRegex}(?![%])\\b`;
export const DateExtractor7S = `\\b(${DayPrefix}(\\s*,)?\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`;
export const DateExtractor7L = `\\b(${DayPrefix}(\\s*,)?\\s+)?(the\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}${DateExtractorYearTermRegex}(?![%])\\b`;
export const DateExtractor7S = `\\b(${DayPrefix}(\\s*,)?\\s+)?(the\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`;
export const DateExtractor8 = `(?<=${DatePreposition}\\s+)(${StrictRelativeRegex}\\s+)?(${DayPrefix}\\s+)?${DayRegex}[\\\\\\-]${MonthNumRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`;
export const DateExtractor9L = `\\b(${DayPrefix}(\\s*,)?\\s+)?${DayRegex}\\s*/\\s*${MonthNumRegex}${DateExtractorYearTermRegex}(?![%])\\b`;
export const DateExtractor9S = `\\b(${DayPrefix}(\\s*,)?\\s+)?${DayRegex}\\s*/\\s*${MonthNumRegex}${BaseDateTime.CheckDecimalRegex}(?![%])\\b`;
Expand Down
4 changes: 2 additions & 2 deletions Patterns/English/English-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,10 @@ DateExtractor6: !nestedRegex
def: (?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b
references: [ MonthNumRegex, DayRegex, DayPrefix, DatePreposition, StrictRelativeRegex, BaseDateTime.CheckDecimalRegex ]
DateExtractor7L: !nestedRegex
def: \b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b
def: \b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b
references: [ MonthNumRegex, DayRegex, DayPrefix, DateExtractorYearTermRegex ]
DateExtractor7S: !nestedRegex
def: \b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b
def: \b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b
references: [ MonthNumRegex, DayRegex, DayPrefix, BaseDateTime.CheckDecimalRegex ]
# The only difference between 7L and 7S is whether the "Year" part is required.
# We have both the long and short regexes because we would like to catch both "11/20, 12" and "11/20, 12/20".
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ class EnglishDateTime:
DateExtractor4 = f'\\b{MonthNumRegex}\\s*[/\\\\\\-]\\s*{DayRegex}[\\.]?\\s*[/\\\\\\-]\\s*{DateYearRegex}'
DateExtractor5 = f'\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)'
DateExtractor6 = f'(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{MonthNumRegex}[\\-\\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b'
DateExtractor7L = f'\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b'
DateExtractor7S = f'\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b'
DateExtractor7L = f'\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b'
DateExtractor7S = f'\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b'
DateExtractor8 = f'(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{DayRegex}[\\\\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b'
DateExtractor9L = f'\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\\b'
DateExtractor9S = f'\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\\b'
Expand Down
44 changes: 44 additions & 0 deletions Specs/DateTime/English/DateExtractor.json
Original file line number Diff line number Diff line change
Expand Up @@ -1680,5 +1680,49 @@
"Length": 17
}
]
},
{
"Input": "the photograph appeared in the 01/07 issue of the magazine",
"Results": [
{
"Text": "the 01/07",
"Type": "date",
"Start": 27,
"Length": 9
}
]
},
{
"Input": "the contract is expected to start on 10/11",
"Results": [
{
"Text": "10/11",
"Type": "date",
"Start": 37,
"Length": 5
}
]
},
{
"Input": "the software used in the 11/3/2020 election can be easily hacked",
"Results": [
{
"Text": "the 11/3/2020",
"Type": "date",
"Start": 21,
"Length": 13
}
]
},
{
"Input": "this article was published on 06/10/2020",
"Results": [
{
"Text": "06/10/2020",
"Type": "date",
"Start": 30,
"Length": 10
}
]
}
]
95 changes: 94 additions & 1 deletion Specs/DateTime/English/DateParser.json
Original file line number Diff line number Diff line change
Expand Up @@ -3143,5 +3143,98 @@
"Length": 9
}
]
},
{
"Input": "the photograph appeared in the 01/07 issue of the magazine",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"Results": [
{
"Text": "the 01/07",
"Type": "date",
"Value": {
"Timex": "XXXX-01-07",
"FutureResolution": {
"date": "2024-01-07"
},
"PastResolution": {
"date": "2023-01-07"
}
},
"Start": 27,
"Length": 9
}
]
},
{
"Input": "the contract is expected to start on 10/11",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"Results": [
{
"Text": "10/11",
"Type": "date",
"Value": {
"Timex": "XXXX-10-11",
"FutureResolution": {
"date": "2023-10-11"
},
"PastResolution": {
"date": "2022-10-11"
}
},
"Start": 37,
"Length": 5
}
]
},
{
"Input": "the software used in the 11/3/2020 election can be easily hacked",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"NotSupported": "java,javascript",
"Results": [
{
"Text": "the 11/3/2020",
"Type": "date",
"Value": {
"Timex": "2020-11-03",
"FutureResolution": {
"date": "2020-11-03"
},
"PastResolution": {
"date": "2020-11-03"
}
},
"Start": 21,
"Length": 13
}
]
},
{
"Input": "this article was published on 06/10/2020",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"Results": [
{
"Text": "06/10/2020",
"Type": "date",
"Value": {
"Timex": "2020-06-10",
"FutureResolution": {
"date": "2020-06-10"
},
"PastResolution": {
"date": "2020-06-10"
}
},
"Start": 30,
"Length": 10
}
]
}
]
]
103 changes: 103 additions & 0 deletions Specs/DateTime/English/DateTimeModel.json
Original file line number Diff line number Diff line change
Expand Up @@ -25850,5 +25850,108 @@
}
}
]
},
{
"Input": "the photograph appeared in the 01/07 issue of the magazine",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"Results": [
{
"Start": 27,
"End": 35,
"Resolution": {
"values": [
{
"timex": "XXXX-01-07",
"type": "date",
"value": "2023-01-07"
},
{
"timex": "XXXX-01-07",
"type": "date",
"value": "2024-01-07"
}
]
},
"Text": "the 01/07",
"TypeName": "datetimeV2.date"
}
]
},
{
"Input": "the contract is expected to start on 10/11",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"Results": [
{
"Start": 37,
"End": 41,
"Resolution": {
"values": [
{
"timex": "XXXX-10-11",
"type": "date",
"value": "2022-10-11"
},
{
"timex": "XXXX-10-11",
"type": "date",
"value": "2023-10-11"
}
]
},
"Text": "10/11",
"TypeName": "datetimeV2.date"
}
]
},
{
"Input": "the software used in the 11/3/2020 election can be easily hacked",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"NotSupported": "java,javascript",
"Results": [
{
"Start": 21,
"End": 33,
"Resolution": {
"values": [
{
"timex": "2020-11-03",
"type": "date",
"value": "2020-11-03"
}
]
},
"Text": "the 11/3/2020",
"TypeName": "datetimeV2.date"
}
]
},
{
"Input": "this article was published on 06/10/2020",
"Context": {
"ReferenceDateTime": "2023-05-05T00:00:00"
},
"Results": [
{
"Start": 30,
"End": 39,
"Resolution": {
"values": [
{
"timex": "2020-06-10",
"type": "date",
"value": "2020-06-10"
}
]
},
"Text": "06/10/2020",
"TypeName": "datetimeV2.date"
}
]
}
]
Loading

0 comments on commit 5b3fe87

Please sign in to comment.