From 5b3fe871f6945ad7249c0f5bd2a3fbe1729bc986 Mon Sep 17 00:00:00 2001 From: Andrew Gradinari Date: Mon, 8 May 2023 10:17:25 +0100 Subject: [PATCH] [EN DateTimeV2] "the MM/DD[/YYYY]" resolution fix --- .../English/DateTimeDefinitions.cs | 6 +- .../datetime/resources/EnglishDateTime.java | 4 +- .../src/resources/englishDateTime.ts | 4 +- Patterns/English/English-DateTime.yaml | 4 +- .../resources/english_date_time.py | 4 +- Specs/DateTime/English/DateExtractor.json | 44 ++++++++ Specs/DateTime/English/DateParser.json | 95 +++++++++++++++- Specs/DateTime/English/DateTimeModel.json | 103 ++++++++++++++++++ Specs/DateTime/EnglishOthers/DateParser.json | 92 ++++++++++++++++ .../DateTime/EnglishOthers/DateTimeModel.json | 102 +++++++++++++++++ 10 files changed, 446 insertions(+), 12 deletions(-) diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs index 21d069402c..481058f158 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs @@ -139,8 +139,8 @@ public static class DateTimeDefinitions public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}"; public static readonly string DateExtractor5 = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; - public static readonly string DateExtractor7L = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; - public static readonly string DateExtractor7S = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor7L = $@"\b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor7S = $@"\b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; public static readonly string DateExtractor9L = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; public static readonly string DateExtractor9S = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b"; @@ -926,4 +926,4 @@ public static class DateTimeDefinitions public const string QuarterTypeRegex = @"(quarter(s|ly)?)$"; public const string YearTypeRegex = @"((years?|annual)(ly)?)$"; } -} \ No newline at end of file +} diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java index 3032d725fd..efb52516bb 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java @@ -412,13 +412,13 @@ public class EnglishDateTime { .replace("{StrictRelativeRegex}", StrictRelativeRegex) .replace("{BaseDateTime.CheckDecimalRegex}", BaseDateTime.CheckDecimalRegex); - public static final String DateExtractor7L = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b" + public static final String DateExtractor7L = "\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) .replace("{DayPrefix}", DayPrefix) .replace("{DateExtractorYearTermRegex}", DateExtractorYearTermRegex); - public static final String DateExtractor7S = "\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b" + public static final String DateExtractor7S = "\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b" .replace("{MonthNumRegex}", MonthNumRegex) .replace("{DayRegex}", DayRegex) .replace("{DayPrefix}", DayPrefix) diff --git a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts index b8bd4c2bc5..116f4ee322 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts @@ -129,8 +129,8 @@ export namespace EnglishDateTime { export const DateExtractor4 = `\\b${MonthNumRegex}\\s*[/\\\\\\-]\\s*${DayRegex}[\\.]?\\s*[/\\\\\\-]\\s*${DateYearRegex}`; export const DateExtractor5 = `\\b(${DayPrefix}(\\s*,)?\\s+)?${DayRegex}\\s*[/\\\\\\-\\.]\\s*(${MonthNumRegex}|${MonthRegex})\\s*[/\\\\\\-\\.]\\s*${DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)`; export const DateExtractor6 = `(?<=${DatePreposition}\\s+)(${StrictRelativeRegex}\\s+)?(${DayPrefix}\\s+)?${MonthNumRegex}[\\-\\.]${DayRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`; - export const DateExtractor7L = `\\b(${DayPrefix}(\\s*,)?\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}${DateExtractorYearTermRegex}(?![%])\\b`; - export const DateExtractor7S = `\\b(${DayPrefix}(\\s*,)?\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`; + export const DateExtractor7L = `\\b(${DayPrefix}(\\s*,)?\\s+)?(the\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}${DateExtractorYearTermRegex}(?![%])\\b`; + export const DateExtractor7S = `\\b(${DayPrefix}(\\s*,)?\\s+)?(the\\s+)?${MonthNumRegex}\\s*/\\s*${DayRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`; export const DateExtractor8 = `(?<=${DatePreposition}\\s+)(${StrictRelativeRegex}\\s+)?(${DayPrefix}\\s+)?${DayRegex}[\\\\\\-]${MonthNumRegex}(?![%])${BaseDateTime.CheckDecimalRegex}\\b`; export const DateExtractor9L = `\\b(${DayPrefix}(\\s*,)?\\s+)?${DayRegex}\\s*/\\s*${MonthNumRegex}${DateExtractorYearTermRegex}(?![%])\\b`; export const DateExtractor9S = `\\b(${DayPrefix}(\\s*,)?\\s+)?${DayRegex}\\s*/\\s*${MonthNumRegex}${BaseDateTime.CheckDecimalRegex}(?![%])\\b`; diff --git a/Patterns/English/English-DateTime.yaml b/Patterns/English/English-DateTime.yaml index 14ded022d4..b27da83b3a 100644 --- a/Patterns/English/English-DateTime.yaml +++ b/Patterns/English/English-DateTime.yaml @@ -306,10 +306,10 @@ DateExtractor6: !nestedRegex def: (?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b references: [ MonthNumRegex, DayRegex, DayPrefix, DatePreposition, StrictRelativeRegex, BaseDateTime.CheckDecimalRegex ] DateExtractor7L: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b + def: \b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b references: [ MonthNumRegex, DayRegex, DayPrefix, DateExtractorYearTermRegex ] DateExtractor7S: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b + def: \b({DayPrefix}(\s*,)?\s+)?(the\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b references: [ MonthNumRegex, DayRegex, DayPrefix, BaseDateTime.CheckDecimalRegex ] # The only difference between 7L and 7S is whether "Year" part is required # We have both the long and short Regex because we would like to catch both "11/20, 12" and "11/20, 12/20" diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py index 9e2d06ddd4..73187dfe09 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py @@ -132,8 +132,8 @@ class EnglishDateTime: DateExtractor4 = f'\\b{MonthNumRegex}\\s*[/\\\\\\-]\\s*{DayRegex}[\\.]?\\s*[/\\\\\\-]\\s*{DateYearRegex}' DateExtractor5 = f'\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*[/\\\\\\-\\.]\\s*({MonthNumRegex}|{MonthRegex})\\s*[/\\\\\\-\\.]\\s*{DateYearRegex}(?!\\s*[/\\\\\\-\\.]\\s*\\d+)' DateExtractor6 = f'(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{MonthNumRegex}[\\-\\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b' - DateExtractor7L = f'\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b' - DateExtractor7S = f'\\b({DayPrefix}(\\s*,)?\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b' + DateExtractor7L = f'\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\\b' + DateExtractor7S = f'\\b({DayPrefix}(\\s*,)?\\s+)?(the\\s+)?{MonthNumRegex}\\s*/\\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b' DateExtractor8 = f'(?<={DatePreposition}\\s+)({StrictRelativeRegex}\\s+)?({DayPrefix}\\s+)?{DayRegex}[\\\\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\\b' DateExtractor9L = f'\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\\b' DateExtractor9S = f'\\b({DayPrefix}(\\s*,)?\\s+)?{DayRegex}\\s*/\\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\\b' diff --git a/Specs/DateTime/English/DateExtractor.json b/Specs/DateTime/English/DateExtractor.json index 7195b9bb60..e0e3634f0f 100644 --- a/Specs/DateTime/English/DateExtractor.json +++ b/Specs/DateTime/English/DateExtractor.json @@ -1680,5 +1680,49 @@ "Length": 17 } ] + }, + { + "Input": "the photograph appeared in the 01/07 issue of the magazine", + "Results": [ + { + "Text": "the 01/07", + "Type": "date", + "Start": 27, + "Length": 9 + } + ] + }, + { + "Input": "the contract is expected to start on 10/11", + "Results": [ + { + "Text": "10/11", + "Type": "date", + "Start": 37, + "Length": 5 + } + ] + }, + { + "Input": "the software used in the 11/3/2020 election can be easily hacked", + "Results": [ + { + "Text": "the 11/3/2020", + "Type": "date", + "Start": 21, + "Length": 13 + } + ] + }, + { + "Input": "this article was published on 06/10/2020", + "Results": [ + { + "Text": "06/10/2020", + "Type": "date", + "Start": 30, + "Length": 10 + } + ] } ] diff --git a/Specs/DateTime/English/DateParser.json b/Specs/DateTime/English/DateParser.json index e3b3bf72ff..181e4a9c95 100644 --- a/Specs/DateTime/English/DateParser.json +++ b/Specs/DateTime/English/DateParser.json @@ -3143,5 +3143,98 @@ "Length": 9 } ] + }, + { + "Input": "the photograph appeared in the 01/07 issue of the magazine", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "the 01/07", + "Type": "date", + "Value": { + "Timex": "XXXX-01-07", + "FutureResolution": { + "date": "2024-01-07" + }, + "PastResolution": { + "date": "2023-01-07" + } + }, + "Start": 27, + "Length": 9 + } + ] + }, + { + "Input": "the contract is expected to start on 10/11", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "10/11", + "Type": "date", + "Value": { + "Timex": "XXXX-10-11", + "FutureResolution": { + "date": "2023-10-11" + }, + "PastResolution": { + "date": "2022-10-11" + } + }, + "Start": 37, + "Length": 5 + } + ] + }, + { + "Input": "the software used in the 11/3/2020 election can be easily hacked", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "NotSupported": "java,javascript", + "Results": [ + { + "Text": "the 11/3/2020", + "Type": "date", + "Value": { + "Timex": "2020-11-03", + "FutureResolution": { + "date": "2020-11-03" + }, + "PastResolution": { + "date": "2020-11-03" + } + }, + "Start": 21, + "Length": 13 + } + ] + }, + { + "Input": "this article was published on 06/10/2020", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "06/10/2020", + "Type": "date", + "Value": { + "Timex": "2020-06-10", + "FutureResolution": { + "date": "2020-06-10" + }, + "PastResolution": { + "date": "2020-06-10" + } + }, + "Start": 30, + "Length": 10 + } + ] } -] \ No newline at end of file +] diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index 804b5c2033..c4d475dde4 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -25850,5 +25850,108 @@ } } ] + }, + { + "Input": "the photograph appeared in the 01/07 issue of the magazine", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 27, + "End": 35, + "Resolution": { + "values": [ + { + "timex": "XXXX-01-07", + "type": "date", + "value": "2023-01-07" + }, + { + "timex": "XXXX-01-07", + "type": "date", + "value": "2024-01-07" + } + ] + }, + "Text": "the 01/07", + "TypeName": "datetimeV2.date" + } + ] + }, + { + "Input": "the contract is expected to start on 10/11", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 37, + "End": 41, + "Resolution": { + "values": [ + { + "timex": "XXXX-10-11", + "type": "date", + "value": "2022-10-11" + }, + { + "timex": "XXXX-10-11", + "type": "date", + "value": "2023-10-11" + } + ] + }, + "Text": "10/11", + "TypeName": "datetimeV2.date" + } + ] + }, + { + "Input": "the software used in the 11/3/2020 election can be easily hacked", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "NotSupported": "java,javascript", + "Results": [ + { + "Start": 21, + "End": 33, + "Resolution": { + "values": [ + { + "timex": "2020-11-03", + "type": "date", + "value": "2020-11-03" + } + ] + }, + "Text": "the 11/3/2020", + "TypeName": "datetimeV2.date" + } + ] + }, + { + "Input": "this article was published on 06/10/2020", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 30, + "End": 39, + "Resolution": { + "values": [ + { + "timex": "2020-06-10", + "type": "date", + "value": "2020-06-10" + } + ] + }, + "Text": "06/10/2020", + "TypeName": "datetimeV2.date" + } + ] } ] diff --git a/Specs/DateTime/EnglishOthers/DateParser.json b/Specs/DateTime/EnglishOthers/DateParser.json index 87fb54bc8f..2e3dc22382 100644 --- a/Specs/DateTime/EnglishOthers/DateParser.json +++ b/Specs/DateTime/EnglishOthers/DateParser.json @@ -67,5 +67,97 @@ "Length": 6 } ] + }, + { + "Input": "the photograph appeared in the 01/07 issue of the magazine", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "the 01/07", + "Type": "date", + "Value": { + "Timex": "XXXX-07-01", + "FutureResolution": { + "date": "2023-07-01" + }, + "PastResolution": { + "date": "2022-07-01" + } + }, + "Start": 27, + "Length": 9 + } + ] + }, + { + "Input": "the contract is expected to start on 10/11", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "10/11", + "Type": "date", + "Value": { + "Timex": "XXXX-11-10", + "FutureResolution": { + "date": "2023-11-10" + }, + "PastResolution": { + "date": "2022-11-10" + } + }, + "Start": 37, + "Length": 5 + } + ] + }, + { + "Input": "the software used in the 11/3/2020 election can be easily hacked", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "the 11/3/2020", + "Type": "date", + "Value": { + "Timex": "2020-03-11", + "FutureResolution": { + "date": "2020-03-11" + }, + "PastResolution": { + "date": "2020-03-11" + } + }, + "Start": 21, + "Length": 13 + } + ] + }, + { + "Input": "this article was published on 06/10/2020", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Text": "06/10/2020", + "Type": "date", + "Value": { + "Timex": "2020-10-06", + "FutureResolution": { + "date": "2020-10-06" + }, + "PastResolution": { + "date": "2020-10-06" + } + }, + "Start": 30, + "Length": 10 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/EnglishOthers/DateTimeModel.json b/Specs/DateTime/EnglishOthers/DateTimeModel.json index ddc0ec6008..ef5a9f57d6 100644 --- a/Specs/DateTime/EnglishOthers/DateTimeModel.json +++ b/Specs/DateTime/EnglishOthers/DateTimeModel.json @@ -1056,5 +1056,107 @@ } } ] + }, + { + "Input": "the photograph appeared in the 01/07 issue of the magazine", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 27, + "End": 35, + "Resolution": { + "values": [ + { + "timex": "XXXX-07-01", + "type": "date", + "value": "2022-07-01" + }, + { + "timex": "XXXX-07-01", + "type": "date", + "value": "2023-07-01" + } + ] + }, + "Text": "the 01/07", + "TypeName": "datetimeV2.date" + } + ] + }, + { + "Input": "the contract is expected to start on 10/11", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 37, + "End": 41, + "Resolution": { + "values": [ + { + "timex": "XXXX-11-10", + "type": "date", + "value": "2022-11-10" + }, + { + "timex": "XXXX-11-10", + "type": "date", + "value": "2023-11-10" + } + ] + }, + "Text": "10/11", + "TypeName": "datetimeV2.date" + } + ] + }, + { + "Input": "the software used in the 11/3/2020 election can be easily hacked", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 21, + "End": 33, + "Resolution": { + "values": [ + { + "timex": "2020-03-11", + "type": "date", + "value": "2020-03-11" + } + ] + }, + "Text": "the 11/3/2020", + "TypeName": "datetimeV2.date" + } + ] + }, + { + "Input": "this article was published on 06/10/2020", + "Context": { + "ReferenceDateTime": "2023-05-05T00:00:00" + }, + "Results": [ + { + "Start": 30, + "End": 39, + "Resolution": { + "values": [ + { + "timex": "2020-10-06", + "type": "date", + "value": "2020-10-06" + } + ] + }, + "Text": "06/10/2020", + "TypeName": "datetimeV2.date" + } + ] } ] \ No newline at end of file