diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs index 775686912c..22d51b876e 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs @@ -35,8 +35,8 @@ public static class DateTimeDefinitions public const string CenturySuffixRegex = @"(^century)\b"; public const string ReferencePrefixRegex = @"(that|same)\b"; public const string FutureSuffixRegex = @"\b(in\s+the\s+)?(future|hence)\b"; - public const string DayRegex = @"(the\s*)?(?([1-3]1(th|st)?|[12]2(nd|th)?|[12]3(rd|th)?|(10|1[4-9]|20|2[4-9]|30)(th)?|0?(1(st)?|2(nd)?|3(rd)?|[4-9](th)?)))(?=\b|t)"; - public const string ImplicitDayRegex = @"(the\s*)?(?[1-3]1(th|st)|[12]2(nd|th)|[12]3(rd|th)|(10|1[4-9]|20|2[4-9]|30)(th)|0?(1st|2nd|3rd|[4-9]th))\b"; + public const string DayRegex = @"(the\s*)?(?(3[0-1]|[1-2]\d|0?[1-9])(th|nd|rd|st)?)(?=\b|t)"; + public const string ImplicitDayRegex = @"(the\s*)?(?(3[0-1]|[0-2]?\d)(th|nd|rd|st))\b"; public const string MonthNumRegex = @"(?1[0-2]|(0)?[1-9])\b"; public const string WrittenOneToNineRegex = @"(one|two|three|four|five|six|seven|eight|nine)"; public const string WrittenElevenToNineteenRegex = @"(eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen)"; @@ -107,7 +107,7 @@ public static class DateTimeDefinitions public static readonly string SpecialDayRegex = $@"\b((the\s+)?day before yesterday|(the\s+)?day after (tomorrow|tmr)|the\s+day\s+(before|after)(?!=\s+day)|((the\s+)?({RelativeRegex}|my)\s+day)|yesterday|tomorrow|tmr|today)\b"; public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+days?\s+from\s+(?yesterday|tomorrow|tmr|today))\b"; public static readonly string RelativeDayRegex = $@"\b(((the\s+)?{RelativeRegex}\s+day))\b"; - public const string SetWeekDayRegex = 
@"\b(?on\s+)?(?morning|afternoon|evening|night|sunday|monday|tuesday|wednesday|thursday|friday|saturday)s\b"; + public const string SetWeekDayRegex = @"\b(?on\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\b"; public static readonly string WeekDayOfMonthRegex = $@"(?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(from\s+now|later))\b"; public static readonly string SpecialDate = $@"(?=\b(on|at)\s+the\s+){DayRegex}\b"; @@ -127,15 +127,15 @@ public static class DateTimeDefinitions public static readonly string OfMonth = $@"^\s*of\s*{MonthRegex}"; public static readonly string MonthEnd = $@"{MonthRegex}\s*(the)?\s*$"; public static readonly string WeekDayEnd = $@"(this\s+)?{WeekDayRegex}\s*,?\s*$"; - public const string RangeUnitRegex = @"\b(?years|year|months|month|weeks|week)\b"; + public const string RangeUnitRegex = @"\b(?years?|months?|weeks?)\b"; public const string HourNumRegex = @"\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\b"; public const string MinuteNumRegex = @"(?ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|one|two|three|four|five|six|seven|eight|nine)"; public const string DeltaMinuteNumRegex = @"(?ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|one|two|three|four|five|six|seven|eight|nine)"; public const string PmRegex = @"(?(((at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))"; - public const string PmRegexFull = @"(?((at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|night|lunchtime))"; + public const string PmRegexFull = @"(?((at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime))"; public const string AmRegex = 
@"(?((at|in|around|on|for)\s+(the\s+)?)?(morning))"; - public const string LunchRegex = @"\b(lunchtime)\b"; - public const string NightRegex = @"\b(midnight|night)\b"; + public const string LunchRegex = @"\blunchtime\b"; + public const string NightRegex = @"\b(mid)?night\b"; public const string CommonDatePrefixRegex = @"^[\.]"; public static readonly string LessThanOneHour = $@"(?(a\s+)?quarter|three quarter(s)?|half( an hour)?|{BaseDateTime.DeltaMinuteRegex}(\s+(minute|minutes|min|mins))|{DeltaMinuteNumRegex}(\s+(minute|minutes|min|mins)))"; public static readonly string WrittenTimeRegex = $@"(?{HourNumRegex}\s+({MinuteNumRegex}|(?twenty|thirty|forty|fourty|fifty)\s+{MinuteNumRegex}))"; @@ -143,18 +143,18 @@ public static class DateTimeDefinitions public static readonly string TimeSuffix = $@"(?{AmRegex}|{PmRegex}|{OclockRegex})"; public static readonly string TimeSuffixFull = $@"(?{AmRegex}|{PmRegexFull}|{OclockRegex})"; public static readonly string BasicTime = $@"\b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\d]))"; - public const string MidnightRegex = @"(?midnight|mid-night|mid night)"; - public const string MidmorningRegex = @"(?midmorning|mid-morning|mid morning)"; - public const string MidafternoonRegex = @"(?midafternoon|mid-afternoon|mid afternoon)"; - public const string MiddayRegex = @"(?midday|mid-day|mid day|((12\s)?noon))"; + public const string MidnightRegex = @"(?mid\s*(-\s*)?night)"; + public const string MidmorningRegex = @"(?mid\s*(-\s*)?morning)"; + public const string MidafternoonRegex = @"(?mid\s*(-\s*)?afternoon)"; + public const string MiddayRegex = @"(?mid\s*(-\s*)?day|((12\s)?noon))"; public static readonly string MidTimeRegex = $@"(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))"; public static readonly string AtRegex = 
$@"\b(((?<=\bat\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b"; - public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}(-|——)?ish|noonish|noon)\b"; + public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\b"; public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\b"; public const string RestrictedTimeUnitRegex = @"(?hour|minute)\b"; - public const string FivesRegex = @"(?(fifteen|twenty(\s*five)?|thirty(\s*five)?|forty(\s*five)?|fourty(\s*five)?|fifty(\s*five)?|ten|five))\b"; + public const string FivesRegex = @"(?(fifteen|(twen|thir|for|four|fif)ty(\s*five)?|ten|five))\b"; public static readonly string HourRegex = $@"\b{BaseDateTime.HourRegex}"; - public const string PeriodHourNumRegex = @"\b(?twenty (one|two|three|four)|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|zero|one|two|three|four|five|six|seven|eight|nine|ten)\b"; + public const string PeriodHourNumRegex = @"\b(?twenty( (one|two|three|four))?|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|zero|one|two|three|four|five|six|seven|eight|nine|ten)\b"; public static readonly string ConnectNumRegex = $@"\b{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}"; public static readonly string TimeRegexWithDotConnector = $@"({BaseDateTime.HourRegex}(\s*\.\s*){BaseDateTime.MinuteRegex})"; public static readonly string TimeRegex1 = $@"\b({TimePrefix}\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\s*|[.]){DescRegex}"; @@ -198,7 +198,7 @@ public static class DateTimeDefinitions public const string MoreThanRegex = @"\b(more\s+than)\b"; public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\b"; public const string SuffixAndRegex = @"(?\s*(and)\s+((an|a)\s+)?(?half|quarter))"; - public const string 
PeriodicRegex = @"\b(?daily|monthly|weekly|biweekly|yearly|annually|annual)\b"; + public const string PeriodicRegex = @"\b(?daily|monthly|weekly|biweekly|yearly|annual(ly)?)\b"; public static readonly string EachUnitRegex = $@"(?(each|every)(?\s+other)?\s*{DurationUnitRegex})"; public const string EachPrefixRegex = @"\b(?(each|(every))\s*$)"; public const string SetEachRegex = @"\b(?(each|(every))\s*)"; @@ -229,8 +229,8 @@ public static class DateTimeDefinitions public static readonly string MorningStartEndRegex = $@"(^(morning|{AmDescRegex}))|((morning|{AmDescRegex})$)"; public static readonly string AfternoonStartEndRegex = $@"(^(afternoon|{PmDescRegex}))|((afternoon|{PmDescRegex})$)"; public const string EveningStartEndRegex = @"(^(evening))|((evening)$)"; - public const string NightStartEndRegex = @"(^(overnight|tonight|night))|((overnight|tonight|night)$)"; - public const string InexactNumberRegex = @"\b(a few|few|some|several|(?(a\s+)?couple(\s+of)?))\b"; + public const string NightStartEndRegex = @"(^(over|to)?night)|((over|to)?night$)"; + public const string InexactNumberRegex = @"\b((a )?few|some|several|(?(a\s+)?couple(\s+of)?))\b"; public static readonly string InexactNumberUnitRegex = $@"({InexactNumberRegex})\s+({DurationUnitRegex})"; public static readonly string RelativeTimeUnitRegex = $@"((({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; public static readonly string RelativeDurationUnitRegex = $@"(((?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))"; @@ -239,7 +239,7 @@ public static class DateTimeDefinitions public const string FromToRegex = @"\b(from).+(to)\b.+"; public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$"; public const string SingleAmbiguousTermsRegex = @"^(the\s+)?(day|week|month|year)$"; - public const string UnspecificDatePeriodRegex = 
@"^(week|weekend|month|year)$"; + public const string UnspecificDatePeriodRegex = @"^(week(end)?|month|year)$"; public const string PrepositionSuffixRegex = @"\b(on|in|at|around|from|to)$"; public const string FlexibleDayRegex = @"(?([A-Za-z]+\s)?[A-Za-z\d]+)"; public static readonly string ForTheRegex = $@"\b((((?<=for\s+)the\s+{FlexibleDayRegex})|((?<=on\s+)(the\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\s*(,|\.|!|\?|$)))"; @@ -261,14 +261,14 @@ public static class DateTimeDefinitions public static readonly string NumberAsTimeRegex = $@"\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\b"; public static readonly string TimeBeforeAfterRegex = $@"\b(((?<=\b(before|no later than|by|after)\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\b"; public const string DateNumberConnectorRegex = @"^\s*(?\s+at)\s*$"; - public const string DecadeRegex = @"(?noughties|twenties|thirties|forties|fifties|sixties|seventies|eighties|nineties|two thousands)"; + public const string DecadeRegex = @"(?(nough|twen|thir|for|four|fif|six|seven|eight|nine)ties|two thousands)"; public static readonly string DecadeWithCenturyRegex = $@"(the\s+)?(((?\d|1\d|2\d)?(')?(?\d0)(')?(\s)?s\b)|(({CenturyRegex}(\s+|-)(and\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(and\s+)?(?tens|hundreds)))"; public static readonly string RelativeDecadeRegex = $@"\b((the\s+)?{RelativeRegex}\s+((?[\w,]+)\s+)?decades?)\b"; public static readonly string YearPeriodRegex = $@"((((from|during|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((between)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; - public static readonly string StrictTillRegex = $@"(?\b(to|till|til|until|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; + public static readonly string StrictTillRegex = $@"(?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; 
public static readonly string StrictRangeConnectorRegex = $@"(?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*))))"; public static readonly string ComplexDatePeriodRegex = $@"(((from|during|in)\s+)?(?.+)\s*({StrictTillRegex})\s*(?.+)|((between)\s+)(?.+)\s*({StrictRangeConnectorRegex})\s*(?.+))"; - public static readonly string FailFastRegex = $@"{BaseDateTime.DeltaMinuteRegex}|\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|century|centuries|weekends?|quarters?|half|halves|yesterday|tomorrow|tmr|today|tonight|mornings?|noonish|\d(-|——)?ish|((the\s+\w*)|\d)th|afternoons?|evenings?|nights?|noon|lunchtime|lunch|dinnertime|dinner|midnight|mid-nights?|midmornings?|mid-mornings?|midafternoonss?|mid-afternoons?|midday|mid-day|daytime|nighttime|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eod|eom|eoy|mardi gras|mardi-gras|mardigras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|patrick|cinco de mayo|all hallow|all souls|guy fawkes|st patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; + public static readonly string FailFastRegex = 
$@"{BaseDateTime.DeltaMinuteRegex}|\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})"; public static readonly Dictionary UnitMap = new Dictionary { { @"decades", @"10Y" }, diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java index 0da71c382a..acae31089a 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java @@ -9,14 +9,14 @@ // Licensed under the MIT License. 
// ------------------------------------------------------------------------------ -package com.microsoft.recognizers.text.datetime.resources; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.ImmutableMap; - +package com.microsoft.recognizers.text.datetime.resources; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + public class EnglishDateTime { public static final String TillRegex = "(?\\b(to|(un)?till?|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex})" @@ -51,9 +51,9 @@ public class EnglishDateTime { public static final String FutureSuffixRegex = "\\b(in\\s+the\\s+)?(future|hence)\\b"; - public static final String DayRegex = "(the\\s*)?(?([1-3]1(th|st)?|[12]2(nd|th)?|[12]3(rd|th)?|(10|1[4-9]|20|2[4-9]|30)(th)?|0?(1(st)?|2(nd)?|3(rd)?|[4-9](th)?)))(?=\\b|t)"; + public static final String DayRegex = "(the\\s*)?(?(3[0-1]|[1-2]\\d|0?[1-9])(th|nd|rd|st)?)(?=\\b|t)"; - public static final String ImplicitDayRegex = "(the\\s*)?(?[1-3]1(th|st)|[12]2(nd|th)|[12]3(rd|th)|(10|1[4-9]|20|2[4-9]|30)(th)|0?(1st|2nd|3rd|[4-9]th))\\b"; + public static final String ImplicitDayRegex = "(the\\s*)?(?(3[0-1]|[0-2]?\\d)(th|nd|rd|st))\\b"; public static final String MonthNumRegex = "(?1[0-2]|(0)?[1-9])\\b"; @@ -292,7 +292,7 @@ public class EnglishDateTime { public static final String RelativeDayRegex = "\\b(((the\\s+)?{RelativeRegex}\\s+day))\\b" .replace("{RelativeRegex}", RelativeRegex); - public static final String SetWeekDayRegex = "\\b(?on\\s+)?(?morning|afternoon|evening|night|sunday|monday|tuesday|wednesday|thursday|friday|saturday)s\\b"; + public static final String SetWeekDayRegex = "\\b(?on\\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\\b"; public static final String WeekDayOfMonthRegex = 
"(?(the\\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\\s+{WeekDayRegex}\\s+{MonthSuffixRegex})" .replace("{WeekDayRegex}", WeekDayRegex) @@ -384,7 +384,7 @@ public class EnglishDateTime { public static final String WeekDayEnd = "(this\\s+)?{WeekDayRegex}\\s*,?\\s*$" .replace("{WeekDayRegex}", WeekDayRegex); - public static final String RangeUnitRegex = "\\b(?years|year|months|month|weeks|week)\\b"; + public static final String RangeUnitRegex = "\\b(?years?|months?|weeks?)\\b"; public static final String HourNumRegex = "\\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b"; @@ -394,13 +394,13 @@ public class EnglishDateTime { public static final String PmRegex = "(?(((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))"; - public static final String PmRegexFull = "(?((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|night|lunchtime))"; + public static final String PmRegexFull = "(?((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))"; public static final String AmRegex = "(?((at|in|around|on|for)\\s+(the\\s+)?)?(morning))"; - public static final String LunchRegex = "\\b(lunchtime)\\b"; + public static final String LunchRegex = "\\blunchtime\\b"; - public static final String NightRegex = "\\b(midnight|night)\\b"; + public static final String NightRegex = "\\b(mid)?night\\b"; public static final String CommonDatePrefixRegex = "^[\\.]"; @@ -432,13 +432,13 @@ public class EnglishDateTime { .replace("{BaseDateTime.MinuteRegex}", BaseDateTime.MinuteRegex) .replace("{BaseDateTime.SecondRegex}", BaseDateTime.SecondRegex); - public static final String MidnightRegex = "(?midnight|mid-night|mid night)"; + public static final String MidnightRegex = "(?mid\\s*(-\\s*)?night)"; - public static final String MidmorningRegex = "(?midmorning|mid-morning|mid morning)"; + public static final String MidmorningRegex = 
"(?mid\\s*(-\\s*)?morning)"; - public static final String MidafternoonRegex = "(?midafternoon|mid-afternoon|mid afternoon)"; + public static final String MidafternoonRegex = "(?mid\\s*(-\\s*)?afternoon)"; - public static final String MiddayRegex = "(?midday|mid-day|mid day|((12\\s)?noon))"; + public static final String MiddayRegex = "(?mid\\s*(-\\s*)?day|((12\\s)?noon))"; public static final String MidTimeRegex = "(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))" .replace("{MidnightRegex}", MidnightRegex) @@ -452,19 +452,19 @@ public class EnglishDateTime { .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{MidTimeRegex}", MidTimeRegex); - public static final String IshRegex = "\\b({BaseDateTime.HourRegex}(-|——)?ish|noonish|noon)\\b" + public static final String IshRegex = "\\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\\b" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex); public static final String TimeUnitRegex = "([^A-Za-z]{1,}|\\b)(?hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\\b"; public static final String RestrictedTimeUnitRegex = "(?hour|minute)\\b"; - public static final String FivesRegex = "(?(fifteen|twenty(\\s*five)?|thirty(\\s*five)?|forty(\\s*five)?|fourty(\\s*five)?|fifty(\\s*five)?|ten|five))\\b"; + public static final String FivesRegex = "(?(fifteen|(twen|thir|for|four|fif)ty(\\s*five)?|ten|five))\\b"; public static final String HourRegex = "\\b{BaseDateTime.HourRegex}" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex); - public static final String PeriodHourNumRegex = "\\b(?twenty (one|two|three|four)|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|zero|one|two|three|four|five|six|seven|eight|nine|ten)\\b"; + public static final String PeriodHourNumRegex = "\\b(?twenty( (one|two|three|four))?|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|zero|one|two|three|four|five|six|seven|eight|nine|ten)\\b"; public static 
final String ConnectNumRegex = "\\b{BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex}" .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) @@ -650,7 +650,7 @@ public class EnglishDateTime { public static final String SuffixAndRegex = "(?\\s*(and)\\s+((an|a)\\s+)?(?half|quarter))"; - public static final String PeriodicRegex = "\\b(?daily|monthly|weekly|biweekly|yearly|annually|annual)\\b"; + public static final String PeriodicRegex = "\\b(?daily|monthly|weekly|biweekly|yearly|annual(ly)?)\\b"; public static final String EachUnitRegex = "(?(each|every)(?\\s+other)?\\s*{DurationUnitRegex})" .replace("{DurationUnitRegex}", DurationUnitRegex); @@ -731,9 +731,9 @@ public class EnglishDateTime { public static final String EveningStartEndRegex = "(^(evening))|((evening)$)"; - public static final String NightStartEndRegex = "(^(overnight|tonight|night))|((overnight|tonight|night)$)"; + public static final String NightStartEndRegex = "(^(over|to)?night)|((over|to)?night$)"; - public static final String InexactNumberRegex = "\\b(a few|few|some|several|(?(a\\s+)?couple(\\s+of)?))\\b"; + public static final String InexactNumberRegex = "\\b((a )?few|some|several|(?(a\\s+)?couple(\\s+of)?))\\b"; public static final String InexactNumberUnitRegex = "({InexactNumberRegex})\\s+({DurationUnitRegex})" .replace("{InexactNumberRegex}", InexactNumberRegex) @@ -764,7 +764,7 @@ public class EnglishDateTime { public static final String SingleAmbiguousTermsRegex = "^(the\\s+)?(day|week|month|year)$"; - public static final String UnspecificDatePeriodRegex = "^(week|weekend|month|year)$"; + public static final String UnspecificDatePeriodRegex = "^(week(end)?|month|year)$"; public static final String PrepositionSuffixRegex = "\\b(on|in|at|around|from|to)$"; @@ -832,7 +832,7 @@ public class EnglishDateTime { public static final String DateNumberConnectorRegex = "^\\s*(?\\s+at)\\s*$"; - public static final String DecadeRegex = 
"(?noughties|twenties|thirties|forties|fifties|sixties|seventies|eighties|nineties|two thousands)"; + public static final String DecadeRegex = "(?(nough|twen|thir|for|four|fif|six|seven|eight|nine)ties|two thousands)"; public static final String DecadeWithCenturyRegex = "(the\\s+)?(((?\\d|1\\d|2\\d)?(')?(?\\d0)(')?(\\s)?s\\b)|(({CenturyRegex}(\\s+|-)(and\\s+)?)?{DecadeRegex})|({CenturyRegex}(\\s+|-)(and\\s+)?(?tens|hundreds)))" .replace("{CenturyRegex}", CenturyRegex) @@ -846,7 +846,7 @@ public class EnglishDateTime { .replace("{TillRegex}", TillRegex) .replace("{RangeConnectorRegex}", RangeConnectorRegex); - public static final String StrictTillRegex = "(?\\b(to|till|til|until|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))" + public static final String StrictTillRegex = "(?\\b(to|(un)?till?|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))" .replace("{BaseDateTime.RangeConnectorSymbolRegex}", BaseDateTime.RangeConnectorSymbolRegex); public static final String StrictRangeConnectorRegex = "(?\\b(and|through|to)\\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))" @@ -857,7 +857,7 @@ public class EnglishDateTime { .replace("{StrictTillRegex}", StrictTillRegex) .replace("{StrictRangeConnectorRegex}", StrictRangeConnectorRegex); - public static final String FailFastRegex = 
"{BaseDateTime.DeltaMinuteRegex}|\\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|century|centuries|weekends?|quarters?|half|halves|yesterday|tomorrow|tmr|today|tonight|mornings?|noonish|\\d(-|——)?ish|((the\\s+\\w*)|\\d)th|afternoons?|evenings?|nights?|noon|lunchtime|lunch|dinnertime|dinner|midnight|mid-nights?|midmornings?|mid-mornings?|midafternoonss?|mid-afternoons?|midday|mid-day|daytime|nighttime|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eod|eom|eoy|mardi gras|mardi-gras|mardigras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|patrick|cinco de mayo|all hallow|all souls|guy fawkes|st patrick|hundreds?|noughties|aughts|thousands?)\\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\\b({DateUnitRegex}|{ImplicitDayRegex})" + public static final String FailFastRegex = "{BaseDateTime.DeltaMinuteRegex}|\\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\\d(-|——)?ish|((the\\s+\\w*)|\\d)(th|rd|nd|st)|(mid\\s*(-\\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\\b({DateUnitRegex}|{ImplicitDayRegex})" .replace("{BaseDateTime.DeltaMinuteRegex}", 
BaseDateTime.DeltaMinuteRegex) .replace("{BaseDateTime.BaseAmDescRegex}", BaseDateTime.BaseAmDescRegex) .replace("{BaseDateTime.BasePmDescRegex}", BaseDateTime.BasePmDescRegex) diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java index ed8852dff3..ae90acd22c 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishTimeZone.java @@ -9,14 +9,14 @@ // Licensed under the MIT License. // ------------------------------------------------------------------------------ -package com.microsoft.recognizers.text.datetime.resources; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.ImmutableMap; - +package com.microsoft.recognizers.text.datetime.resources; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableMap; + public class EnglishTimeZone { public static final String DirectUtcRegex = "\\b(utc|gmt)(\\s*[+\\-\\u00B1]?\\s*[\\d]{1,2}(\\s*:\\s*[\\d]{1,2})?)?\\b"; diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java index 8d28dc6b05..58570bef78 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java @@ -19,15 +19,15 @@ public class BaseNumbers { public 
static String IntegerRegexDefinition(String placeholder, String thousandsmark) { return "(((?", "// This code was generated by a tool.", @@ -53,20 +53,20 @@ public static void Generate(Path yamlFilePath, Path outputFilePath, String heade try { writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFilePath.toString()), StandardCharsets.UTF_8)); writer.write(headerComment); - writer.write(LineBreak); - writer.write(LineBreak); + writer.write(lineBreak); + writer.write(lineBreak); writer.write(header); - writer.write(LineBreak); + writer.write(lineBreak); BufferedWriter finalWriter = writer; for (String l : lines) { - writer.write(LineBreak); + writer.write(lineBreak); finalWriter.write(l); - writer.write(LineBreak); + writer.write(lineBreak); } writer.write(footer); - writer.write(LineBreak); + writer.write(lineBreak); } catch (Exception e) { throw e; } finally { diff --git a/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/ResourcesGenerator.java b/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/ResourcesGenerator.java index ad906b5b43..f0b58c697e 100644 --- a/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/ResourcesGenerator.java +++ b/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/ResourcesGenerator.java @@ -4,7 +4,6 @@ import java.io.*; import java.nio.file.FileSystems; -import java.nio.file.Files; import java.nio.file.Path; import java.util.stream.Collectors; @@ -12,6 +11,8 @@ public class ResourcesGenerator { private static final String ResourcesPath = "../Patterns"; + private static final String LineBreak = "\n"; + public static void main(String[] args) throws Exception { if (args.length == 0) { throw new Exception("Please specify path to pattern/resource file."); @@ -25,8 +26,8 @@ public static void main(String[] args) throws Exception { Path outputPath = 
FileSystems.getDefault().getPath(definition.outputPath, config.output + ".java"); System.out.println(String.format("%s => %s", inputPath.toString(), outputPath.toString())); - String header = String.join(System.lineSeparator(), config.header); - String footer = String.join(System.lineSeparator(), config.footer); + String header = String.join(LineBreak, config.header); + String footer = String.join(LineBreak, config.footer); try { CodeGenerator.Generate(inputPath, outputPath, header, footer); diff --git a/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/writters/ParamsRegexWriter.java b/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/writters/ParamsRegexWriter.java index 79feb76679..be1d839fa2 100644 --- a/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/writters/ParamsRegexWriter.java +++ b/Java/libraries/resource-generator/src/main/java/com/microsoft/recognizers/text/resources/writters/ParamsRegexWriter.java @@ -17,7 +17,7 @@ public ParamsRegexWriter(String name, ParamsRegex params) { @Override public String write() { String parameters = String.join(", ", Arrays.stream(this.params.params).map(p -> "String " + p).toArray(size -> new String[size])); - String replace = String.join("", Arrays.stream(this.params.params).map(p -> "\n\t\t\t.replace(\"{" + p + "}\", " + p + ")").toArray(size -> new String[size])); + String replace = String.join("", Arrays.stream(this.params.params).map(p -> "\n .replace(\"{" + p + "}\", " + p + ")").toArray(size -> new String[size])); String template = String.join( "\n ", diff --git a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts index 78d4b41c4e..0562344057 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts +++ 
b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts @@ -25,8 +25,8 @@ export namespace EnglishDateTime { export const CenturySuffixRegex = `(^century)\\b`; export const ReferencePrefixRegex = `(that|same)\\b`; export const FutureSuffixRegex = `\\b(in\\s+the\\s+)?(future|hence)\\b`; - export const DayRegex = `(the\\s*)?(?([1-3]1(th|st)?|[12]2(nd|th)?|[12]3(rd|th)?|(10|1[4-9]|20|2[4-9]|30)(th)?|0?(1(st)?|2(nd)?|3(rd)?|[4-9](th)?)))(?=\\b|t)`; - export const ImplicitDayRegex = `(the\\s*)?(?[1-3]1(th|st)|[12]2(nd|th)|[12]3(rd|th)|(10|1[4-9]|20|2[4-9]|30)(th)|0?(1st|2nd|3rd|[4-9]th))\\b`; + export const DayRegex = `(the\\s*)?(?(3[0-1]|[1-2]\\d|0?[1-9])(th|nd|rd|st)?)(?=\\b|t)`; + export const ImplicitDayRegex = `(the\\s*)?(?(3[0-1]|[0-2]?\\d)(th|nd|rd|st))\\b`; export const MonthNumRegex = `(?1[0-2]|(0)?[1-9])\\b`; export const WrittenOneToNineRegex = `(one|two|three|four|five|six|seven|eight|nine)`; export const WrittenElevenToNineteenRegex = `(eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen)`; @@ -97,7 +97,7 @@ export namespace EnglishDateTime { export const SpecialDayRegex = `\\b((the\\s+)?day before yesterday|(the\\s+)?day after (tomorrow|tmr)|the\\s+day\\s+(before|after)(?!=\\s+day)|((the\\s+)?(${RelativeRegex}|my)\\s+day)|yesterday|tomorrow|tmr|today)\\b`; export const SpecialDayWithNumRegex = `\\b((?${WrittenNumRegex})\\s+days?\\s+from\\s+(?yesterday|tomorrow|tmr|today))\\b`; export const RelativeDayRegex = `\\b(((the\\s+)?${RelativeRegex}\\s+day))\\b`; - export const SetWeekDayRegex = `\\b(?on\\s+)?(?morning|afternoon|evening|night|sunday|monday|tuesday|wednesday|thursday|friday|saturday)s\\b`; + export const SetWeekDayRegex = `\\b(?on\\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\\b`; export const WeekDayOfMonthRegex = `(?(the\\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\\s+${WeekDayRegex}\\s+${MonthSuffixRegex})`; export const RelativeWeekDayRegex = 
`\\b(${WrittenNumRegex}\\s+${WeekDayRegex}\\s+(from\\s+now|later))\\b`; export const SpecialDate = `(?=\\b(on|at)\\s+the\\s+)${DayRegex}\\b`; @@ -117,15 +117,15 @@ export namespace EnglishDateTime { export const OfMonth = `^\\s*of\\s*${MonthRegex}`; export const MonthEnd = `${MonthRegex}\\s*(the)?\\s*$`; export const WeekDayEnd = `(this\\s+)?${WeekDayRegex}\\s*,?\\s*$`; - export const RangeUnitRegex = `\\b(?years|year|months|month|weeks|week)\\b`; + export const RangeUnitRegex = `\\b(?years?|months?|weeks?)\\b`; export const HourNumRegex = `\\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b`; export const MinuteNumRegex = `(?ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|one|two|three|four|five|six|seven|eight|nine)`; export const DeltaMinuteNumRegex = `(?ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|one|two|three|four|five|six|seven|eight|nine)`; export const PmRegex = `(?(((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))`; - export const PmRegexFull = `(?((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|night|lunchtime))`; + export const PmRegexFull = `(?((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))`; export const AmRegex = `(?((at|in|around|on|for)\\s+(the\\s+)?)?(morning))`; - export const LunchRegex = `\\b(lunchtime)\\b`; - export const NightRegex = `\\b(midnight|night)\\b`; + export const LunchRegex = `\\blunchtime\\b`; + export const NightRegex = `\\b(mid)?night\\b`; export const CommonDatePrefixRegex = `^[\\.]`; export const LessThanOneHour = `(?(a\\s+)?quarter|three quarter(s)?|half( an hour)?|${BaseDateTime.DeltaMinuteRegex}(\\s+(minute|minutes|min|mins))|${DeltaMinuteNumRegex}(\\s+(minute|minutes|min|mins)))`; export const WrittenTimeRegex = 
`(?${HourNumRegex}\\s+(${MinuteNumRegex}|(?twenty|thirty|forty|fourty|fifty)\\s+${MinuteNumRegex}))`; @@ -133,18 +133,18 @@ export namespace EnglishDateTime { export const TimeSuffix = `(?${AmRegex}|${PmRegex}|${OclockRegex})`; export const TimeSuffixFull = `(?${AmRegex}|${PmRegexFull}|${OclockRegex})`; export const BasicTime = `\\b(?${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex}:${BaseDateTime.MinuteRegex}(:${BaseDateTime.SecondRegex})?|${BaseDateTime.HourRegex}(?![%\\d]))`; - export const MidnightRegex = `(?midnight|mid-night|mid night)`; - export const MidmorningRegex = `(?midmorning|mid-morning|mid morning)`; - export const MidafternoonRegex = `(?midafternoon|mid-afternoon|mid afternoon)`; - export const MiddayRegex = `(?midday|mid-day|mid day|((12\\s)?noon))`; + export const MidnightRegex = `(?mid\\s*(-\\s*)?night)`; + export const MidmorningRegex = `(?mid\\s*(-\\s*)?morning)`; + export const MidafternoonRegex = `(?mid\\s*(-\\s*)?afternoon)`; + export const MiddayRegex = `(?mid\\s*(-\\s*)?day|((12\\s)?noon))`; export const MidTimeRegex = `(?(${MidnightRegex}|${MidmorningRegex}|${MidafternoonRegex}|${MiddayRegex}))`; export const AtRegex = `\\b(((?<=\\bat\\s+)(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex}(?!\\.\\d)(\\s*((?a)|(?p)))?|${MidTimeRegex}))|${MidTimeRegex})\\b`; - export const IshRegex = `\\b(${BaseDateTime.HourRegex}(-|——)?ish|noonish|noon)\\b`; + export const IshRegex = `\\b(${BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\\b`; export const TimeUnitRegex = `([^A-Za-z]{1,}|\\b)(?hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\\b`; export const RestrictedTimeUnitRegex = `(?hour|minute)\\b`; - export const FivesRegex = `(?(fifteen|twenty(\\s*five)?|thirty(\\s*five)?|forty(\\s*five)?|fourty(\\s*five)?|fifty(\\s*five)?|ten|five))\\b`; + export const FivesRegex = `(?(fifteen|(twen|thir|for|four|fif)ty(\\s*five)?|ten|five))\\b`; export const HourRegex = `\\b${BaseDateTime.HourRegex}`; - export const PeriodHourNumRegex = 
`\\b(?twenty (one|two|three|four)|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|zero|one|two|three|four|five|six|seven|eight|nine|ten)\\b`; + export const PeriodHourNumRegex = `\\b(?twenty( (one|two|three|four))?|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|zero|one|two|three|four|five|six|seven|eight|nine|ten)\\b`; export const ConnectNumRegex = `\\b${BaseDateTime.HourRegex}(?[0-5][0-9])\\s*${DescRegex}`; export const TimeRegexWithDotConnector = `(${BaseDateTime.HourRegex}(\\s*\\.\\s*)${BaseDateTime.MinuteRegex})`; export const TimeRegex1 = `\\b(${TimePrefix}\\s+)?(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex})(\\s*|[.])${DescRegex}`; @@ -188,7 +188,7 @@ export namespace EnglishDateTime { export const MoreThanRegex = `\\b(more\\s+than)\\b`; export const DurationUnitRegex = `(?${DateUnitRegex}|hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\\b`; export const SuffixAndRegex = `(?\\s*(and)\\s+((an|a)\\s+)?(?half|quarter))`; - export const PeriodicRegex = `\\b(?daily|monthly|weekly|biweekly|yearly|annually|annual)\\b`; + export const PeriodicRegex = `\\b(?daily|monthly|weekly|biweekly|yearly|annual(ly)?)\\b`; export const EachUnitRegex = `(?(each|every)(?\\s+other)?\\s*${DurationUnitRegex})`; export const EachPrefixRegex = `\\b(?(each|(every))\\s*$)`; export const SetEachRegex = `\\b(?(each|(every))\\s*)`; @@ -219,8 +219,8 @@ export namespace EnglishDateTime { export const MorningStartEndRegex = `(^(morning|${AmDescRegex}))|((morning|${AmDescRegex})$)`; export const AfternoonStartEndRegex = `(^(afternoon|${PmDescRegex}))|((afternoon|${PmDescRegex})$)`; export const EveningStartEndRegex = `(^(evening))|((evening)$)`; - export const NightStartEndRegex = `(^(overnight|tonight|night))|((overnight|tonight|night)$)`; - export const InexactNumberRegex = `\\b(a few|few|some|several|(?(a\\s+)?couple(\\s+of)?))\\b`; + export const NightStartEndRegex = `(^(over|to)?night)|((over|to)?night$)`; + 
export const InexactNumberRegex = `\\b((a )?few|some|several|(?(a\\s+)?couple(\\s+of)?))\\b`; export const InexactNumberUnitRegex = `(${InexactNumberRegex})\\s+(${DurationUnitRegex})`; export const RelativeTimeUnitRegex = `(((${NextPrefixRegex}|${PreviousPrefixRegex}|${ThisPrefixRegex})\\s+(${TimeUnitRegex}))|((the|my))\\s+(${RestrictedTimeUnitRegex}))`; export const RelativeDurationUnitRegex = `(((?<=(${NextPrefixRegex}|${PreviousPrefixRegex}|${ThisPrefixRegex})\\s+)(${DurationUnitRegex}))|((the|my))\\s+(${RestrictedTimeUnitRegex}))`; @@ -229,7 +229,7 @@ export namespace EnglishDateTime { export const FromToRegex = `\\b(from).+(to)\\b.+`; export const SingleAmbiguousMonthRegex = `^(the\\s+)?(may|march)$`; export const SingleAmbiguousTermsRegex = `^(the\\s+)?(day|week|month|year)$`; - export const UnspecificDatePeriodRegex = `^(week|weekend|month|year)$`; + export const UnspecificDatePeriodRegex = `^(week(end)?|month|year)$`; export const PrepositionSuffixRegex = `\\b(on|in|at|around|from|to)$`; export const FlexibleDayRegex = `(?([A-Za-z]+\\s)?[A-Za-z\\d]+)`; export const ForTheRegex = `\\b((((?<=for\\s+)the\\s+${FlexibleDayRegex})|((?<=on\\s+)(the\\s+)?${FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.|!|\\?|$)))`; @@ -251,14 +251,14 @@ export namespace EnglishDateTime { export const NumberAsTimeRegex = `\\b(${WrittenTimeRegex}|${PeriodHourNumRegex}|${BaseDateTime.HourRegex})\\b`; export const TimeBeforeAfterRegex = `\\b(((?<=\\b(before|no later than|by|after)\\s+)(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex}|${MidTimeRegex}))|${MidTimeRegex})\\b`; export const DateNumberConnectorRegex = `^\\s*(?\\s+at)\\s*$`; - export const DecadeRegex = `(?noughties|twenties|thirties|forties|fifties|sixties|seventies|eighties|nineties|two thousands)`; + export const DecadeRegex = `(?(nough|twen|thir|for|four|fif|six|seven|eight|nine)ties|two thousands)`; export const DecadeWithCenturyRegex = 
`(the\\s+)?(((?\\d|1\\d|2\\d)?(')?(?\\d0)(')?(\\s)?s\\b)|((${CenturyRegex}(\\s+|-)(and\\s+)?)?${DecadeRegex})|(${CenturyRegex}(\\s+|-)(and\\s+)?(?tens|hundreds)))`; export const RelativeDecadeRegex = `\\b((the\\s+)?${RelativeRegex}\\s+((?[\\w,]+)\\s+)?decades?)\\b`; export const YearPeriodRegex = `((((from|during|in)\\s+)?${YearRegex}\\s*(${TillRegex})\\s*${YearRegex})|(((between)\\s+)${YearRegex}\\s*(${RangeConnectorRegex})\\s*${YearRegex}))`; - export const StrictTillRegex = `(?\\b(to|till|til|until|thru|through)\\b|${BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))`; + export const StrictTillRegex = `(?\\b(to|(un)?till?|thru|through)\\b|${BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))`; export const StrictRangeConnectorRegex = `(?\\b(and|through|to)\\b|${BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))`; export const ComplexDatePeriodRegex = `(((from|during|in)\\s+)?(?.+)\\s*(${StrictTillRegex})\\s*(?.+)|((between)\\s+)(?.+)\\s*(${StrictRangeConnectorRegex})\\s*(?.+))`; - export const FailFastRegex = `${BaseDateTime.DeltaMinuteRegex}|\\b(${BaseDateTime.BaseAmDescRegex}|${BaseDateTime.BasePmDescRegex})|${BaseDateTime.BaseAmPmDescRegex}|\\b(zero|${WrittenOneToNineRegex}|${WrittenElevenToNineteenRegex}|${WrittenTensRegex}|${WrittenMonthRegex}|${SeasonDescRegex}|${DecadeRegex}|century|centuries|weekends?|quarters?|half|halves|yesterday|tomorrow|tmr|today|tonight|mornings?|noonish|\\d(-|——)?ish|((the\\s+\\w*)|\\d)th|afternoons?|evenings?|nights?|noon|lunchtime|lunch|dinnertime|dinner|midnight|mid-nights?|midmornings?|mid-mornings?|midafternoonss?|mid-afternoons?|midday|mid-day|daytime|nighttime|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eod|eom|eoy|mardi gras|mardi-gras|mardigras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|patrick|cinco de mayo|all hallow|all souls|guy fawkes|st 
patrick|hundreds?|noughties|aughts|thousands?)\\b|${WeekDayRegex}|${SetWeekDayRegex}|${NowRegex}|${PeriodicRegex}|\\b(${DateUnitRegex}|${ImplicitDayRegex})`; + export const FailFastRegex = `${BaseDateTime.DeltaMinuteRegex}|\\b(${BaseDateTime.BaseAmDescRegex}|${BaseDateTime.BasePmDescRegex})|${BaseDateTime.BaseAmPmDescRegex}|\\b(zero|${WrittenOneToNineRegex}|${WrittenElevenToNineteenRegex}|${WrittenTensRegex}|${WrittenMonthRegex}|${SeasonDescRegex}|${DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\\d(-|——)?ish|((the\\s+\\w*)|\\d)(th|rd|nd|st)|(mid\\s*(-\\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\\b|${WeekDayRegex}|${SetWeekDayRegex}|${NowRegex}|${PeriodicRegex}|\\b(${DateUnitRegex}|${ImplicitDayRegex})`; export const UnitMap: ReadonlyMap = new Map([["decades", "10Y"],["decade", "10Y"],["years", "Y"],["year", "Y"],["months", "MON"],["month", "MON"],["fortnights", "2W"],["fortnight", "2W"],["weeks", "W"],["week", "W"],["days", "D"],["day", "D"],["hours", "H"],["hour", "H"],["hrs", "H"],["hr", "H"],["h", "H"],["minutes", "M"],["minute", "M"],["mins", "M"],["min", "M"],["seconds", "S"],["second", "S"],["secs", "S"],["sec", "S"]]); export const UnitValueMap: ReadonlyMap = new Map([["decades", 315360000],["decade", 315360000],["years", 31536000],["year", 31536000],["months", 2592000],["month", 2592000],["fortnights", 1209600],["fortnight", 1209600],["weeks", 604800],["week", 604800],["days", 86400],["day", 86400],["hours", 3600],["hour", 3600],["hrs", 3600],["hr", 3600],["h", 3600],["minutes", 60],["minute", 60],["mins", 60],["min", 60],["seconds", 1],["second", 1],["secs", 1],["sec", 1]]); 
export const SeasonMap: ReadonlyMap = new Map([["spring", "SP"],["summer", "SU"],["fall", "FA"],["autumn", "FA"],["winter", "WI"]]); diff --git a/Patterns/English/English-DateTime.yaml b/Patterns/English/English-DateTime.yaml index 749aa95782..a0131a18b8 100644 --- a/Patterns/English/English-DateTime.yaml +++ b/Patterns/English/English-DateTime.yaml @@ -36,9 +36,9 @@ ReferencePrefixRegex: !simpleRegex FutureSuffixRegex: !simpleRegex def: \b(in\s+the\s+)?(future|hence)\b DayRegex: !simpleRegex - def: (the\s*)?(?([1-3]1(th|st)?|[12]2(nd|th)?|[12]3(rd|th)?|(10|1[4-9]|20|2[4-9]|30)(th)?|0?(1(st)?|2(nd)?|3(rd)?|[4-9](th)?)))(?=\b|t) + def: (the\s*)?(?(3[0-1]|[1-2]\d|0?[1-9])(th|nd|rd|st)?)(?=\b|t) ImplicitDayRegex: !simpleRegex - def: (the\s*)?(?[1-3]1(th|st)|[12]2(nd|th)|[12]3(rd|th)|(10|1[4-9]|20|2[4-9]|30)(th)|0?(1st|2nd|3rd|[4-9]th))\b + def: (the\s*)?(?(3[0-1]|[0-2]?\d)(th|nd|rd|st))\b MonthNumRegex: !simpleRegex def: (?1[0-2]|(0)?[1-9])\b WrittenOneToNineRegex: !simpleRegex @@ -222,7 +222,7 @@ RelativeDayRegex: !nestedRegex def: \b(((the\s+)?{RelativeRegex}\s+day))\b references: [ RelativeRegex ] SetWeekDayRegex: !simpleRegex - def: \b(?on\s+)?(?morning|afternoon|evening|night|sunday|monday|tuesday|wednesday|thursday|friday|saturday)s\b + def: \b(?on\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\b WeekDayOfMonthRegex: !nestedRegex def: (?(the\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\s+{WeekDayRegex}\s+{MonthSuffixRegex}) references: [ WeekDayRegex, MonthSuffixRegex ] @@ -284,7 +284,7 @@ WeekDayEnd: !nestedRegex def: '(this\s+)?{WeekDayRegex}\s*,?\s*$' references: [ WeekDayRegex ] RangeUnitRegex: !simpleRegex - def: \b(?years|year|months|month|weeks|week)\b + def: \b(?years?|months?|weeks?)\b HourNumRegex: !simpleRegex def: \b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\b MinuteNumRegex: !simpleRegex @@ -294,13 +294,13 @@ DeltaMinuteNumRegex: !simpleRegex PmRegex: !simpleRegex def: 
(?(((at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night)) PmRegexFull: !simpleRegex - def: (?((at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|night|lunchtime)) + def: (?((at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime)) AmRegex: !simpleRegex def: (?((at|in|around|on|for)\s+(the\s+)?)?(morning)) LunchRegex: !simpleRegex - def: \b(lunchtime)\b + def: \blunchtime\b NightRegex: !simpleRegex - def: \b(midnight|night)\b + def: \b(mid)?night\b CommonDatePrefixRegex: !simpleRegex def: ^[\.] LessThanOneHour: !nestedRegex @@ -322,13 +322,13 @@ BasicTime: !nestedRegex def: \b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\d])) references: [ WrittenTimeRegex, HourNumRegex, BaseDateTime.HourRegex, BaseDateTime.MinuteRegex, BaseDateTime.SecondRegex ] MidnightRegex: !simpleRegex - def: (?midnight|mid-night|mid night) + def: (?mid\s*(-\s*)?night) MidmorningRegex: !simpleRegex - def: (?midmorning|mid-morning|mid morning) + def: (?mid\s*(-\s*)?morning) MidafternoonRegex: !simpleRegex - def: (?midafternoon|mid-afternoon|mid afternoon) + def: (?mid\s*(-\s*)?afternoon) MiddayRegex: !simpleRegex - def: (?midday|mid-day|mid day|((12\s)?noon)) + def: (?mid\s*(-\s*)?day|((12\s)?noon)) MidTimeRegex: !nestedRegex def: (?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex})) references: [ MidnightRegex, MidmorningRegex, MidafternoonRegex, MiddayRegex ] @@ -336,19 +336,19 @@ AtRegex: !nestedRegex def: \b(((?<=\bat\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\b references: [ WrittenTimeRegex, HourNumRegex, BaseDateTime.HourRegex, MidTimeRegex ] IshRegex: !nestedRegex - def: '\b({BaseDateTime.HourRegex}(-|——)?ish|noonish|noon)\b' + def: '\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\b' references: [ 
BaseDateTime.HourRegex ] TimeUnitRegex: !simpleRegex def: ([^A-Za-z]{1,}|\b)(?hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\b RestrictedTimeUnitRegex: !simpleRegex def: (?hour|minute)\b FivesRegex: !simpleRegex - def: (?(fifteen|twenty(\s*five)?|thirty(\s*five)?|forty(\s*five)?|fourty(\s*five)?|fifty(\s*five)?|ten|five))\b + def: (?(fifteen|(twen|thir|for|four|fif)ty(\s*five)?|ten|five))\b HourRegex: !nestedRegex def: \b{BaseDateTime.HourRegex} references: [ BaseDateTime.HourRegex ] PeriodHourNumRegex: !simpleRegex - def: \b(?twenty (one|two|three|four)|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|zero|one|two|three|four|five|six|seven|eight|nine|ten)\b + def: \b(?twenty( (one|two|three|four))?|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|zero|one|two|three|four|five|six|seven|eight|nine|ten)\b ConnectNumRegex: !nestedRegex def: '\b{BaseDateTime.HourRegex}(?[0-5][0-9])\s*{DescRegex}' references: [ BaseDateTime.HourRegex, DescRegex ] @@ -464,7 +464,7 @@ DurationUnitRegex: !nestedRegex SuffixAndRegex: !simpleRegex def: (?\s*(and)\s+((an|a)\s+)?(?half|quarter)) PeriodicRegex: !simpleRegex - def: \b(?daily|monthly|weekly|biweekly|yearly|annually|annual)\b + def: \b(?daily|monthly|weekly|biweekly|yearly|annual(ly)?)\b EachUnitRegex: !nestedRegex def: (?(each|every)(?\s+other)?\s*{DurationUnitRegex}) references: [ DurationUnitRegex ] @@ -545,9 +545,9 @@ AfternoonStartEndRegex: !nestedRegex EveningStartEndRegex: !simpleRegex def: (^(evening))|((evening)$) NightStartEndRegex: !simpleRegex - def: (^(overnight|tonight|night))|((overnight|tonight|night)$) + def: (^(over|to)?night)|((over|to)?night$) InexactNumberRegex: !simpleRegex - def: \b(a few|few|some|several|(?(a\s+)?couple(\s+of)?))\b + def: \b((a )?few|some|several|(?(a\s+)?couple(\s+of)?))\b InexactNumberUnitRegex: !nestedRegex def: ({InexactNumberRegex})\s+({DurationUnitRegex}) references: [InexactNumberRegex, DurationUnitRegex] @@ -570,7 +570,7 @@ 
SingleAmbiguousMonthRegex: !simpleRegex SingleAmbiguousTermsRegex: !simpleRegex def: ^(the\s+)?(day|week|month|year)$ UnspecificDatePeriodRegex: !simpleRegex - def: ^(week|weekend|month|year)$ + def: ^(week(end)?|month|year)$ PrepositionSuffixRegex: !simpleRegex def: \b(on|in|at|around|from|to)$ FlexibleDayRegex: !simpleRegex @@ -623,7 +623,7 @@ TimeBeforeAfterRegex: !nestedRegex DateNumberConnectorRegex: !simpleRegex def: ^\s*(?\s+at)\s*$ DecadeRegex: !simpleRegex - def: (?noughties|twenties|thirties|forties|fifties|sixties|seventies|eighties|nineties|two thousands) + def: (?(nough|twen|thir|for|four|fif|six|seven|eight|nine)ties|two thousands) DecadeWithCenturyRegex: !nestedRegex def: (the\s+)?(((?\d|1\d|2\d)?(')?(?\d0)(')?(\s)?s\b)|(({CenturyRegex}(\s+|-)(and\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+|-)(and\s+)?(?tens|hundreds))) references: [ CenturyRegex, DecadeRegex ] @@ -634,7 +634,7 @@ YearPeriodRegex: !nestedRegex def: ((((from|during|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((between)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex})) references: [ YearRegex, TillRegex, RangeConnectorRegex ] StrictTillRegex: !nestedRegex - def: (?\b(to|till|til|until|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*)))) + def: (?\b(to|(un)?till?|thru|through)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*)))) references: [ BaseDateTime.RangeConnectorSymbolRegex ] StrictRangeConnectorRegex : !nestedRegex def: (?\b(and|through|to)\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\s*(h[1-2]|q[1-4])(?!(\s+of|\s*,\s*)))) @@ -644,7 +644,7 @@ ComplexDatePeriodRegex: !nestedRegex references: [ YearRegex, StrictTillRegex, StrictRangeConnectorRegex ] # Do not localize FailFastRegex to other cultures at this momment. Experimental feature. To be improved. 
FailFastRegex: !nestedRegex - def: '{BaseDateTime.DeltaMinuteRegex}|\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|century|centuries|weekends?|quarters?|half|halves|yesterday|tomorrow|tmr|today|tonight|mornings?|noonish|\d(-|——)?ish|((the\s+\w*)|\d)th|afternoons?|evenings?|nights?|noon|lunchtime|lunch|dinnertime|dinner|midnight|mid-nights?|midmornings?|mid-mornings?|midafternoonss?|mid-afternoons?|midday|mid-day|daytime|nighttime|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eod|eom|eoy|mardi gras|mardi-gras|mardigras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|patrick|cinco de mayo|all hallow|all souls|guy fawkes|st patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})' + def: '{BaseDateTime.DeltaMinuteRegex}|\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\d(-|——)?ish|((the\s+\w*)|\d)(th|rd|nd|st)|(mid\s*(-\s*)?)?(night|morning|afternoon|day)s?|evenings?||noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\b({DateUnitRegex}|{ImplicitDayRegex})' references: [ BaseDateTime.DeltaMinuteRegex, BaseDateTime.BaseAmDescRegex, 
BaseDateTime.BasePmDescRegex, BaseDateTime.BaseAmPmDescRegex, ImplicitDayRegex, DateUnitRegex, WeekDayRegex, SetWeekDayRegex, NowRegex, PeriodicRegex, DecadeRegex, SeasonDescRegex, WrittenMonthRegex, WrittenTensRegex, WrittenElevenToNineteenRegex, WrittenOneToNineRegex ] UnitMap: !dictionary types: [ string, string ] diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py index 62b8dac2a6..ccbee1bf3d 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py @@ -26,8 +26,8 @@ class EnglishDateTime: CenturySuffixRegex = f'(^century)\\b' ReferencePrefixRegex = f'(that|same)\\b' FutureSuffixRegex = f'\\b(in\\s+the\\s+)?(future|hence)\\b' - DayRegex = f'(the\\s*)?(?([1-3]1(th|st)?|[12]2(nd|th)?|[12]3(rd|th)?|(10|1[4-9]|20|2[4-9]|30)(th)?|0?(1(st)?|2(nd)?|3(rd)?|[4-9](th)?)))(?=\\b|t)' - ImplicitDayRegex = f'(the\\s*)?(?[1-3]1(th|st)|[12]2(nd|th)|[12]3(rd|th)|(10|1[4-9]|20|2[4-9]|30)(th)|0?(1st|2nd|3rd|[4-9]th))\\b' + DayRegex = f'(the\\s*)?(?(3[0-1]|[1-2]\\d|0?[1-9])(th|nd|rd|st)?)(?=\\b|t)' + ImplicitDayRegex = f'(the\\s*)?(?(3[0-1]|[0-2]?\\d)(th|nd|rd|st))\\b' MonthNumRegex = f'(?1[0-2]|(0)?[1-9])\\b' WrittenOneToNineRegex = f'(one|two|three|four|five|six|seven|eight|nine)' WrittenElevenToNineteenRegex = f'(eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen)' @@ -98,7 +98,7 @@ class EnglishDateTime: SpecialDayRegex = f'\\b((the\\s+)?day before yesterday|(the\\s+)?day after (tomorrow|tmr)|the\\s+day\\s+(before|after)(?!=\\s+day)|((the\\s+)?({RelativeRegex}|my)\\s+day)|yesterday|tomorrow|tmr|today)\\b' SpecialDayWithNumRegex = f'\\b((?{WrittenNumRegex})\\s+days?\\s+from\\s+(?yesterday|tomorrow|tmr|today))\\b' RelativeDayRegex = 
f'\\b(((the\\s+)?{RelativeRegex}\\s+day))\\b' - SetWeekDayRegex = f'\\b(?on\\s+)?(?morning|afternoon|evening|night|sunday|monday|tuesday|wednesday|thursday|friday|saturday)s\\b' + SetWeekDayRegex = f'\\b(?on\\s+)?(?morning|afternoon|evening|night|(sun|mon|tues|wednes|thurs|fri|satur)day)s\\b' WeekDayOfMonthRegex = f'(?(the\\s+)?(?first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|last)\\s+{WeekDayRegex}\\s+{MonthSuffixRegex})' RelativeWeekDayRegex = f'\\b({WrittenNumRegex}\\s+{WeekDayRegex}\\s+(from\\s+now|later))\\b' SpecialDate = f'(?=\\b(on|at)\\s+the\\s+){DayRegex}\\b' @@ -118,15 +118,15 @@ class EnglishDateTime: OfMonth = f'^\\s*of\\s*{MonthRegex}' MonthEnd = f'{MonthRegex}\\s*(the)?\\s*$' WeekDayEnd = f'(this\\s+)?{WeekDayRegex}\\s*,?\\s*$' - RangeUnitRegex = f'\\b(?years|year|months|month|weeks|week)\\b' + RangeUnitRegex = f'\\b(?years?|months?|weeks?)\\b' HourNumRegex = f'\\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b' MinuteNumRegex = f'(?ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|one|two|three|four|five|six|seven|eight|nine)' DeltaMinuteNumRegex = f'(?ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|one|two|three|four|five|six|seven|eight|nine)' PmRegex = f'(?(((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\\s+(the\\s+)?night))' - PmRegexFull = f'(?((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|midnight|night|lunchtime))' + PmRegexFull = f'(?((at|in|around|on|for)\\s+(the\\s+)?)?(afternoon|evening|(mid)?night|lunchtime))' AmRegex = f'(?((at|in|around|on|for)\\s+(the\\s+)?)?(morning))' - LunchRegex = f'\\b(lunchtime)\\b' - NightRegex = f'\\b(midnight|night)\\b' + LunchRegex = f'\\blunchtime\\b' + NightRegex = f'\\b(mid)?night\\b' CommonDatePrefixRegex = f'^[\\.]' LessThanOneHour = f'(?(a\\s+)?quarter|three quarter(s)?|half( an 
hour)?|{BaseDateTime.DeltaMinuteRegex}(\\s+(minute|minutes|min|mins))|{DeltaMinuteNumRegex}(\\s+(minute|minutes|min|mins)))' WrittenTimeRegex = f'(?{HourNumRegex}\\s+({MinuteNumRegex}|(?twenty|thirty|forty|fourty|fifty)\\s+{MinuteNumRegex}))' @@ -134,18 +134,18 @@ class EnglishDateTime: TimeSuffix = f'(?{AmRegex}|{PmRegex}|{OclockRegex})' TimeSuffixFull = f'(?{AmRegex}|{PmRegexFull}|{OclockRegex})' BasicTime = f'\\b(?{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}:{BaseDateTime.MinuteRegex}(:{BaseDateTime.SecondRegex})?|{BaseDateTime.HourRegex}(?![%\\d]))' - MidnightRegex = f'(?midnight|mid-night|mid night)' - MidmorningRegex = f'(?midmorning|mid-morning|mid morning)' - MidafternoonRegex = f'(?midafternoon|mid-afternoon|mid afternoon)' - MiddayRegex = f'(?midday|mid-day|mid day|((12\\s)?noon))' + MidnightRegex = f'(?mid\\s*(-\\s*)?night)' + MidmorningRegex = f'(?mid\\s*(-\\s*)?morning)' + MidafternoonRegex = f'(?mid\\s*(-\\s*)?afternoon)' + MiddayRegex = f'(?mid\\s*(-\\s*)?day|((12\\s)?noon))' MidTimeRegex = f'(?({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))' AtRegex = f'\\b(((?<=\\bat\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\\.\\d)(\\s*((?a)|(?p)))?|{MidTimeRegex}))|{MidTimeRegex})\\b' - IshRegex = f'\\b({BaseDateTime.HourRegex}(-|——)?ish|noonish|noon)\\b' + IshRegex = f'\\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\\b' TimeUnitRegex = f'([^A-Za-z]{{1,}}|\\b)(?hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\\b' RestrictedTimeUnitRegex = f'(?hour|minute)\\b' - FivesRegex = f'(?(fifteen|twenty(\\s*five)?|thirty(\\s*five)?|forty(\\s*five)?|fourty(\\s*five)?|fifty(\\s*five)?|ten|five))\\b' + FivesRegex = f'(?(fifteen|(twen|thir|for|four|fif)ty(\\s*five)?|ten|five))\\b' HourRegex = f'\\b{BaseDateTime.HourRegex}' - PeriodHourNumRegex = f'\\b(?twenty 
(one|two|three|four)|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|zero|one|two|three|four|five|six|seven|eight|nine|ten)\\b' + PeriodHourNumRegex = f'\\b(?twenty( (one|two|three|four))?|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|zero|one|two|three|four|five|six|seven|eight|nine|ten)\\b' ConnectNumRegex = f'\\b{BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex}' TimeRegexWithDotConnector = f'({BaseDateTime.HourRegex}(\\s*\\.\\s*){BaseDateTime.MinuteRegex})' TimeRegex1 = f'\\b({TimePrefix}\\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s*|[.]){DescRegex}' @@ -189,7 +189,7 @@ class EnglishDateTime: MoreThanRegex = f'\\b(more\\s+than)\\b' DurationUnitRegex = f'(?{DateUnitRegex}|hours?|hrs?|h|minutes?|mins?|seconds?|secs?)\\b' SuffixAndRegex = f'(?\\s*(and)\\s+((an|a)\\s+)?(?half|quarter))' - PeriodicRegex = f'\\b(?daily|monthly|weekly|biweekly|yearly|annually|annual)\\b' + PeriodicRegex = f'\\b(?daily|monthly|weekly|biweekly|yearly|annual(ly)?)\\b' EachUnitRegex = f'(?(each|every)(?\\s+other)?\\s*{DurationUnitRegex})' EachPrefixRegex = f'\\b(?(each|(every))\\s*$)' SetEachRegex = f'\\b(?(each|(every))\\s*)' @@ -220,8 +220,8 @@ class EnglishDateTime: MorningStartEndRegex = f'(^(morning|{AmDescRegex}))|((morning|{AmDescRegex})$)' AfternoonStartEndRegex = f'(^(afternoon|{PmDescRegex}))|((afternoon|{PmDescRegex})$)' EveningStartEndRegex = f'(^(evening))|((evening)$)' - NightStartEndRegex = f'(^(overnight|tonight|night))|((overnight|tonight|night)$)' - InexactNumberRegex = f'\\b(a few|few|some|several|(?(a\\s+)?couple(\\s+of)?))\\b' + NightStartEndRegex = f'(^(over|to)?night)|((over|to)?night$)' + InexactNumberRegex = f'\\b((a )?few|some|several|(?(a\\s+)?couple(\\s+of)?))\\b' InexactNumberUnitRegex = f'({InexactNumberRegex})\\s+({DurationUnitRegex})' RelativeTimeUnitRegex = 
f'((({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\\s+({TimeUnitRegex}))|((the|my))\\s+({RestrictedTimeUnitRegex}))' RelativeDurationUnitRegex = f'(((?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\\s+)({DurationUnitRegex}))|((the|my))\\s+({RestrictedTimeUnitRegex}))' @@ -230,7 +230,7 @@ class EnglishDateTime: FromToRegex = f'\\b(from).+(to)\\b.+' SingleAmbiguousMonthRegex = f'^(the\\s+)?(may|march)$' SingleAmbiguousTermsRegex = f'^(the\\s+)?(day|week|month|year)$' - UnspecificDatePeriodRegex = f'^(week|weekend|month|year)$' + UnspecificDatePeriodRegex = f'^(week(end)?|month|year)$' PrepositionSuffixRegex = f'\\b(on|in|at|around|from|to)$' FlexibleDayRegex = f'(?([A-Za-z]+\\s)?[A-Za-z\\d]+)' ForTheRegex = f'\\b((((?<=for\\s+)the\\s+{FlexibleDayRegex})|((?<=on\\s+)(the\\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.|!|\\?|$)))' @@ -252,14 +252,14 @@ class EnglishDateTime: NumberAsTimeRegex = f'\\b({WrittenTimeRegex}|{PeriodHourNumRegex}|{BaseDateTime.HourRegex})\\b' TimeBeforeAfterRegex = f'\\b(((?<=\\b(before|no later than|by|after)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}|{MidTimeRegex}))|{MidTimeRegex})\\b' DateNumberConnectorRegex = f'^\\s*(?\\s+at)\\s*$' - DecadeRegex = f'(?noughties|twenties|thirties|forties|fifties|sixties|seventies|eighties|nineties|two thousands)' + DecadeRegex = f'(?(nough|twen|thir|for|four|fif|six|seven|eigh|nine)ties|two thousands)' DecadeWithCenturyRegex = f'(the\\s+)?(((?\\d|1\\d|2\\d)?(\')?(?\\d0)(\')?(\\s)?s\\b)|(({CenturyRegex}(\\s+|-)(and\\s+)?)?{DecadeRegex})|({CenturyRegex}(\\s+|-)(and\\s+)?(?tens|hundreds)))' RelativeDecadeRegex = f'\\b((the\\s+)?{RelativeRegex}\\s+((?[\\w,]+)\\s+)?decades?)\\b' YearPeriodRegex = f'((((from|during|in)\\s+)?{YearRegex}\\s*({TillRegex})\\s*{YearRegex})|(((between)\\s+){YearRegex}\\s*({RangeConnectorRegex})\\s*{YearRegex}))' - StrictTillRegex = 
f'(?\\b(to|till|til|until|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))' + StrictTillRegex = f'(?\\b(to|(un)?till?|thru|through)\\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))' StrictRangeConnectorRegex = f'(?\\b(and|through|to)\\b|{BaseDateTime.RangeConnectorSymbolRegex}(?!\\s*(h[1-2]|q[1-4])(?!(\\s+of|\\s*,\\s*))))' ComplexDatePeriodRegex = f'(((from|during|in)\\s+)?(?.+)\\s*({StrictTillRegex})\\s*(?.+)|((between)\\s+)(?.+)\\s*({StrictRangeConnectorRegex})\\s*(?.+))' - FailFastRegex = f'{BaseDateTime.DeltaMinuteRegex}|\\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|century|centuries|weekends?|quarters?|half|halves|yesterday|tomorrow|tmr|today|tonight|mornings?|noonish|\\d(-|——)?ish|((the\\s+\\w*)|\\d)th|afternoons?|evenings?|nights?|noon|lunchtime|lunch|dinnertime|dinner|midnight|mid-nights?|midmornings?|mid-mornings?|midafternoonss?|mid-afternoons?|midday|mid-day|daytime|nighttime|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eod|eom|eoy|mardi gras|mardi-gras|mardigras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|patrick|cinco de mayo|all hallow|all souls|guy fawkes|st patrick|hundreds?|noughties|aughts|thousands?)\\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\\b({DateUnitRegex}|{ImplicitDayRegex})' + FailFastRegex = 
f'{BaseDateTime.DeltaMinuteRegex}|\\b({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex})|{BaseDateTime.BaseAmPmDescRegex}|\\b(zero|{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}|{WrittenMonthRegex}|{SeasonDescRegex}|{DecadeRegex}|centur(y|ies)|weekends?|quarters?|hal(f|ves)|yesterday|to(morrow|day|night)|tmr|noonish|\\d(-|——)?ish|((the\\s+\\w*)|\\d)(th|rd|nd|st)|(mid\\s*(-\\s*)?)?(night|morning|afternoon|day)s?|evenings?|noon|lunch(time)?|dinner(time)?|(day|night)time|overnight|dawn|dusk|sunset|hours?|hrs?|h|minutes?|mins?|seconds?|secs?|eo[dmy]|mardi[ -]?gras|birthday|eve|christmas|xmas|thanksgiving|halloween|yuandan|easter|yuan dan|april fools|cinco de mayo|all (hallow|souls)|guy fawkes|(st )?patrick|hundreds?|noughties|aughts|thousands?)\\b|{WeekDayRegex}|{SetWeekDayRegex}|{NowRegex}|{PeriodicRegex}|\\b({DateUnitRegex}|{ImplicitDayRegex})' UnitMap = dict([("decades", "10Y"), ("decade", "10Y"), ("years", "Y"), diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/base_numbers.py b/Python/libraries/recognizers-number/recognizers_number/resources/base_numbers.py index fdffa67288..ccc04a6fe4 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/base_numbers.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/base_numbers.py @@ -1,22 +1,22 @@ -# ------------------------------------------------------------------------------ -# -# This code was generated by a tool. -# Changes to this file may cause incorrect behavior and will be lost if -# the code is regenerated. -# -# -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# ------------------------------------------------------------------------------ - -# pylint: disable=line-too-long -class BaseNumbers: - NumberReplaceToken = '@builtin.num' - FractionNumberReplaceToken = '@builtin.num.fraction' - IntegerRegexDefinition = lambda placeholder, thousandsmark: f'(((? 
+# This code was generated by a tool. +# Changes to this file may cause incorrect behavior and will be lost if +# the code is regenerated. +# +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------------ + +# pylint: disable=line-too-long +class BaseNumbers: + NumberReplaceToken = '@builtin.num' + FractionNumberReplaceToken = '@builtin.num.fraction' + IntegerRegexDefinition = lambda placeholder, thousandsmark: f'(((? -# This code was generated by a tool. -# Changes to this file may cause incorrect behavior and will be lost if -# the code is regenerated. -# -# -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# ------------------------------------------------------------------------------ - -from .base_numbers import BaseNumbers -# pylint: disable=line-too-long -class ChineseNumeric: - LangMarker = 'Chs' - CompoundNumberLanguage = True - MultiDecimalSeparatorCulture = True - DecimalSeparatorChar = '.' 
- FractionMarkerToken = '' - NonDecimalSeparatorChar = ' ' - HalfADozenText = '' - WordSeparatorToken = '' - ZeroChar = '零' - PairChar = '对' - RoundNumberMap = dict([("k", 1000), - ("m", 1000000), - ("g", 1000000000), - ("t", 1000000000000)]) - RoundNumberMapChar = dict([("十", 10), - ("百", 100), - ("千", 1000), - ("万", 10000), - ("亿", 100000000), - ("兆", 1000000000000), - ("拾", 10), - ("佰", 100), - ("仟", 1000), - ("萬", 10000), - ("億", 100000000)]) - ZeroToNineMap = dict([("零", 0), - ("一", 1), - ("二", 2), - ("三", 3), - ("四", 4), - ("五", 5), - ("六", 6), - ("七", 7), - ("八", 8), - ("九", 9), - ("〇", 0), - ("壹", 1), - ("贰", 2), - ("貳", 2), - ("叁", 3), - ("肆", 4), - ("伍", 5), - ("陆", 6), - ("陸", 6), - ("柒", 7), - ("捌", 8), - ("玖", 9), - ("0", 0), - ("1", 1), - ("2", 2), - ("3", 3), - ("4", 4), - ("5", 5), - ("6", 6), - ("7", 7), - ("8", 8), - ("9", 9), - ("0", 0), - ("1", 1), - ("2", 2), - ("3", 3), - ("4", 4), - ("5", 5), - ("6", 6), - ("7", 7), - ("8", 8), - ("9", 9), - ("半", 0.5), - ("两", 2), - ("兩", 2), - ("俩", 2), - ("倆", 2), - ("仨", 3)]) - FullToHalfMap = dict([("0", "0"), - ("1", "1"), - ("2", "2"), - ("3", "3"), - ("4", "4"), - ("5", "5"), - ("6", "6"), - ("7", "7"), - ("8", "8"), - ("9", "9"), - ("/", "/"), - ("-", "-"), - (",", "'"), - ("G", "G"), - ("M", "M"), - ("T", "T"), - ("K", "K"), - ("k", "k"), - (".", ".")]) - TratoSimMap = dict([("佰", "百"), - ("點", "点"), - ("個", "个"), - ("幾", "几"), - ("對", "对"), - ("雙", "双")]) - UnitMap = dict([("萬萬", "億"), - ("億萬", "兆"), - ("萬億", "兆"), - ("万万", "亿"), - ("万亿", "兆"), - ("亿万", "兆"), - (" ", ""), - ("多", ""), - ("余", ""), - ("几", "")]) - RoundDirectList = [r'万', r'萬', r'亿', r'兆', r'億'] - TenChars = [r'十', r'拾'] - DigitalNumberRegex = f'((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' - ZeroToNineFullHalfRegex = f'[\\d1234567890]' - DigitNumRegex = f'{ZeroToNineFullHalfRegex}+' - DozenRegex = f'.*打$' - PercentageRegex = f'(?<=百\\s*分\\s*之).+|.+(?=个\\s*百\\s*分\\s*点)|.*(?=[%%])' - DoubleAndRoundRegex = 
f'{ZeroToNineFullHalfRegex}+(\\.{ZeroToNineFullHalfRegex}+)?\\s*[多几余]?[万亿萬億]{{1,2}}' - FracSplitRegex = f'又|分\\s*之' - ZeroToNineIntegerRegex = f'[一二三四五六七八九零壹贰貳叁肆伍陆陸柒捌玖〇两兩俩倆仨]' - HalfUnitRegex = f'半' - NegativeNumberTermsRegex = f'[负負]' - NegativeNumberTermsRegexNum = f'((?)' - LessRegex = f'(小于|少于|低于|小於|少於|低於|不到|不足|<)' - EqualRegex = f'(等于|等於|=)' - MoreOrEqual = f'(({MoreRegex}\\s*(或|或者)?\\s*{EqualRegex})|至少|最少|不{LessRegex})' - MoreOrEqualSuffix = f'(或|或者)\\s*(以上|之上|更[大多高])' - LessOrEqual = f'(({LessRegex}\\s*(或|或者)?\\s*{EqualRegex})|至多|最多|不{MoreRegex})' - LessOrEqualSuffix = f'(或|或者)\\s*(以下|之下|更[小少低])' - OneNumberRangeMoreRegex1 = f'({MoreOrEqual}|{MoreRegex})\\s*(?((?!([并且而並的同時时]|([,,](?!\\d+))|。)).)+)' - OneNumberRangeMoreRegex2 = f'比\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*更?[大多高]' - OneNumberRangeMoreRegex3 = f'(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*([多几余幾餘]|以上|之上|更[大多高])(?![万亿萬億]{{1,2}})' - OneNumberRangeLessRegex1 = f'({LessOrEqual}|{LessRegex})\\s*(?((?!([并且而並的同時时]|([,,](?!\\d+))|。)).)+)' - OneNumberRangeLessRegex2 = f'比\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*更?[小少低]' - OneNumberRangeLessRegex3 = f'(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*(以下|之下|更[小少低])' - OneNumberRangeMoreSeparateRegex = f'^[.]' - OneNumberRangeLessSeparateRegex = f'^[.]' - OneNumberRangeEqualRegex = f'{EqualRegex}\\s*(?((?!(([,,](?!\\d+))|。)).)+)' - TwoNumberRangeRegex1 = f'((位于|在|位於)|(?=(\\d|\\+|\\-)))\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*(和|与|與|{TillRegex})\\s*(?((?!(([,,](?!\\d+))|。))[^之])+)\\s*(之)?(间|間)' - TwoNumberRangeRegex2 = f'({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})\\s*(且|并且|而且|並且|((的)?同時)|((的)?同时)|[,,])?\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})' - TwoNumberRangeRegex3 = f'({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})\\s*(且|并且|而且|並且|((的)?同時)|((的)?同时)|[,,])?\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})' - 
TwoNumberRangeRegex4 = f'(?((?!(([,,](?!\\d+))|。)).)+)\\s*{TillRegex}\\s*(?((?!(([,,](?!\\d+))|。)).)+)' - AmbiguousFractionConnectorsRegex = f'^[.]' - RelativeReferenceOffsetMap = dict([("", "")]) - RelativeReferenceRelativeToMap = dict([("", "")]) -# pylint: enable=line-too-long +# ------------------------------------------------------------------------------ +# +# This code was generated by a tool. +# Changes to this file may cause incorrect behavior and will be lost if +# the code is regenerated. +# +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------------ + +from .base_numbers import BaseNumbers +# pylint: disable=line-too-long +class ChineseNumeric: + LangMarker = 'Chs' + CompoundNumberLanguage = True + MultiDecimalSeparatorCulture = True + DecimalSeparatorChar = '.' + FractionMarkerToken = '' + NonDecimalSeparatorChar = ' ' + HalfADozenText = '' + WordSeparatorToken = '' + ZeroChar = '零' + PairChar = '对' + RoundNumberMap = dict([("k", 1000), + ("m", 1000000), + ("g", 1000000000), + ("t", 1000000000000)]) + RoundNumberMapChar = dict([("十", 10), + ("百", 100), + ("千", 1000), + ("万", 10000), + ("亿", 100000000), + ("兆", 1000000000000), + ("拾", 10), + ("佰", 100), + ("仟", 1000), + ("萬", 10000), + ("億", 100000000)]) + ZeroToNineMap = dict([("零", 0), + ("一", 1), + ("二", 2), + ("三", 3), + ("四", 4), + ("五", 5), + ("六", 6), + ("七", 7), + ("八", 8), + ("九", 9), + ("〇", 0), + ("壹", 1), + ("贰", 2), + ("貳", 2), + ("叁", 3), + ("肆", 4), + ("伍", 5), + ("陆", 6), + ("陸", 6), + ("柒", 7), + ("捌", 8), + ("玖", 9), + ("0", 0), + ("1", 1), + ("2", 2), + ("3", 3), + ("4", 4), + ("5", 5), + ("6", 6), + ("7", 7), + ("8", 8), + ("9", 9), + ("0", 0), + ("1", 1), + ("2", 2), + ("3", 3), + ("4", 4), + ("5", 5), + ("6", 6), + ("7", 7), + ("8", 8), + ("9", 9), + ("半", 0.5), + ("两", 2), + ("兩", 2), + ("俩", 2), + ("倆", 2), + ("仨", 3)]) + FullToHalfMap = dict([("0", "0"), + 
("1", "1"), + ("2", "2"), + ("3", "3"), + ("4", "4"), + ("5", "5"), + ("6", "6"), + ("7", "7"), + ("8", "8"), + ("9", "9"), + ("/", "/"), + ("-", "-"), + (",", "'"), + ("G", "G"), + ("M", "M"), + ("T", "T"), + ("K", "K"), + ("k", "k"), + (".", ".")]) + TratoSimMap = dict([("佰", "百"), + ("點", "点"), + ("個", "个"), + ("幾", "几"), + ("對", "对"), + ("雙", "双")]) + UnitMap = dict([("萬萬", "億"), + ("億萬", "兆"), + ("萬億", "兆"), + ("万万", "亿"), + ("万亿", "兆"), + ("亿万", "兆"), + (" ", ""), + ("多", ""), + ("余", ""), + ("几", "")]) + RoundDirectList = [r'万', r'萬', r'亿', r'兆', r'億'] + TenChars = [r'十', r'拾'] + DigitalNumberRegex = f'((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' + ZeroToNineFullHalfRegex = f'[\\d1234567890]' + DigitNumRegex = f'{ZeroToNineFullHalfRegex}+' + DozenRegex = f'.*打$' + PercentageRegex = f'(?<=百\\s*分\\s*之).+|.+(?=个\\s*百\\s*分\\s*点)|.*(?=[%%])' + DoubleAndRoundRegex = f'{ZeroToNineFullHalfRegex}+(\\.{ZeroToNineFullHalfRegex}+)?\\s*[多几余]?[万亿萬億]{{1,2}}' + FracSplitRegex = f'又|分\\s*之' + ZeroToNineIntegerRegex = f'[一二三四五六七八九零壹贰貳叁肆伍陆陸柒捌玖〇两兩俩倆仨]' + HalfUnitRegex = f'半' + NegativeNumberTermsRegex = f'[负負]' + NegativeNumberTermsRegexNum = f'((?)' + LessRegex = f'(小于|少于|低于|小於|少於|低於|不到|不足|<)' + EqualRegex = f'(等于|等於|=)' + MoreOrEqual = f'(({MoreRegex}\\s*(或|或者)?\\s*{EqualRegex})|至少|最少|不{LessRegex})' + MoreOrEqualSuffix = f'(或|或者)\\s*(以上|之上|更[大多高])' + LessOrEqual = f'(({LessRegex}\\s*(或|或者)?\\s*{EqualRegex})|至多|最多|不{MoreRegex})' + LessOrEqualSuffix = f'(或|或者)\\s*(以下|之下|更[小少低])' + OneNumberRangeMoreRegex1 = f'({MoreOrEqual}|{MoreRegex})\\s*(?((?!([并且而並的同時时]|([,,](?!\\d+))|。)).)+)' + OneNumberRangeMoreRegex2 = f'比\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*更?[大多高]' + OneNumberRangeMoreRegex3 = f'(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*([多几余幾餘]|以上|之上|更[大多高])(?![万亿萬億]{{1,2}})' + OneNumberRangeLessRegex1 = f'({LessOrEqual}|{LessRegex})\\s*(?((?!([并且而並的同時时]|([,,](?!\\d+))|。)).)+)' + OneNumberRangeLessRegex2 = f'比\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*更?[小少低]' + 
OneNumberRangeLessRegex3 = f'(?((?!(([,,](?!\\d+))|。|[或者])).)+)\\s*(或|或者)?\\s*(以下|之下|更[小少低])' + OneNumberRangeMoreSeparateRegex = f'^[.]' + OneNumberRangeLessSeparateRegex = f'^[.]' + OneNumberRangeEqualRegex = f'{EqualRegex}\\s*(?((?!(([,,](?!\\d+))|。)).)+)' + TwoNumberRangeRegex1 = f'((位于|在|位於)|(?=(\\d|\\+|\\-)))\\s*(?((?!(([,,](?!\\d+))|。)).)+)\\s*(和|与|與|{TillRegex})\\s*(?((?!(([,,](?!\\d+))|。))[^之])+)\\s*(之)?(间|間)' + TwoNumberRangeRegex2 = f'({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})\\s*(且|并且|而且|並且|((的)?同時)|((的)?同时)|[,,])?\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})' + TwoNumberRangeRegex3 = f'({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2}|{OneNumberRangeLessRegex3})\\s*(且|并且|而且|並且|((的)?同時)|((的)?同时)|[,,])?\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2}|{OneNumberRangeMoreRegex3})' + TwoNumberRangeRegex4 = f'(?((?!(([,,](?!\\d+))|。)).)+)\\s*{TillRegex}\\s*(?((?!(([,,](?!\\d+))|。)).)+)' + AmbiguousFractionConnectorsRegex = f'^[.]' + RelativeReferenceOffsetMap = dict([("", "")]) + RelativeReferenceRelativeToMap = dict([("", "")]) +# pylint: enable=line-too-long diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py index e8fa2b8977..19c11e3684 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py @@ -1,254 +1,254 @@ -# ------------------------------------------------------------------------------ -# -# This code was generated by a tool. -# Changes to this file may cause incorrect behavior and will be lost if -# the code is regenerated. -# -# -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# ------------------------------------------------------------------------------ - -from .base_numbers import BaseNumbers -# pylint: disable=line-too-long -class EnglishNumeric: - LangMarker = 'Eng' - CompoundNumberLanguage = True - MultiDecimalSeparatorCulture = True - RoundNumberIntegerRegex = f'(hundred|thousand|million|billion|trillion)' - ZeroToNineIntegerRegex = f'(three|seven|eight|four|five|zero|nine|one|two|six)' - TwoToNineIntegerRegex = f'(three|seven|eight|four|five|nine|two|six)' - NegativeNumberTermsRegex = f'((minus|negative)\\s+)' - NegativeNumberSignRegex = f'^{NegativeNumberTermsRegex}.*' - AnIntRegex = f'(an|a)(?=\\s)' - TenToNineteenIntegerRegex = f'(seventeen|thirteen|fourteen|eighteen|nineteen|fifteen|sixteen|eleven|twelve|ten)' - TensNumberIntegerRegex = f'(seventy|twenty|thirty|eighty|ninety|forty|fifty|sixty)' - SeparaIntRegex = f'((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\\s+{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\\s+{RoundNumberIntegerRegex})+))' - AllIntRegex = f'(((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\\s+{RoundNumberIntegerRegex})+)\\s+(and\\s+)?)*{SeparaIntRegex})' - PlaceHolderPureNumber = f'\\b' - PlaceHolderDefault = f'\\D|\\b' - NumbersWithPlaceHolder = lambda placeholder: f'(((?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)' - FractionPrepositionWithinPercentModeRegex = f'(?<=\\b)(?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)' - AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{SeparaIntRegex}))' - AllFloatRegex = f'{AllIntRegex}(\\s+point){AllPointRegex}' - DoubleWithMultiplierRegex = f'(((?and)' - NumberWithSuffixPercentage = f'(?)' - LessRegex = f'((less|lower|smaller|fewer)(\\s+than)?|below|under|(?|=)<)' - EqualRegex = 
f'(equal(s|ing)?(\\s+(to|than))?|(?)=)' - MoreOrEqualPrefix = f'((no\\s+{LessRegex})|(at\\s+least))' - MoreOrEqual = f'(({MoreRegex}\\s+(or)?\\s+{EqualRegex})|({EqualRegex}\\s+(or)?\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(or)?\\s+{EqualRegex})?|({EqualRegex}\\s+(or)?\\s+)?{MoreOrEqualPrefix}|>\\s*=)' - MoreOrEqualSuffix = f'((and|or)\\s+(more|greater|higher|larger|bigger)((?!\\s+than)|(\\s+than(?!(\\s*\\d+)))))' - LessOrEqualPrefix = f'((no\\s+{MoreRegex})|(at\\s+most))' - LessOrEqual = f'(({LessRegex}\\s+(or)?\\s+{EqualRegex})|({EqualRegex}\\s+(or)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(or)?\\s+{EqualRegex})?|({EqualRegex}\\s+(or)?\\s+)?{LessOrEqualPrefix}|<\\s*=)' - LessOrEqualSuffix = f'((and|or)\\s+(less|lower|smaller|fewer)((?!\\s+than)|(\\s+than(?!(\\s*\\d+)))))' - NumberSplitMark = f'(?![,.](?!\\d+))' - MoreRegexNoNumberSucceed = f'((bigger|greater|more|higher|larger)((?!\\s+than)|\\s+(than(?!(\\s*\\d+))))|(above|over)(?!(\\s*\\d+)))' - LessRegexNoNumberSucceed = f'((less|lower|smaller|fewer)((?!\\s+than)|\\s+(than(?!(\\s*\\d+))))|(below|under)(?!(\\s*\\d+)))' - EqualRegexNoNumberSucceed = f'(equal(s|ing)?((?!\\s+(to|than))|(\\s+(to|than)(?!(\\s*\\d+)))))' - OneNumberRangeMoreRegex1 = f'({MoreOrEqual}|{MoreRegex})\\s*(the\\s+)?(?({NumberSplitMark}.)+)' - OneNumberRangeMoreRegex2 = f'(?({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}' - OneNumberRangeMoreSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){EqualRegexNoNumberSucceed})' - OneNumberRangeLessRegex1 = f'({LessOrEqual}|{LessRegex})\\s*(the\\s+)?(?({NumberSplitMark}.)+)' - OneNumberRangeLessRegex2 = f'(?({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}' - OneNumberRangeLessSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){EqualRegexNoNumberSucceed})' - OneNumberRangeEqualRegex = 
f'{EqualRegex}\\s*(the\\s+)?(?({NumberSplitMark}.)+)' - TwoNumberRangeRegex1 = f'between\\s*(the\\s+)?(?({NumberSplitMark}.)+)\\s*and\\s*(the\\s+)?(?({NumberSplitMark}.)+)' - TwoNumberRangeRegex2 = f'({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(and|but|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})' - TwoNumberRangeRegex3 = f'({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\\s*(and|but|,)\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})' - TwoNumberRangeRegex4 = f'(from\\s+)?(?({NumberSplitMark}(?!\\bfrom\\b).)+)\\s*{TillRegex}\\s*(the\\s+)?(?({NumberSplitMark}.)+)' - AmbiguousFractionConnectorsRegex = f'(\\bin\\b)' - DecimalSeparatorChar = '.' - FractionMarkerToken = 'over' - NonDecimalSeparatorChar = ',' - HalfADozenText = 'six' - WordSeparatorToken = 'and' - WrittenDecimalSeparatorTexts = [r'point'] - WrittenGroupSeparatorTexts = [r'punto'] - WrittenIntegerSeparatorTexts = [r'and'] - WrittenFractionSeparatorTexts = [r'and'] - HalfADozenRegex = f'half\\s+a\\s+dozen' - DigitalNumberRegex = f'((?<=\\b)(hundred|thousand|million|billion|trillion|dozen(s)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' - CardinalNumberMap = dict([("a", 1), - ("zero", 0), - ("an", 1), - ("one", 1), - ("two", 2), - ("three", 3), - ("four", 4), - ("five", 5), - ("six", 6), - ("seven", 7), - ("eight", 8), - ("nine", 9), - ("ten", 10), - ("eleven", 11), - ("twelve", 12), - ("dozen", 12), - ("dozens", 12), - ("thirteen", 13), - ("fourteen", 14), - ("fifteen", 15), - ("sixteen", 16), - ("seventeen", 17), - ("eighteen", 18), - ("nineteen", 19), - ("twenty", 20), - ("thirty", 30), - ("forty", 40), - ("fifty", 50), - ("sixty", 60), - ("seventy", 70), - ("eighty", 80), - ("ninety", 90), - ("hundred", 100), - ("thousand", 1000), - ("million", 1000000), - ("billion", 1000000000), - ("trillion", 1000000000000)]) - OrdinalNumberMap = dict([("first", 1), - ("second", 2), - ("secondary", 2), - ("half", 2), - ("third", 3), - 
("fourth", 4), - ("quarter", 4), - ("fifth", 5), - ("sixth", 6), - ("seventh", 7), - ("eighth", 8), - ("ninth", 9), - ("tenth", 10), - ("eleventh", 11), - ("twelfth", 12), - ("thirteenth", 13), - ("fourteenth", 14), - ("fifteenth", 15), - ("sixteenth", 16), - ("seventeenth", 17), - ("eighteenth", 18), - ("nineteenth", 19), - ("twentieth", 20), - ("thirtieth", 30), - ("fortieth", 40), - ("fiftieth", 50), - ("sixtieth", 60), - ("seventieth", 70), - ("eightieth", 80), - ("ninetieth", 90), - ("hundredth", 100), - ("thousandth", 1000), - ("millionth", 1000000), - ("billionth", 1000000000), - ("trillionth", 1000000000000), - ("firsts", 1), - ("halves", 2), - ("thirds", 3), - ("fourths", 4), - ("quarters", 4), - ("fifths", 5), - ("sixths", 6), - ("sevenths", 7), - ("eighths", 8), - ("ninths", 9), - ("tenths", 10), - ("elevenths", 11), - ("twelfths", 12), - ("thirteenths", 13), - ("fourteenths", 14), - ("fifteenths", 15), - ("sixteenths", 16), - ("seventeenths", 17), - ("eighteenths", 18), - ("nineteenths", 19), - ("twentieths", 20), - ("thirtieths", 30), - ("fortieths", 40), - ("fiftieths", 50), - ("sixtieths", 60), - ("seventieths", 70), - ("eightieths", 80), - ("ninetieths", 90), - ("hundredths", 100), - ("thousandths", 1000), - ("millionths", 1000000), - ("billionths", 1000000000), - ("trillionths", 1000000000000)]) - RoundNumberMap = dict([("hundred", 100), - ("thousand", 1000), - ("million", 1000000), - ("billion", 1000000000), - ("trillion", 1000000000000), - ("hundredth", 100), - ("thousandth", 1000), - ("millionth", 1000000), - ("billionth", 1000000000), - ("trillionth", 1000000000000), - ("hundredths", 100), - ("thousandths", 1000), - ("millionths", 1000000), - ("billionths", 1000000000), - ("trillionths", 1000000000000), - ("dozen", 12), - ("dozens", 12), - ("k", 1000), - ("m", 1000000), - ("g", 1000000000), - ("b", 1000000000), - ("t", 1000000000000)]) - AmbiguityFiltersDict = dict([("\\bone\\b", "\\b(the|this|that|which)\\s+(one)\\b")]) - 
RelativeReferenceOffsetMap = dict([("last", "0"), - ("next one", "1"), - ("previous one", "-1"), - ("the second to last", "-1"), - ("the one before the last one", "-1"), - ("next to last", "-1"), - ("penultimate", "-1"), - ("the last but one", "-1"), - ("antepenultimate", "-2"), - ("next", "1"), - ("previous", "-1")]) - RelativeReferenceRelativeToMap = dict([("last", "end"), - ("next one", "current"), - ("previous one", "current"), - ("the second to last", "end"), - ("the one before the last one", "end"), - ("next to last", "end"), - ("penultimate", "end"), - ("the last but one", "end"), - ("antepenultimate", "end"), - ("next", "current"), - ("previous", "current")]) -# pylint: enable=line-too-long +# ------------------------------------------------------------------------------ +# +# This code was generated by a tool. +# Changes to this file may cause incorrect behavior and will be lost if +# the code is regenerated. +# +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# ------------------------------------------------------------------------------ + +from .base_numbers import BaseNumbers +# pylint: disable=line-too-long +class EnglishNumeric: + LangMarker = 'Eng' + CompoundNumberLanguage = True + MultiDecimalSeparatorCulture = True + RoundNumberIntegerRegex = f'(hundred|thousand|million|billion|trillion)' + ZeroToNineIntegerRegex = f'(three|seven|eight|four|five|zero|nine|one|two|six)' + TwoToNineIntegerRegex = f'(three|seven|eight|four|five|nine|two|six)' + NegativeNumberTermsRegex = f'((minus|negative)\\s+)' + NegativeNumberSignRegex = f'^{NegativeNumberTermsRegex}.*' + AnIntRegex = f'(an|a)(?=\\s)' + TenToNineteenIntegerRegex = f'(seventeen|thirteen|fourteen|eighteen|nineteen|fifteen|sixteen|eleven|twelve|ten)' + TensNumberIntegerRegex = f'(seventy|twenty|thirty|eighty|ninety|forty|fifty|sixty)' + SeparaIntRegex = f'((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\\s+{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\\s+{RoundNumberIntegerRegex})+))' + AllIntRegex = f'(((({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\\s+{RoundNumberIntegerRegex})+)\\s+(and\\s+)?)*{SeparaIntRegex})' + PlaceHolderPureNumber = f'\\b' + PlaceHolderDefault = f'\\D|\\b' + NumbersWithPlaceHolder = lambda placeholder: f'(((?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)' + FractionPrepositionWithinPercentModeRegex = f'(?<=\\b)(?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)' + AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{SeparaIntRegex}))' + AllFloatRegex = f'{AllIntRegex}(\\s+point){AllPointRegex}' + DoubleWithMultiplierRegex = f'(((?and)' + NumberWithSuffixPercentage = f'(?)' + LessRegex = f'((less|lower|smaller|fewer)(\\s+than)?|below|under|(?|=)<)' + EqualRegex = 
f'(equal(s|ing)?(\\s+(to|than))?|(?)=)' + MoreOrEqualPrefix = f'((no\\s+{LessRegex})|(at\\s+least))' + MoreOrEqual = f'(({MoreRegex}\\s+(or)?\\s+{EqualRegex})|({EqualRegex}\\s+(or)?\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(or)?\\s+{EqualRegex})?|({EqualRegex}\\s+(or)?\\s+)?{MoreOrEqualPrefix}|>\\s*=)' + MoreOrEqualSuffix = f'((and|or)\\s+(more|greater|higher|larger|bigger)((?!\\s+than)|(\\s+than(?!(\\s*\\d+)))))' + LessOrEqualPrefix = f'((no\\s+{MoreRegex})|(at\\s+most))' + LessOrEqual = f'(({LessRegex}\\s+(or)?\\s+{EqualRegex})|({EqualRegex}\\s+(or)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(or)?\\s+{EqualRegex})?|({EqualRegex}\\s+(or)?\\s+)?{LessOrEqualPrefix}|<\\s*=)' + LessOrEqualSuffix = f'((and|or)\\s+(less|lower|smaller|fewer)((?!\\s+than)|(\\s+than(?!(\\s*\\d+)))))' + NumberSplitMark = f'(?![,.](?!\\d+))' + MoreRegexNoNumberSucceed = f'((bigger|greater|more|higher|larger)((?!\\s+than)|\\s+(than(?!(\\s*\\d+))))|(above|over)(?!(\\s*\\d+)))' + LessRegexNoNumberSucceed = f'((less|lower|smaller|fewer)((?!\\s+than)|\\s+(than(?!(\\s*\\d+))))|(below|under)(?!(\\s*\\d+)))' + EqualRegexNoNumberSucceed = f'(equal(s|ing)?((?!\\s+(to|than))|(\\s+(to|than)(?!(\\s*\\d+)))))' + OneNumberRangeMoreRegex1 = f'({MoreOrEqual}|{MoreRegex})\\s*(the\\s+)?(?({NumberSplitMark}.)+)' + OneNumberRangeMoreRegex2 = f'(?({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}' + OneNumberRangeMoreSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){EqualRegexNoNumberSucceed})' + OneNumberRangeLessRegex1 = f'({LessOrEqual}|{LessRegex})\\s*(the\\s+)?(?({NumberSplitMark}.)+)' + OneNumberRangeLessRegex2 = f'(?({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}' + OneNumberRangeLessSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?({NumberSplitMark}.)+)(\\s+or\\s+){EqualRegexNoNumberSucceed})' + OneNumberRangeEqualRegex = 
f'{EqualRegex}\\s*(the\\s+)?(?({NumberSplitMark}.)+)' + TwoNumberRangeRegex1 = f'between\\s*(the\\s+)?(?({NumberSplitMark}.)+)\\s*and\\s*(the\\s+)?(?({NumberSplitMark}.)+)' + TwoNumberRangeRegex2 = f'({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(and|but|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})' + TwoNumberRangeRegex3 = f'({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\\s*(and|but|,)\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})' + TwoNumberRangeRegex4 = f'(from\\s+)?(?({NumberSplitMark}(?!\\bfrom\\b).)+)\\s*{TillRegex}\\s*(the\\s+)?(?({NumberSplitMark}.)+)' + AmbiguousFractionConnectorsRegex = f'(\\bin\\b)' + DecimalSeparatorChar = '.' + FractionMarkerToken = 'over' + NonDecimalSeparatorChar = ',' + HalfADozenText = 'six' + WordSeparatorToken = 'and' + WrittenDecimalSeparatorTexts = [r'point'] + WrittenGroupSeparatorTexts = [r'punto'] + WrittenIntegerSeparatorTexts = [r'and'] + WrittenFractionSeparatorTexts = [r'and'] + HalfADozenRegex = f'half\\s+a\\s+dozen' + DigitalNumberRegex = f'((?<=\\b)(hundred|thousand|million|billion|trillion|dozen(s)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' + CardinalNumberMap = dict([("a", 1), + ("zero", 0), + ("an", 1), + ("one", 1), + ("two", 2), + ("three", 3), + ("four", 4), + ("five", 5), + ("six", 6), + ("seven", 7), + ("eight", 8), + ("nine", 9), + ("ten", 10), + ("eleven", 11), + ("twelve", 12), + ("dozen", 12), + ("dozens", 12), + ("thirteen", 13), + ("fourteen", 14), + ("fifteen", 15), + ("sixteen", 16), + ("seventeen", 17), + ("eighteen", 18), + ("nineteen", 19), + ("twenty", 20), + ("thirty", 30), + ("forty", 40), + ("fifty", 50), + ("sixty", 60), + ("seventy", 70), + ("eighty", 80), + ("ninety", 90), + ("hundred", 100), + ("thousand", 1000), + ("million", 1000000), + ("billion", 1000000000), + ("trillion", 1000000000000)]) + OrdinalNumberMap = dict([("first", 1), + ("second", 2), + ("secondary", 2), + ("half", 2), + ("third", 3), + 
("fourth", 4), + ("quarter", 4), + ("fifth", 5), + ("sixth", 6), + ("seventh", 7), + ("eighth", 8), + ("ninth", 9), + ("tenth", 10), + ("eleventh", 11), + ("twelfth", 12), + ("thirteenth", 13), + ("fourteenth", 14), + ("fifteenth", 15), + ("sixteenth", 16), + ("seventeenth", 17), + ("eighteenth", 18), + ("nineteenth", 19), + ("twentieth", 20), + ("thirtieth", 30), + ("fortieth", 40), + ("fiftieth", 50), + ("sixtieth", 60), + ("seventieth", 70), + ("eightieth", 80), + ("ninetieth", 90), + ("hundredth", 100), + ("thousandth", 1000), + ("millionth", 1000000), + ("billionth", 1000000000), + ("trillionth", 1000000000000), + ("firsts", 1), + ("halves", 2), + ("thirds", 3), + ("fourths", 4), + ("quarters", 4), + ("fifths", 5), + ("sixths", 6), + ("sevenths", 7), + ("eighths", 8), + ("ninths", 9), + ("tenths", 10), + ("elevenths", 11), + ("twelfths", 12), + ("thirteenths", 13), + ("fourteenths", 14), + ("fifteenths", 15), + ("sixteenths", 16), + ("seventeenths", 17), + ("eighteenths", 18), + ("nineteenths", 19), + ("twentieths", 20), + ("thirtieths", 30), + ("fortieths", 40), + ("fiftieths", 50), + ("sixtieths", 60), + ("seventieths", 70), + ("eightieths", 80), + ("ninetieths", 90), + ("hundredths", 100), + ("thousandths", 1000), + ("millionths", 1000000), + ("billionths", 1000000000), + ("trillionths", 1000000000000)]) + RoundNumberMap = dict([("hundred", 100), + ("thousand", 1000), + ("million", 1000000), + ("billion", 1000000000), + ("trillion", 1000000000000), + ("hundredth", 100), + ("thousandth", 1000), + ("millionth", 1000000), + ("billionth", 1000000000), + ("trillionth", 1000000000000), + ("hundredths", 100), + ("thousandths", 1000), + ("millionths", 1000000), + ("billionths", 1000000000), + ("trillionths", 1000000000000), + ("dozen", 12), + ("dozens", 12), + ("k", 1000), + ("m", 1000000), + ("g", 1000000000), + ("b", 1000000000), + ("t", 1000000000000)]) + AmbiguityFiltersDict = dict([("\\bone\\b", "\\b(the|this|that|which)\\s+(one)\\b")]) + 
RelativeReferenceOffsetMap = dict([("last", "0"), + ("next one", "1"), + ("previous one", "-1"), + ("the second to last", "-1"), + ("the one before the last one", "-1"), + ("next to last", "-1"), + ("penultimate", "-1"), + ("the last but one", "-1"), + ("antepenultimate", "-2"), + ("next", "1"), + ("previous", "-1")]) + RelativeReferenceRelativeToMap = dict([("last", "end"), + ("next one", "current"), + ("previous one", "current"), + ("the second to last", "end"), + ("the one before the last one", "end"), + ("next to last", "end"), + ("penultimate", "end"), + ("the last but one", "end"), + ("antepenultimate", "end"), + ("next", "current"), + ("previous", "current")]) +# pylint: enable=line-too-long diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py index 3f880a0867..98f36908a5 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py @@ -1,328 +1,328 @@ -# ------------------------------------------------------------------------------ -# -# This code was generated by a tool. -# Changes to this file may cause incorrect behavior and will be lost if -# the code is regenerated. -# -# -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# ------------------------------------------------------------------------------ - -from .base_numbers import BaseNumbers -# pylint: disable=line-too-long -class FrenchNumeric: - LangMarker = 'Fr' - CompoundNumberLanguage = True - MultiDecimalSeparatorCulture = True - RoundNumberIntegerRegex = f'(cent|mille|millions|million|milliard|milliards|billion|billions)' - ZeroToNineIntegerRegex = f'(et un|un|une|deux|trois|quatre|cinq|six|sept|huit|neuf)' - TenToNineteenIntegerRegex = f'((seize|quinze|quatorze|treize|douze|onze)|dix(\\Wneuf|\\Whuit|\\Wsept)?)' - TensNumberIntegerRegex = f'(quatre\\Wvingt(s|\\Wdix)?|soixante\\Wdix|vingt|trente|quarante|cinquante|soixante|septante|octante|huitante|nonante)' - DigitsNumberRegex = f'\\d|\\d{{1,3}}(\\.\\d{{3}})' - NegativeNumberTermsRegex = f'^[.]' - NegativeNumberSignRegex = f'^({NegativeNumberTermsRegex}\\s+).*' - HundredsNumberIntegerRegex = f'(({ZeroToNineIntegerRegex}(\\s+cent))|cent|((\\s+cent\\s)+{TensNumberIntegerRegex}))' - BelowHundredsRegex = f'(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}([-\\s]+({TenToNineteenIntegerRegex}|{ZeroToNineIntegerRegex}))?))|{ZeroToNineIntegerRegex})' - BelowThousandsRegex = f'(({HundredsNumberIntegerRegex}(\\s+{BelowHundredsRegex})?|{BelowHundredsRegex}|{TenToNineteenIntegerRegex})|cent\\s+{TenToNineteenIntegerRegex})' - SupportThousandsRegex = f'(({BelowThousandsRegex}|{BelowHundredsRegex})\\s+{RoundNumberIntegerRegex}(\\s+{RoundNumberIntegerRegex})?)' - SeparaIntRegex = f'({SupportThousandsRegex}(\\s+{SupportThousandsRegex})*(\\s+{BelowThousandsRegex})?|{BelowThousandsRegex})' - AllIntRegex = f'({SeparaIntRegex}|mille(\\s+{BelowThousandsRegex})?)' - NumbersWithPlaceHolder = lambda placeholder: f'(((?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' - AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{SeparaIntRegex}))' - AllFloatRegex = f'({AllIntRegex}(\\s+(virgule|point)){AllPointRegex})' - DoubleDecimalPointRegex = lambda placeholder: f'(((? 
+# This code was generated by a tool. +# Changes to this file may cause incorrect behavior and will be lost if +# the code is regenerated. +# +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------------ + +from .base_numbers import BaseNumbers +# pylint: disable=line-too-long +class FrenchNumeric: + LangMarker = 'Fr' + CompoundNumberLanguage = True + MultiDecimalSeparatorCulture = True + RoundNumberIntegerRegex = f'(cent|mille|millions|million|milliard|milliards|billion|billions)' + ZeroToNineIntegerRegex = f'(et un|un|une|deux|trois|quatre|cinq|six|sept|huit|neuf)' + TenToNineteenIntegerRegex = f'((seize|quinze|quatorze|treize|douze|onze)|dix(\\Wneuf|\\Whuit|\\Wsept)?)' + TensNumberIntegerRegex = f'(quatre\\Wvingt(s|\\Wdix)?|soixante\\Wdix|vingt|trente|quarante|cinquante|soixante|septante|octante|huitante|nonante)' + DigitsNumberRegex = f'\\d|\\d{{1,3}}(\\.\\d{{3}})' + NegativeNumberTermsRegex = f'^[.]' + NegativeNumberSignRegex = f'^({NegativeNumberTermsRegex}\\s+).*' + HundredsNumberIntegerRegex = f'(({ZeroToNineIntegerRegex}(\\s+cent))|cent|((\\s+cent\\s)+{TensNumberIntegerRegex}))' + BelowHundredsRegex = f'(({TenToNineteenIntegerRegex}|({TensNumberIntegerRegex}([-\\s]+({TenToNineteenIntegerRegex}|{ZeroToNineIntegerRegex}))?))|{ZeroToNineIntegerRegex})' + BelowThousandsRegex = f'(({HundredsNumberIntegerRegex}(\\s+{BelowHundredsRegex})?|{BelowHundredsRegex}|{TenToNineteenIntegerRegex})|cent\\s+{TenToNineteenIntegerRegex})' + SupportThousandsRegex = f'(({BelowThousandsRegex}|{BelowHundredsRegex})\\s+{RoundNumberIntegerRegex}(\\s+{RoundNumberIntegerRegex})?)' + SeparaIntRegex = f'({SupportThousandsRegex}(\\s+{SupportThousandsRegex})*(\\s+{BelowThousandsRegex})?|{BelowThousandsRegex})' + AllIntRegex = f'({SeparaIntRegex}|mille(\\s+{BelowThousandsRegex})?)' + NumbersWithPlaceHolder = lambda placeholder: 
f'(((?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' + AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{SeparaIntRegex}))' + AllFloatRegex = f'({AllIntRegex}(\\s+(virgule|point)){AllPointRegex})' + DoubleDecimalPointRegex = lambda placeholder: f'(((? -# This code was generated by a tool. -# Changes to this file may cause incorrect behavior and will be lost if -# the code is regenerated. -# -# -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# ------------------------------------------------------------------------------ - -from .base_numbers import BaseNumbers -# pylint: disable=line-too-long -class PortugueseNumeric: - LangMarker = 'Por' - CompoundNumberLanguage = True - MultiDecimalSeparatorCulture = True - HundredsNumberIntegerRegex = f'(quatrocent[ao]s|trezent[ao]s|seiscent[ao]s|setecent[ao]s|oitocent[ao]s|novecent[ao]s|duzent[ao]s|quinhent[ao]s|cem|(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' - AllFloatRegex = f'{AllIntRegex}(\\s+(vírgula|virgula|e|ponto)){AllPointRegex}' - DoubleWithMultiplierRegex = f'(((? +# This code was generated by a tool. +# Changes to this file may cause incorrect behavior and will be lost if +# the code is regenerated. +# +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------------ + +from .base_numbers import BaseNumbers +# pylint: disable=line-too-long +class PortugueseNumeric: + LangMarker = 'Por' + CompoundNumberLanguage = True + MultiDecimalSeparatorCulture = True + HundredsNumberIntegerRegex = f'(quatrocent[ao]s|trezent[ao]s|seiscent[ao]s|setecent[ao]s|oitocent[ao]s|novecent[ao]s|duzent[ao]s|quinhent[ao]s|cem|(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' + AllFloatRegex = f'{AllIntRegex}(\\s+(vírgula|virgula|e|ponto)){AllPointRegex}' + DoubleWithMultiplierRegex = f'(((? -# This code was generated by a tool. 
-# Changes to this file may cause incorrect behavior and will be lost if -# the code is regenerated. -# -# -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# ------------------------------------------------------------------------------ - -from .base_numbers import BaseNumbers -# pylint: disable=line-too-long -class SpanishNumeric: - LangMarker = 'Spa' - CompoundNumberLanguage = True - MultiDecimalSeparatorCulture = True - HundredsNumberIntegerRegex = f'(cuatrocient[ao]s|trescient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s|doscient[ao]s|quinient[ao]s|(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' - AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{AllIntRegex}))' - AllFloatRegex = f'{AllIntRegex}(\\s+(coma|con)){AllPointRegex}' - DoubleDecimalPointRegex = lambda placeholder: f'(((?)' - LessRegex = f'((menos|menor|menores|por\\s+debajo)(\\s+(que|de|del))?|más\\s+baj[oa]\\s+que|(?|=)<)' - EqualRegex = f'((igual(es)?|equivalente(s)?|equivale|equivalen|son)(\\s+(a|que|de|al|del))?|(?)=)' - MoreOrEqualPrefix = f'((no\\s+{LessRegex})|(por\\s+lo\\s+menos|como\\s+m[íi]nimo|al\\s+menos))' - MoreOrEqual = f'(({MoreRegex}\\s+(o)?\\s+{EqualRegex})|({EqualRegex}\\s+(o|y)\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(o)\\s+{EqualRegex})?|({EqualRegex}\\s+(o)\\s+)?{MoreOrEqualPrefix}|>\\s*=)' - MoreOrEqualSuffix = f'((\\b(y|o)\\b\\s+(m[áa]s|mayor|mayores)((?!\\s+(alt[oa]|baj[oa]|que|de|del))|(\\s+(que|de|del)(?!(\\s*\\d+)))))|como\\s+m[áa]ximo|por\\s+lo\\s+menos|al\\s+menos)' - LessOrEqualPrefix = f'((no\\s+{MoreRegex})|(como\\s+máximo|como\\s+maximo|como\\s+mucho))' - LessOrEqual = f'(({LessRegex}\\s+(o)?\\s+{EqualRegex})|({EqualRegex}\\s+(o)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(o)?\\s+{EqualRegex})?|({EqualRegex}\\s+(o)?\\s+)?{LessOrEqualPrefix}|<\\s*=)' - LessOrEqualSuffix = 
f'((\\b(y|o)\\b\\s+(menos|menor|menores)((?!\\s+(alt[oa]|baj[oa]|que|de|del))|(\\s+(que|de|del)(?!(\\s*\\d+)))))|como\\s+m[íi]nimo)' - NumberSplitMark = f'(?![,.](?!\\d+))' - MoreRegexNoNumberSucceed = f'((m[áa]s|mayor|mayores)((?!\\s+(que|de|del))|\\s+((que|de|del)(?!(\\s*\\d+))))|(por encima)(?!(\\s*\\d+)))' - LessRegexNoNumberSucceed = f'((menos|menor|menores)((?!\\s+(que|de|del))|\\s+((que|de|del)(?!(\\s*\\d+))))|(por debajo)(?!(\\s*\\d+)))' - EqualRegexNoNumberSucceed = f'((igual|iguales|equivalente|equivalentes|equivale|equivalen)((?!\\s+(a|que|de|al|del))|(\\s+(a|que|de|al|del)(?!(\\s*\\d+)))))' - OneNumberRangeMoreRegex1 = f'({MoreOrEqual}|{MoreRegex})\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' - OneNumberRangeMoreRegex2 = f'(?({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}' - OneNumberRangeMoreSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){EqualRegexNoNumberSucceed})' - OneNumberRangeLessRegex1 = f'({LessOrEqual}|{LessRegex})\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' - OneNumberRangeLessRegex2 = f'(?({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}' - OneNumberRangeLessSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){EqualRegexNoNumberSucceed})' - OneNumberRangeEqualRegex = f'{EqualRegex}\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' - TwoNumberRangeRegex1 = f'entre\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)\\s*y\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' - TwoNumberRangeRegex2 = f'({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(\\by\\b|\\be\\b|pero|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})' - TwoNumberRangeRegex3 = f'({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\\s*(\\by\\b|\\be\\b|pero|,)\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})' - TwoNumberRangeRegex4 = 
f'((de|desde)\\s+)?((el|la|los|las)\\s+)?(?({NumberSplitMark}(?!\\b(entre|de|desde|es)\\b).)+)\\s*{TillRegex}\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' - AmbiguousFractionConnectorsRegex = f'(\\b(en|de)\\b)' - DecimalSeparatorChar = ',' - FractionMarkerToken = 'sobre' - NonDecimalSeparatorChar = '.' - HalfADozenText = 'seis' - WordSeparatorToken = 'y' - WrittenDecimalSeparatorTexts = [r'coma', r'con'] - WrittenGroupSeparatorTexts = [r'punto'] - WrittenIntegerSeparatorTexts = [r'y'] - WrittenFractionSeparatorTexts = [r'con'] - HalfADozenRegex = f'media\\s+docena' - DigitalNumberRegex = f'((?<=\\b)(mil|millones|mill[oó]n|billones|bill[oó]n|trillones|trill[oó]n|docenas?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' - CardinalNumberMap = dict([("cero", 0), - ("un", 1), - ("una", 1), - ("uno", 1), - ("dos", 2), - ("tres", 3), - ("cuatro", 4), - ("cinco", 5), - ("seis", 6), - ("siete", 7), - ("ocho", 8), - ("nueve", 9), - ("diez", 10), - ("once", 11), - ("doce", 12), - ("docena", 12), - ("docenas", 12), - ("trece", 13), - ("catorce", 14), - ("quince", 15), - ("dieciseis", 16), - ("dieciséis", 16), - ("diecisiete", 17), - ("dieciocho", 18), - ("diecinueve", 19), - ("veinte", 20), - ("ventiuna", 21), - ("ventiuno", 21), - ("veintiun", 21), - ("veintiún", 21), - ("veintiuno", 21), - ("veintiuna", 21), - ("veintidos", 22), - ("veintidós", 22), - ("veintitres", 23), - ("veintitrés", 23), - ("veinticuatro", 24), - ("veinticinco", 25), - ("veintiseis", 26), - ("veintiséis", 26), - ("veintisiete", 27), - ("veintiocho", 28), - ("veintinueve", 29), - ("treinta", 30), - ("cuarenta", 40), - ("cincuenta", 50), - ("sesenta", 60), - ("setenta", 70), - ("ochenta", 80), - ("noventa", 90), - ("cien", 100), - ("ciento", 100), - ("doscientas", 200), - ("doscientos", 200), - ("trescientas", 300), - ("trescientos", 300), - ("cuatrocientas", 400), - ("cuatrocientos", 400), - ("quinientas", 500), - ("quinientos", 500), - ("seiscientas", 600), - ("seiscientos", 
600), - ("setecientas", 700), - ("setecientos", 700), - ("ochocientas", 800), - ("ochocientos", 800), - ("novecientas", 900), - ("novecientos", 900), - ("mil", 1000), - ("millon", 1000000), - ("millón", 1000000), - ("millones", 1000000), - ("billon", 1000000000000), - ("billón", 1000000000000), - ("billones", 1000000000000), - ("trillon", 1000000000000000000), - ("trillón", 1000000000000000000), - ("trillones", 1000000000000000000)]) - OrdinalNumberMap = dict([("primero", 1), - ("primera", 1), - ("primer", 1), - ("segundo", 2), - ("segunda", 2), - ("medio", 2), - ("media", 2), - ("tercero", 3), - ("tercera", 3), - ("tercer", 3), - ("tercio", 3), - ("cuarto", 4), - ("cuarta", 4), - ("quinto", 5), - ("quinta", 5), - ("sexto", 6), - ("sexta", 6), - ("septimo", 7), - ("septima", 7), - ("octavo", 8), - ("octava", 8), - ("noveno", 9), - ("novena", 9), - ("decimo", 10), - ("décimo", 10), - ("decima", 10), - ("décima", 10), - ("undecimo", 11), - ("undecima", 11), - ("duodecimo", 12), - ("duodecima", 12), - ("decimotercero", 13), - ("decimotercera", 13), - ("decimocuarto", 14), - ("decimocuarta", 14), - ("decimoquinto", 15), - ("decimoquinta", 15), - ("decimosexto", 16), - ("decimosexta", 16), - ("decimoseptimo", 17), - ("decimoseptima", 17), - ("decimoctavo", 18), - ("decimoctava", 18), - ("decimonoveno", 19), - ("decimonovena", 19), - ("vigesimo", 20), - ("vigesima", 20), - ("trigesimo", 30), - ("trigesima", 30), - ("cuadragesimo", 40), - ("cuadragesima", 40), - ("quincuagesimo", 50), - ("quincuagesima", 50), - ("sexagesimo", 60), - ("sexagesima", 60), - ("septuagesimo", 70), - ("septuagesima", 70), - ("octogesimo", 80), - ("octogesima", 80), - ("nonagesimo", 90), - ("nonagesima", 90), - ("centesimo", 100), - ("centesima", 100), - ("ducentesimo", 200), - ("ducentesima", 200), - ("tricentesimo", 300), - ("tricentesima", 300), - ("cuadringentesimo", 400), - ("cuadringentesima", 400), - ("quingentesimo", 500), - ("quingentesima", 500), - ("sexcentesimo", 600), - 
("sexcentesima", 600), - ("septingentesimo", 700), - ("septingentesima", 700), - ("octingentesimo", 800), - ("octingentesima", 800), - ("noningentesimo", 900), - ("noningentesima", 900), - ("milesimo", 1000), - ("milesima", 1000), - ("millonesimo", 1000000), - ("millonesima", 1000000), - ("billonesimo", 1000000000000), - ("billonesima", 1000000000000)]) - PrefixCardinalMap = dict([("dos", 2), - ("tres", 3), - ("cuatro", 4), - ("cinco", 5), - ("seis", 6), - ("siete", 7), - ("ocho", 8), - ("nueve", 9), - ("diez", 10), - ("once", 11), - ("doce", 12), - ("trece", 13), - ("catorce", 14), - ("quince", 15), - ("dieciseis", 16), - ("dieciséis", 16), - ("diecisiete", 17), - ("dieciocho", 18), - ("diecinueve", 19), - ("veinte", 20), - ("ventiuna", 21), - ("veintiun", 21), - ("veintiún", 21), - ("veintidos", 22), - ("veintitres", 23), - ("veinticuatro", 24), - ("veinticinco", 25), - ("veintiseis", 26), - ("veintisiete", 27), - ("veintiocho", 28), - ("veintinueve", 29), - ("treinta", 30), - ("cuarenta", 40), - ("cincuenta", 50), - ("sesenta", 60), - ("setenta", 70), - ("ochenta", 80), - ("noventa", 90), - ("cien", 100), - ("doscientos", 200), - ("trescientos", 300), - ("cuatrocientos", 400), - ("quinientos", 500), - ("seiscientos", 600), - ("setecientos", 700), - ("ochocientos", 800), - ("novecientos", 900)]) - SuffixOrdinalMap = dict([("milesimo", 1000), - ("millonesimo", 1000000), - ("billonesimo", 1000000000000)]) - RoundNumberMap = dict([("mil", 1000), - ("milesimo", 1000), - ("millon", 1000000), - ("millón", 1000000), - ("millones", 1000000), - ("millonesimo", 1000000), - ("billon", 1000000000000), - ("billón", 1000000000000), - ("billones", 1000000000000), - ("billonesimo", 1000000000000), - ("trillon", 1000000000000000000), - ("trillón", 1000000000000000000), - ("trillones", 1000000000000000000), - ("trillonesimo", 1000000000000000000), - ("docena", 12), - ("docenas", 12), - ("k", 1000), - ("m", 1000000), - ("g", 1000000000), - ("b", 1000000000), - ("t", 
1000000000000)]) - RelativeReferenceOffsetMap = dict([("", "")]) - RelativeReferenceRelativeToMap = dict([("", "")]) -# pylint: enable=line-too-long +# ------------------------------------------------------------------------------ +# +# This code was generated by a tool. +# Changes to this file may cause incorrect behavior and will be lost if +# the code is regenerated. +# +# +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------------ + +from .base_numbers import BaseNumbers +# pylint: disable=line-too-long +class SpanishNumeric: + LangMarker = 'Spa' + CompoundNumberLanguage = True + MultiDecimalSeparatorCulture = True + HundredsNumberIntegerRegex = f'(cuatrocient[ao]s|trescient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s|doscient[ao]s|quinient[ao]s|(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' + AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{AllIntRegex}))' + AllFloatRegex = f'{AllIntRegex}(\\s+(coma|con)){AllPointRegex}' + DoubleDecimalPointRegex = lambda placeholder: f'(((?)' + LessRegex = f'((menos|menor|menores|por\\s+debajo)(\\s+(que|de|del))?|más\\s+baj[oa]\\s+que|(?|=)<)' + EqualRegex = f'((igual(es)?|equivalente(s)?|equivale|equivalen|son)(\\s+(a|que|de|al|del))?|(?)=)' + MoreOrEqualPrefix = f'((no\\s+{LessRegex})|(por\\s+lo\\s+menos|como\\s+m[íi]nimo|al\\s+menos))' + MoreOrEqual = f'(({MoreRegex}\\s+(o)?\\s+{EqualRegex})|({EqualRegex}\\s+(o|y)\\s+{MoreRegex})|{MoreOrEqualPrefix}(\\s+(o)\\s+{EqualRegex})?|({EqualRegex}\\s+(o)\\s+)?{MoreOrEqualPrefix}|>\\s*=)' + MoreOrEqualSuffix = f'((\\b(y|o)\\b\\s+(m[áa]s|mayor|mayores)((?!\\s+(alt[oa]|baj[oa]|que|de|del))|(\\s+(que|de|del)(?!(\\s*\\d+)))))|como\\s+m[áa]ximo|por\\s+lo\\s+menos|al\\s+menos)' + LessOrEqualPrefix = f'((no\\s+{MoreRegex})|(como\\s+máximo|como\\s+maximo|como\\s+mucho))' + LessOrEqual = 
f'(({LessRegex}\\s+(o)?\\s+{EqualRegex})|({EqualRegex}\\s+(o)?\\s+{LessRegex})|{LessOrEqualPrefix}(\\s+(o)?\\s+{EqualRegex})?|({EqualRegex}\\s+(o)?\\s+)?{LessOrEqualPrefix}|<\\s*=)' + LessOrEqualSuffix = f'((\\b(y|o)\\b\\s+(menos|menor|menores)((?!\\s+(alt[oa]|baj[oa]|que|de|del))|(\\s+(que|de|del)(?!(\\s*\\d+)))))|como\\s+m[íi]nimo)' + NumberSplitMark = f'(?![,.](?!\\d+))' + MoreRegexNoNumberSucceed = f'((m[áa]s|mayor|mayores)((?!\\s+(que|de|del))|\\s+((que|de|del)(?!(\\s*\\d+))))|(por encima)(?!(\\s*\\d+)))' + LessRegexNoNumberSucceed = f'((menos|menor|menores)((?!\\s+(que|de|del))|\\s+((que|de|del)(?!(\\s*\\d+))))|(por debajo)(?!(\\s*\\d+)))' + EqualRegexNoNumberSucceed = f'((igual|iguales|equivalente|equivalentes|equivale|equivalen)((?!\\s+(a|que|de|al|del))|(\\s+(a|que|de|al|del)(?!(\\s*\\d+)))))' + OneNumberRangeMoreRegex1 = f'({MoreOrEqual}|{MoreRegex})\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' + OneNumberRangeMoreRegex2 = f'(?({NumberSplitMark}.)+)\\s*{MoreOrEqualSuffix}' + OneNumberRangeMoreSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){MoreRegexNoNumberSucceed})|({MoreRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){EqualRegexNoNumberSucceed})' + OneNumberRangeLessRegex1 = f'({LessOrEqual}|{LessRegex})\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' + OneNumberRangeLessRegex2 = f'(?({NumberSplitMark}.)+)\\s*{LessOrEqualSuffix}' + OneNumberRangeLessSeparateRegex = f'({EqualRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){LessRegexNoNumberSucceed})|({LessRegex}\\s+(?({NumberSplitMark}.)+)(\\s+o\\s+){EqualRegexNoNumberSucceed})' + OneNumberRangeEqualRegex = f'{EqualRegex}\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' + TwoNumberRangeRegex1 = f'entre\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)\\s*y\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' + TwoNumberRangeRegex2 = f'({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})\\s*(\\by\\b|\\be\\b|pero|,)\\s*({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})' + 
TwoNumberRangeRegex3 = f'({OneNumberRangeLessRegex1}|{OneNumberRangeLessRegex2})\\s*(\\by\\b|\\be\\b|pero|,)\\s*({OneNumberRangeMoreRegex1}|{OneNumberRangeMoreRegex2})' + TwoNumberRangeRegex4 = f'((de|desde)\\s+)?((el|la|los|las)\\s+)?(?({NumberSplitMark}(?!\\b(entre|de|desde|es)\\b).)+)\\s*{TillRegex}\\s*((el|la|los|las)\\s+)?(?({NumberSplitMark}.)+)' + AmbiguousFractionConnectorsRegex = f'(\\b(en|de)\\b)' + DecimalSeparatorChar = ',' + FractionMarkerToken = 'sobre' + NonDecimalSeparatorChar = '.' + HalfADozenText = 'seis' + WordSeparatorToken = 'y' + WrittenDecimalSeparatorTexts = [r'coma', r'con'] + WrittenGroupSeparatorTexts = [r'punto'] + WrittenIntegerSeparatorTexts = [r'y'] + WrittenFractionSeparatorTexts = [r'con'] + HalfADozenRegex = f'media\\s+docena' + DigitalNumberRegex = f'((?<=\\b)(mil|millones|mill[oó]n|billones|bill[oó]n|trillones|trill[oó]n|docenas?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' + CardinalNumberMap = dict([("cero", 0), + ("un", 1), + ("una", 1), + ("uno", 1), + ("dos", 2), + ("tres", 3), + ("cuatro", 4), + ("cinco", 5), + ("seis", 6), + ("siete", 7), + ("ocho", 8), + ("nueve", 9), + ("diez", 10), + ("once", 11), + ("doce", 12), + ("docena", 12), + ("docenas", 12), + ("trece", 13), + ("catorce", 14), + ("quince", 15), + ("dieciseis", 16), + ("dieciséis", 16), + ("diecisiete", 17), + ("dieciocho", 18), + ("diecinueve", 19), + ("veinte", 20), + ("ventiuna", 21), + ("ventiuno", 21), + ("veintiun", 21), + ("veintiún", 21), + ("veintiuno", 21), + ("veintiuna", 21), + ("veintidos", 22), + ("veintidós", 22), + ("veintitres", 23), + ("veintitrés", 23), + ("veinticuatro", 24), + ("veinticinco", 25), + ("veintiseis", 26), + ("veintiséis", 26), + ("veintisiete", 27), + ("veintiocho", 28), + ("veintinueve", 29), + ("treinta", 30), + ("cuarenta", 40), + ("cincuenta", 50), + ("sesenta", 60), + ("setenta", 70), + ("ochenta", 80), + ("noventa", 90), + ("cien", 100), + ("ciento", 100), + ("doscientas", 200), + ("doscientos", 
200), + ("trescientas", 300), + ("trescientos", 300), + ("cuatrocientas", 400), + ("cuatrocientos", 400), + ("quinientas", 500), + ("quinientos", 500), + ("seiscientas", 600), + ("seiscientos", 600), + ("setecientas", 700), + ("setecientos", 700), + ("ochocientas", 800), + ("ochocientos", 800), + ("novecientas", 900), + ("novecientos", 900), + ("mil", 1000), + ("millon", 1000000), + ("millón", 1000000), + ("millones", 1000000), + ("billon", 1000000000000), + ("billón", 1000000000000), + ("billones", 1000000000000), + ("trillon", 1000000000000000000), + ("trillón", 1000000000000000000), + ("trillones", 1000000000000000000)]) + OrdinalNumberMap = dict([("primero", 1), + ("primera", 1), + ("primer", 1), + ("segundo", 2), + ("segunda", 2), + ("medio", 2), + ("media", 2), + ("tercero", 3), + ("tercera", 3), + ("tercer", 3), + ("tercio", 3), + ("cuarto", 4), + ("cuarta", 4), + ("quinto", 5), + ("quinta", 5), + ("sexto", 6), + ("sexta", 6), + ("septimo", 7), + ("septima", 7), + ("octavo", 8), + ("octava", 8), + ("noveno", 9), + ("novena", 9), + ("decimo", 10), + ("décimo", 10), + ("decima", 10), + ("décima", 10), + ("undecimo", 11), + ("undecima", 11), + ("duodecimo", 12), + ("duodecima", 12), + ("decimotercero", 13), + ("decimotercera", 13), + ("decimocuarto", 14), + ("decimocuarta", 14), + ("decimoquinto", 15), + ("decimoquinta", 15), + ("decimosexto", 16), + ("decimosexta", 16), + ("decimoseptimo", 17), + ("decimoseptima", 17), + ("decimoctavo", 18), + ("decimoctava", 18), + ("decimonoveno", 19), + ("decimonovena", 19), + ("vigesimo", 20), + ("vigesima", 20), + ("trigesimo", 30), + ("trigesima", 30), + ("cuadragesimo", 40), + ("cuadragesima", 40), + ("quincuagesimo", 50), + ("quincuagesima", 50), + ("sexagesimo", 60), + ("sexagesima", 60), + ("septuagesimo", 70), + ("septuagesima", 70), + ("octogesimo", 80), + ("octogesima", 80), + ("nonagesimo", 90), + ("nonagesima", 90), + ("centesimo", 100), + ("centesima", 100), + ("ducentesimo", 200), + ("ducentesima", 200), + 
("tricentesimo", 300), + ("tricentesima", 300), + ("cuadringentesimo", 400), + ("cuadringentesima", 400), + ("quingentesimo", 500), + ("quingentesima", 500), + ("sexcentesimo", 600), + ("sexcentesima", 600), + ("septingentesimo", 700), + ("septingentesima", 700), + ("octingentesimo", 800), + ("octingentesima", 800), + ("noningentesimo", 900), + ("noningentesima", 900), + ("milesimo", 1000), + ("milesima", 1000), + ("millonesimo", 1000000), + ("millonesima", 1000000), + ("billonesimo", 1000000000000), + ("billonesima", 1000000000000)]) + PrefixCardinalMap = dict([("dos", 2), + ("tres", 3), + ("cuatro", 4), + ("cinco", 5), + ("seis", 6), + ("siete", 7), + ("ocho", 8), + ("nueve", 9), + ("diez", 10), + ("once", 11), + ("doce", 12), + ("trece", 13), + ("catorce", 14), + ("quince", 15), + ("dieciseis", 16), + ("dieciséis", 16), + ("diecisiete", 17), + ("dieciocho", 18), + ("diecinueve", 19), + ("veinte", 20), + ("ventiuna", 21), + ("veintiun", 21), + ("veintiún", 21), + ("veintidos", 22), + ("veintitres", 23), + ("veinticuatro", 24), + ("veinticinco", 25), + ("veintiseis", 26), + ("veintisiete", 27), + ("veintiocho", 28), + ("veintinueve", 29), + ("treinta", 30), + ("cuarenta", 40), + ("cincuenta", 50), + ("sesenta", 60), + ("setenta", 70), + ("ochenta", 80), + ("noventa", 90), + ("cien", 100), + ("doscientos", 200), + ("trescientos", 300), + ("cuatrocientos", 400), + ("quinientos", 500), + ("seiscientos", 600), + ("setecientos", 700), + ("ochocientos", 800), + ("novecientos", 900)]) + SuffixOrdinalMap = dict([("milesimo", 1000), + ("millonesimo", 1000000), + ("billonesimo", 1000000000000)]) + RoundNumberMap = dict([("mil", 1000), + ("milesimo", 1000), + ("millon", 1000000), + ("millón", 1000000), + ("millones", 1000000), + ("millonesimo", 1000000), + ("billon", 1000000000000), + ("billón", 1000000000000), + ("billones", 1000000000000), + ("billonesimo", 1000000000000), + ("trillon", 1000000000000000000), + ("trillón", 1000000000000000000), + ("trillones", 
1000000000000000000), + ("trillonesimo", 1000000000000000000), + ("docena", 12), + ("docenas", 12), + ("k", 1000), + ("m", 1000000), + ("g", 1000000000), + ("b", 1000000000), + ("t", 1000000000000)]) + RelativeReferenceOffsetMap = dict([("", "")]) + RelativeReferenceRelativeToMap = dict([("", "")]) +# pylint: enable=line-too-long diff --git a/Python/libraries/resource-generator/lib/base_code_generator.py b/Python/libraries/resource-generator/lib/base_code_generator.py index 85b0ddde93..205d844497 100644 --- a/Python/libraries/resource-generator/lib/base_code_generator.py +++ b/Python/libraries/resource-generator/lib/base_code_generator.py @@ -25,7 +25,7 @@ def generate(yaml_file_path: str, py_file_name: str, header: str, footer: str): if not os.path.exists(os.path.dirname(py_file_name)): os.makedirs(os.path.dirname(py_file_name)) - with open(py_file_name, mode='w', encoding='utf-8') as file: + with open(py_file_name, mode='w', encoding='utf-8', newline=EOL) as file: file.write(HEADER_COMMENT + EOL + EOL) file.write(header + EOL) diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index 75c155576d..3d3e6cfa0c 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -12060,5 +12060,29 @@ } } ] + }, + { + "Input": "Brunch with Anna at 13:00 February 28, 2013", + "Context": { + "ReferenceDateTime": "2013-06-03T12:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "13:00 february 28, 2013", + "Start": 20, + "End": 42, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2013-02-28T13:00", + "type": "datetime", + "value": "2013-02-28 13:00:00" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/English/DateTimeModelComplexCalendar.json b/Specs/DateTime/English/DateTimeModelComplexCalendar.json index f01dce9fdb..d32dcb5ca1 100644 --- 
a/Specs/DateTime/English/DateTimeModelComplexCalendar.json +++ b/Specs/DateTime/English/DateTimeModelComplexCalendar.json @@ -10233,5 +10233,29 @@ } } ] + }, + { + "Input": "Brunch with Anna at 13:00 February 28, 2013", + "Context": { + "ReferenceDateTime": "2013-06-03T12:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "13:00 february 28, 2013", + "Start": 20, + "End": 42, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2013-02-28T13:00", + "type": "datetime", + "value": "2013-02-28 13:00:00" + } + ] + } + } + ] } ]