From 6f9a886cb9a4ae50bb8b724fc41117329903d454 Mon Sep 17 00:00:00 2001 From: Bo Li <306414383@qq.com> Date: Mon, 26 Aug 2019 16:24:51 +0800 Subject: [PATCH] [ES DateTimeV2] Fix problem recognizing entities like "la semana pasada" (#1818) Also, add a workaround for Age false positives in DE/ES/PT. --- .../German/NumbersWithUnitDefinitions.cs | 11 ++ .../Portuguese/NumbersWithUnitDefinitions.cs | 12 +++ .../Spanish/DateTimeDefinitions.cs | 7 +- .../Spanish/NumbersWithUnitDefinitions.cs | 13 +++ .../SpanishDatePeriodParserConfiguration.cs | 26 +++-- .../Extractors/AgeExtractorConfiguration.cs | 4 +- .../Extractors/AgeExtractorConfiguration.cs | 4 +- .../Extractors/AgeExtractorConfiguration.cs | 4 +- ...ishNumberWithUnitExtractorConfiguration.cs | 1 + .../datetime/resources/EnglishDateTime.java | 2 + .../datetime/resources/FrenchDateTime.java | 2 + .../resources/PortugueseDateTime.java | 2 + .../datetime/resources/SpanishDateTime.java | 10 +- .../SpanishDatePeriodParserConfiguration.java | 22 +++- .../extractors/AgeExtractorConfiguration.java | 2 +- .../extractors/AgeExtractorConfiguration.java | 2 +- .../resources/GermanNumericWithUnit.java | 2 + .../resources/PortugueseNumericWithUnit.java | 2 + .../resources/SpanishNumericWithUnit.java | 2 + .../extractors/AgeExtractorConfiguration.java | 2 +- .../src/resources/englishDateTime.ts | 1 + .../src/resources/frenchDateTime.ts | 1 + .../src/resources/portugueseDateTime.ts | 1 + .../src/resources/spanishDateTime.ts | 8 +- .../src/numberWithUnit/portuguese/age.ts | 2 +- .../src/numberWithUnit/spanish/age.ts | 2 +- .../resources/portugueseNumericWithUnit.ts | 1 + .../src/resources/spanishNumericWithUnit.ts | 1 + Patterns/German/German-NumbersWithUnit.yaml | 12 +++ .../Portuguese-NumbersWithUnit.yaml | 13 +++ Patterns/Spanish/Spanish-DateTime.yaml | 8 +- Patterns/Spanish/Spanish-NumbersWithUnit.yaml | 14 +++ .../resources/english_date_time.py | 1 + .../resources/french_date_time.py | 1 + .../resources/portuguese_date_time.py | 1 + .../resources/spanish_date_time.py | 8 +- .../number_with_unit/portuguese/extractors.py | 2 +- .../number_with_unit/spanish/extractors.py | 2 +- .../resources/portuguese_numeric_with_unit.py | 1 + .../resources/spanish_numeric_with_unit.py | 1 + .../DateTime/Spanish/DatePeriodExtractor.json | 36 +++++++ Specs/DateTime/Spanish/DatePeriodParser.json | 85 ++++++++++++++- Specs/DateTime/Spanish/DateTimeModel.json | 100 ++++++++++++++++++ Specs/NumberWithUnit/German/AgeModel.json | 6 ++ Specs/NumberWithUnit/Portuguese/AgeModel.json | 4 + Specs/NumberWithUnit/Spanish/AgeModel.json | 4 + 46 files changed, 407 insertions(+), 41 deletions(-) diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs index 6b6e835162..f67cb813d6 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/NumbersWithUnitDefinitions.cs @@ -28,6 +28,17 @@ public static class NumbersWithUnitDefinitions { @"Week", @"woche alt|wochen alt|wochen|woche" }, { @"Day", @"tag alt|tage alt|tagen|tage" } }; + public static readonly IList AmbiguousAgeUnitList = new List + { + @"jahren", + @"jahre", + @"monaten", + @"monate", + @"wochen", + @"woche", + @"tagen", + @"tage" + }; public static readonly Dictionary AreaSuffixList = new Dictionary { { @"Square kilometer", @"qkm|quadratkilometer|km^2|km²" }, diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs index 0cc6e8d8e8..b3abf00c56 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/NumbersWithUnitDefinitions.cs @@ -28,6 +28,18 @@ public static class NumbersWithUnitDefinitions { @"Semana", @"semanas|semana" }, { @"Dia", @"dias|dia" } }; + public static readonly IList AmbiguousAgeUnitList = new List + { + @"anos", + @"ano", + @"meses", + @"mes", + @"mês", + @"semanas", + @"semana", + @"dias", + @"dia" + }; public static readonly Dictionary AreaSuffixList = new Dictionary { { @"Quilômetro quadrado", @"quilômetro quadrado|quilómetro quadrado|quilometro quadrado|quilômetros quadrados|quilómetros quadrados|quilomeros quadrados|km2|km^2|km²" }, diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs index 7bf1a73623..7941920b03 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs @@ -50,7 +50,7 @@ public static class DateTimeDefinitions public static readonly string MonthFrontSimpleCasesRegex = $@"\b{MonthSuffixRegex}\s+((desde\s+el|desde|del)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+((entre|entre\s+el)\s+)({DayRegex})\s*{AndRegex}\s*({DayRegex})((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; public static readonly string DayBetweenRegex = $@"\b((entre|entre\s+el)\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{AndRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b"; - public static readonly string OneWordPeriodRegex = $@"\b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex})"; + public static readonly string OneWordPeriodRegex = $@"\b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex})"; public static readonly string MonthWithYearRegex = $@"\b(((pr[oó]xim[oa](s)?|este|esta|[uú]ltim[oa]?)\s+)?({MonthRegex})(\s+|(\s*[,-]\s*))((de|del|de la)\s+)?({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año))\b"; public static readonly string MonthNumWithYearRegex = $@"({YearRegex}(\s*?)[/\-\.~](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-\.~](\s*?){YearRegex})"; public static readonly string WeekOfMonthRegex = $@"(?(la\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima)\s+semana\s+{MonthSuffixRegex})"; @@ -61,7 +61,7 @@ public static class DateTimeDefinitions public static readonly string QuarterRegex = $@"(el\s+)?{QuarterTermRegex}((\s+del?|\s*,\s*)?\s+({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o|a[ñn]o(\s+{AfterNextSuffixRegex})))?"; public static readonly string QuarterRegexYearFront = $@"({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o)\s+(el\s+)?{QuarterTermRegex}"; public const string AllHalfYearRegex = @"^[.]"; - public static readonly string EarlyPrefixRegex = $@"\b(?((comienzos|inicios)\s+({OfPrepositionRegex})))\b"; + public static readonly string EarlyPrefixRegex = $@"\b(?((comienzos?|inicios?)\s+({OfPrepositionRegex})))\b"; public static readonly string MidPrefixRegex = $@"\b(?(mediados\s+({OfPrepositionRegex})))\b"; public static readonly string LaterPrefixRegex = $@"\b(?((fines|finales)\s+({OfPrepositionRegex})))\b"; public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; @@ -462,6 +462,7 @@ public static class DateTimeDefinitions public static readonly string NextPrefixRegex = $@"(pr[oó]xim[oa]|siguiente|{UpcomingPrefixRegex})\b"; public const string PastPrefixRegex = @".^"; public static readonly string PreviousPrefixRegex = $@"([uú]ltim[oa]|{PastPrefixRegex})\b"; + public const string PreviousSuffixRegex = @"\b(pasad[ao])\b"; public const string ThisPrefixRegex = @"(est[ea])\b"; public const string RelativeDayRegex = @"(?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\s+días)|(días\s+((que\s+viene)|pasado)))\b"; public const string RestOfDateRegex = @"\bresto\s+((del|de)\s+)?((la|el|est[ae])\s+)?(?semana|mes|año|decada)(\s+actual)?\b"; @@ -478,7 +479,7 @@ public static class DateTimeDefinitions public const string CommonDatePrefixRegex = @"^[\.]"; public const string DurationUnitRegex = @"^[\.]"; public const string DurationConnectorRegex = @"^[.]"; - public const string SuffixAfterRegex = @"^[.]"; + public const string SuffixAfterRegex = @"^[.](?!$)"; public const string YearPeriodRegex = @"^[.]"; public const string FutureSuffixRegex = @"\b(despu[ée]s)\b"; public static readonly Dictionary WrittenDecades = new Dictionary diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs index 3cb30aa5cc..f476aa4460 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/NumbersWithUnitDefinitions.cs @@ -28,6 +28,19 @@ public static class NumbersWithUnitDefinitions { @"Semana", @"semanas|semana" }, { @"Día", @"dias|días|día|dia" } }; + public static readonly IList AmbiguousAgeUnitList = new List + { + @"años", + @"año", + @"meses", + @"mes", + @"semanas", + @"semana", + @"dias", + @"días", + @"día", + @"dia" + }; public static readonly Dictionary AreaSuffixList = new Dictionary { { @"Kilómetro cuadrado", @"kilómetro cuadrado|kilómetros cuadrados|km2|km^2|km²" }, diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs index d5dcb87b6e..cd89e1785c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Parsers/SpanishDatePeriodParserConfiguration.cs @@ -1,4 +1,6 @@ -using System.Collections.Immutable; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Diagnostics; using System.Linq; using System.Text.RegularExpressions; @@ -15,9 +17,15 @@ public class SpanishDatePeriodParserConfiguration : BaseDateTimeOptionsConfigura public static readonly Regex PreviousPrefixRegex = new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags); + public static readonly Regex PreviousSuffixRegex = + new Regex(DateTimeDefinitions.PreviousSuffixRegex, RegexFlags); + public static readonly Regex ThisPrefixRegex = new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags); + public static readonly Regex AfterNextSuffixRegex = + new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags); + public static readonly Regex RelativeRegex = new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags); @@ -223,7 +231,7 @@ public int GetSwiftDayOrMonth(string text) swift = 1; } - if (PreviousPrefixRegex.IsMatch(trimmedText)) + if (PreviousPrefixRegex.IsMatch(trimmedText) || PreviousSuffixRegex.IsMatch(trimmedText)) { swift = -1; } @@ -267,7 +275,8 @@ public bool IsLastCardinal(string text) public bool IsMonthOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) || + (DateTimeDefinitions.MonthTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsMonthToDate(string text) @@ -279,20 +288,23 @@ public bool IsMonthToDate(string text) public bool IsWeekend(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) || + (DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsWeekOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) && - !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)); + return (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) || + (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText))) && + !DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)); } public bool IsYearOnly(string text) { var trimmedText = text.Trim(); - return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)); + return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) || + (DateTimeDefinitions.YearTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText)); } public bool IsYearToDate(string text) diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs index 816a5d3631..86bdd028b8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/German/Extractors/AgeExtractorConfiguration.cs @@ -9,6 +9,8 @@ public class AgeExtractorConfiguration : GermanNumberWithUnitExtractorConfigurat { public static readonly ImmutableDictionary AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousAgeUnitList = NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList(); + public AgeExtractorConfiguration() : this(new CultureInfo(Culture.German)) { @@ -23,7 +25,7 @@ public AgeExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousAgeUnitList; public override string ExtractType => Constants.SYS_UNIT_AGE; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs index b017ac3924..ba22e831e0 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Portuguese/Extractors/AgeExtractorConfiguration.cs @@ -9,6 +9,8 @@ public class AgeExtractorConfiguration : PortugueseNumberWithUnitExtractorConfig { public static readonly ImmutableDictionary AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousAgeUnitList = NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList(); + public AgeExtractorConfiguration() : this(new CultureInfo(Culture.Portuguese)) { @@ -23,7 +25,7 @@ public AgeExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousAgeUnitList; public override string ExtractType => Constants.SYS_UNIT_AGE; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs index 712be233a6..abe8cd54e1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/AgeExtractorConfiguration.cs @@ -9,6 +9,8 @@ public class AgeExtractorConfiguration : SpanishNumberWithUnitExtractorConfigura { public static readonly ImmutableDictionary AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary(); + public static readonly ImmutableList AmbiguousAgeUnitList = NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList(); + public AgeExtractorConfiguration() : this(new CultureInfo(Culture.Spanish)) { @@ -23,7 +25,7 @@ public AgeExtractorConfiguration(CultureInfo ci) public override ImmutableDictionary PrefixList => null; - public override ImmutableList AmbiguousUnitList => null; + public override ImmutableList AmbiguousUnitList => AmbiguousAgeUnitList; public override string ExtractType => Constants.SYS_UNIT_AGE; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs index 11f671712e..9f27801d34 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs @@ -5,6 +5,7 @@ using Microsoft.Recognizers.Definitions; using Microsoft.Recognizers.Definitions.Spanish; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.Number; using Microsoft.Recognizers.Text.Number.Spanish; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java index d57fb8e232..84b76ca08c 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java @@ -393,6 +393,8 @@ public class EnglishDateTime { public static final String WeekDayEnd = "(this\\s+)?{WeekDayRegex}\\s*,?\\s*$" .replace("{WeekDayRegex}", WeekDayRegex); + public static final String WeekDayStart = "^[\\.]"; + public static final String RangeUnitRegex = "\\b(?years?|months?|weeks?)\\b"; public static final String HourNumRegex = "\\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java index 9ab99c8ac5..6af24f15fe 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java @@ -263,6 +263,8 @@ public class FrenchDateTime { public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$" .replace("{WeekDayRegex}", WeekDayRegex); + public static final String WeekDayStart = "^[\\.]"; + public static final String RangeUnitRegex = "\\b(?(l')?ann[eé]e(s)?|mois|semaines?)\\b"; public static final String HourNumRegex = "\\b(?zero|[aá]\\s+une?|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|dix-sept|dix-huit|dix-neuf|vingt|vingt-et-un|vingt-deux|vingt-trois|dix)\\b"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java index 8af0bb55eb..f5854600e0 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java @@ -196,6 +196,8 @@ public class PortugueseDateTime { public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$" .replace("{WeekDayRegex}", WeekDayRegex); + public static final String WeekDayStart = "^[\\.]"; + public static final String DateYearRegex = "(?{YearRegex}|{TwoDigitYearRegex})" .replace("{YearRegex}", YearRegex) .replace("{TwoDigitYearRegex}", TwoDigitYearRegex); diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java index 2800adcda5..bd360a9266 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/SpanishDateTime.java @@ -107,7 +107,7 @@ public class SpanishDateTime { .replace("{MonthSuffixRegex}", MonthSuffixRegex) .replace("{YearRegex}", YearRegex); - public static final String OneWordPeriodRegex = "\\b(((((la|el)\\s+)?mes\\s+(({OfPrepositionRegex})\\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?({MonthRegex})|((la|el)\\s+)?((({RelativeRegex}\\s+){DateUnitRegex}(\\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\\s+{AfterNextSuffixRegex}))|va\\s+de\\s+{DateUnitRegex})" + public static final String OneWordPeriodRegex = "\\b(((((la|el)\\s+)?mes\\s+(({OfPrepositionRegex})\\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?({MonthRegex})|((la|el)\\s+)?((({RelativeRegex}\\s+){DateUnitRegex}(\\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\\s+{AfterNextSuffixRegex}))|va\\s+de\\s+{DateUnitRegex})" .replace("{MonthRegex}", MonthRegex) .replace("{RelativeRegex}", RelativeRegex) .replace("{OfPrepositionRegex}", OfPrepositionRegex) @@ -148,7 +148,7 @@ public class SpanishDateTime { public static final String AllHalfYearRegex = "^[.]"; - public static final String EarlyPrefixRegex = "\\b(?((comienzos|inicios)\\s+({OfPrepositionRegex})))\\b" + public static final String EarlyPrefixRegex = "\\b(?((comienzos?|inicios?)\\s+({OfPrepositionRegex})))\\b" .replace("{OfPrepositionRegex}", OfPrepositionRegex); public static final String MidPrefixRegex = "\\b(?(mediados\\s+({OfPrepositionRegex})))\\b" @@ -238,6 +238,8 @@ public class SpanishDateTime { public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$" .replace("{WeekDayRegex}", WeekDayRegex); + public static final String WeekDayStart = "^[\\.]"; + public static final String DateYearRegex = "(?{YearRegex}|{TwoDigitYearRegex})" .replace("{YearRegex}", YearRegex) .replace("{TwoDigitYearRegex}", TwoDigitYearRegex); @@ -890,6 +892,8 @@ public class SpanishDateTime { public static final String PreviousPrefixRegex = "([uú]ltim[oa]|{PastPrefixRegex})\\b" .replace("{PastPrefixRegex}", PastPrefixRegex); + public static final String PreviousSuffixRegex = "\\b(pasad[ao])\\b"; + public static final String ThisPrefixRegex = "(est[ea])\\b"; public static final String RelativeDayRegex = "(?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\\s+días)|(días\\s+((que\\s+viene)|pasado)))\\b"; @@ -924,7 +928,7 @@ public class SpanishDateTime { public static final String DurationConnectorRegex = "^[.]"; - public static final String SuffixAfterRegex = "^[.]"; + public static final String SuffixAfterRegex = "^[.](?!$)"; public static final String YearPeriodRegex = "^[.]"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/spanish/parsers/SpanishDatePeriodParserConfiguration.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/spanish/parsers/SpanishDatePeriodParserConfiguration.java index 48c44ff4f5..eadd26ad45 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/spanish/parsers/SpanishDatePeriodParserConfiguration.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/spanish/parsers/SpanishDatePeriodParserConfiguration.java @@ -24,6 +24,7 @@ public class SpanishDatePeriodParserConfiguration extends BaseOptionsConfigurati public static final Pattern nextPrefixRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.NextPrefixRegex); public static final Pattern previousPrefixRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.PreviousPrefixRegex); + public static final Pattern previousSuffixRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.PreviousSuffixRegex); public static final Pattern thisPrefixRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.ThisPrefixRegex); public static final Pattern relativeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.RelativeRegex); public static final Pattern unspecificEndOfRangeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.UnspecificEndOfRangeRegex); @@ -88,6 +89,8 @@ public SpanishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c numbers = config.getNumbers(); writtenDecades = config.getWrittenDecades(); specialDecadeCases = config.getSpecialDecadeCases(); + + afterNextSuffixRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.AfterNextSuffixRegex); } // Regex @@ -184,6 +187,8 @@ public SpanishDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration c private final Pattern centurySuffixRegex; + private final Pattern afterNextSuffixRegex; + private final Pattern nowRegex; // Dictionaries @@ -509,11 +514,12 @@ public int getSwiftDayOrMonth(String text) { int swift = 0; Optional matchNext = Arrays.stream(RegExpUtility.getMatches(nextPrefixRegex, trimmedText)).findFirst(); - Optional matchPast = Arrays.stream(RegExpUtility.getMatches(previousPrefixRegex, trimmedText)).findFirst(); + Optional matchPastPrefix = Arrays.stream(RegExpUtility.getMatches(previousPrefixRegex, trimmedText)).findFirst(); + Optional matchPastSuffix = Arrays.stream(RegExpUtility.getMatches(previousSuffixRegex, trimmedText)).findFirst(); if (matchNext.isPresent()) { swift = 1; - } else if (matchPast.isPresent()) { + } else if (matchPastPrefix.isPresent() || matchPastSuffix.isPresent()) { swift = -1; } @@ -561,7 +567,9 @@ public boolean isLastCardinal(String text) { @Override public boolean isMonthOnly(String text) { String trimmedText = text.trim().toLowerCase(); - return SpanishDateTime.MonthTerms.stream().anyMatch(o -> trimmedText.endsWith(o)); + Optional matchAfterNext = Arrays.stream(RegExpUtility.getMatches(afterNextSuffixRegex, trimmedText)).findFirst(); + return SpanishDateTime.MonthTerms.stream().anyMatch(o -> trimmedText.endsWith(o)) || + SpanishDateTime.MonthTerms.stream().anyMatch(o -> trimmedText.contains(o)) && matchAfterNext.isPresent(); } @Override @@ -573,13 +581,17 @@ public boolean isMonthToDate(String text) { @Override public boolean isWeekend(String text) { String trimmedText = text.trim().toLowerCase(); - return SpanishDateTime.WeekendTerms.stream().anyMatch(o -> trimmedText.endsWith(o)); + Optional matchAfterNext = Arrays.stream(RegExpUtility.getMatches(afterNextSuffixRegex, trimmedText)).findFirst(); + return SpanishDateTime.WeekendTerms.stream().anyMatch(o -> trimmedText.endsWith(o)) || + SpanishDateTime.WeekendTerms.stream().anyMatch(o -> trimmedText.contains(o)) && matchAfterNext.isPresent(); } @Override public boolean isWeekOnly(String text) { String trimmedText = text.trim().toLowerCase(); - return SpanishDateTime.WeekTerms.stream().anyMatch(o -> trimmedText.endsWith(o)) && + Optional matchAfterNext = Arrays.stream(RegExpUtility.getMatches(afterNextSuffixRegex, trimmedText)).findFirst(); + return (SpanishDateTime.WeekTerms.stream().anyMatch(o -> trimmedText.endsWith(o)) || + SpanishDateTime.WeekTerms.stream().anyMatch(o -> trimmedText.contains(o)) && matchAfterNext.isPresent()) && !SpanishDateTime.WeekendTerms.stream().anyMatch(o -> trimmedText.endsWith(o)); } diff --git a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/AgeExtractorConfiguration.java b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/AgeExtractorConfiguration.java index fd0ce0e9a0..102c5afd7f 100644 --- a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/AgeExtractorConfiguration.java +++ b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/german/extractors/AgeExtractorConfiguration.java @@ -36,7 +36,7 @@ public Map getPrefixList() { @Override public List getAmbiguousUnitList() { - return Collections.emptyList(); + return GermanNumericWithUnit.AmbiguousAgeUnitList; } public static Map AgeSuffixList = GermanNumericWithUnit.AgeSuffixList; diff --git a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/portuguese/extractors/AgeExtractorConfiguration.java b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/portuguese/extractors/AgeExtractorConfiguration.java index 888469e61b..f189802183 100644 --- a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/portuguese/extractors/AgeExtractorConfiguration.java +++ b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/portuguese/extractors/AgeExtractorConfiguration.java @@ -36,7 +36,7 @@ public Map getPrefixList() { @Override public List getAmbiguousUnitList() { - return Collections.emptyList(); + return PortugueseNumericWithUnit.AmbiguousAgeUnitList; } public static Map AgeSuffixList = PortugueseNumericWithUnit.AgeSuffixList; diff --git a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java index 57be9ca31b..33fcd85e8d 100644 --- a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java +++ b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/GermanNumericWithUnit.java @@ -26,6 +26,8 @@ public class GermanNumericWithUnit { .put("Day", "tag alt|tage alt|tagen|tage") .build(); + public static final List AmbiguousAgeUnitList = Arrays.asList("jahren", "jahre", "monaten", "monate", "wochen", "woche", "tagen", "tage"); + public static final ImmutableMap AreaSuffixList = ImmutableMap.builder() .put("Square kilometer", "qkm|quadratkilometer|km^2|km²") .put("Square hectometer", "qhm|quadrathektometer|hm^2|hm²|hektar") diff --git a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java index ba5ba9254e..2bb970523d 100644 --- a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java +++ b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/PortugueseNumericWithUnit.java @@ -26,6 +26,8 @@ public class PortugueseNumericWithUnit { .put("Dia", "dias|dia") .build(); + public static final List AmbiguousAgeUnitList = Arrays.asList("anos", "ano", "meses", "mes", "mês", "semanas", "semana", "dias", "dia"); + public static final ImmutableMap AreaSuffixList = ImmutableMap.builder() .put("Quilômetro quadrado", "quilômetro quadrado|quilómetro quadrado|quilometro quadrado|quilômetros quadrados|quilómetros quadrados|quilomeros quadrados|km2|km^2|km²") .put("Hectare", "hectômetro quadrado|hectómetro quadrado|hectômetros quadrados|hectómetros cuadrados|hm2|hm^2|hm²|hectare|hectares") diff --git a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java index 10cc6b5cf5..65c534c3e7 100644 --- a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java +++ b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/resources/SpanishNumericWithUnit.java @@ -26,6 +26,8 @@ public class SpanishNumericWithUnit { .put("Día", "dias|días|día|dia") .build(); + public static final List AmbiguousAgeUnitList = Arrays.asList("años", "año", "meses", "mes", "semanas", "semana", "dias", "días", "día", "dia"); + public static final ImmutableMap AreaSuffixList = ImmutableMap.builder() .put("Kilómetro cuadrado", "kilómetro cuadrado|kilómetros cuadrados|km2|km^2|km²") .put("Hectómetro cuadrado", "hectómetro cuadrado|hectómetros cuadrados|hm2|hm^2|hm²|hectárea|hectáreas") diff --git a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/spanish/extractors/AgeExtractorConfiguration.java b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/spanish/extractors/AgeExtractorConfiguration.java index 26c3155389..3255c16396 100644 --- a/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/spanish/extractors/AgeExtractorConfiguration.java +++ b/Java/libraries/recognizers-text-number-with-unit/src/main/java/com/microsoft/recognizers/text/numberwithunit/spanish/extractors/AgeExtractorConfiguration.java @@ -36,7 +36,7 @@ public Map getPrefixList() { @Override public List getAmbiguousUnitList() { - return Collections.emptyList(); + return SpanishNumericWithUnit.AmbiguousAgeUnitList; } public static Map AgeSuffixList = SpanishNumericWithUnit.AgeSuffixList; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts index 8c302da04d..cba5e221dc 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts @@ -119,6 +119,7 @@ export namespace EnglishDateTime { export const OfMonth = `^\\s*of\\s*${MonthRegex}`; export const MonthEnd = `${MonthRegex}\\s*(the)?\\s*$`; export const WeekDayEnd = `(this\\s+)?${WeekDayRegex}\\s*,?\\s*$`; + export const WeekDayStart = `^[\\.]`; export const RangeUnitRegex = `\\b(?years?|months?|weeks?)\\b`; export const HourNumRegex = `\\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b`; export const MinuteNumRegex = `(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts index bf91eceacd..1cccff9960 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts @@ -83,6 +83,7 @@ export namespace FrenchDateTime { export const OfMonth = `^\\s*de\\s*${MonthRegex}`; export const MonthEnd = `${MonthRegex}\\s*(le)?\\s*$`; export const WeekDayEnd = `${WeekDayRegex}\\s*,?\\s*$`; + export const WeekDayStart = `^[\\.]`; export const RangeUnitRegex = `\\b(?(l')?ann[eé]e(s)?|mois|semaines?)\\b`; export const HourNumRegex = `\\b(?zero|[aá]\\s+une?|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|dix-sept|dix-huit|dix-neuf|vingt|vingt-et-un|vingt-deux|vingt-trois|dix)\\b`; export const MinuteNumRegex = `(?un|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts index 616547dbda..10f1ddd2fd 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts @@ -76,6 +76,7 @@ export namespace PortugueseDateTime { export const OfMonthRegex = `^\\s*de\\s*${MonthSuffixRegex}`; export const MonthEndRegex = `(${MonthRegex}\\s*(o)?\\s*$)`; export const WeekDayEnd = `${WeekDayRegex}\\s*,?\\s*$`; + export const WeekDayStart = `^[\\.]`; export const DateYearRegex = `(?${YearRegex}|${TwoDigitYearRegex})`; export const DateExtractor1 = `\\b(${WeekDayRegex}(\\s+|\\s*,\\s*))?${DayRegex}?((\\s*(de)|[/\\\\\\.\\-])\\s*)?${MonthRegex}\\b`; export const DateExtractor2 = `\\b(${WeekDayRegex}(\\s+|\\s*,\\s*))?${DayRegex}\\s*([\\.\\-]|de)?\\s*${MonthRegex}?(\\s*(,|de)\\s*)${YearRegex}\\b`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts index 270019c840..ee0237cd95 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/spanishDateTime.ts @@ -40,7 +40,7 @@ export namespace SpanishDateTime { export const MonthFrontSimpleCasesRegex = `\\b${MonthSuffixRegex}\\s+((desde\\s+el|desde|del)\\s+)?(${DayRegex})\\s*${TillRegex}\\s*(${DayRegex})((\\s+|\\s*,\\s*)(en\\s+|del\\s+|de\\s+)?${YearRegex})?\\b`; export const MonthFrontBetweenRegex = `\\b${MonthSuffixRegex}\\s+((entre|entre\\s+el)\\s+)(${DayRegex})\\s*${AndRegex}\\s*(${DayRegex})((\\s+|\\s*,\\s*)(en\\s+|del\\s+|de\\s+)?${YearRegex})?\\b`; export const DayBetweenRegex = `\\b((entre|entre\\s+el)\\s+)(${DayRegex})(\\s+${MonthSuffixRegex})?\\s*${AndRegex}\\s*(${DayRegex})\\s+${MonthSuffixRegex}((\\s+|\\s*,\\s*)(en\\s+|del\\s+|de\\s+)?${YearRegex})?\\b`; - export const OneWordPeriodRegex = `\\b(((((la|el)\\s+)?mes\\s+((${OfPrepositionRegex})\\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?(${MonthRegex})|((la|el)\\s+)?(((${RelativeRegex}\\s+)${DateUnitRegex}(\\s+${AfterNextSuffixRegex})?)|${DateUnitRegex}(\\s+${AfterNextSuffixRegex}))|va\\s+de\\s+${DateUnitRegex})`; + export const OneWordPeriodRegex = `\\b(((((la|el)\\s+)?mes\\s+((${OfPrepositionRegex})\\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?(${MonthRegex})|((la|el)\\s+)?(((${RelativeRegex}\\s+)${DateUnitRegex}(\\s+${AfterNextSuffixRegex})?)|${DateUnitRegex}(\\s+${AfterNextSuffixRegex}))|va\\s+de\\s+${DateUnitRegex})`; export const MonthWithYearRegex = `\\b(((pr[oó]xim[oa](s)?|este|esta|[uú]ltim[oa]?)\\s+)?(${MonthRegex})(\\s+|(\\s*[,-]\\s*))((de|del|de la)\\s+)?(${YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\\s+año))\\b`; export const MonthNumWithYearRegex = `(${YearRegex}(\\s*?)[/\\-\\.~](\\s*?)${MonthNumRegex})|(${MonthNumRegex}(\\s*?)[/\\-\\.~](\\s*?)${YearRegex})`; export const WeekOfMonthRegex = `(?(la\\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima)\\s+semana\\s+${MonthSuffixRegex})`; @@ -51,7 +51,7 @@ export namespace SpanishDateTime { export const QuarterRegex = `(el\\s+)?${QuarterTermRegex}((\\s+del?|\\s*,\\s*)?\\s+(${YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\\s+a[ñn]o|a[ñn]o(\\s+${AfterNextSuffixRegex})))?`; export const QuarterRegexYearFront = `(${YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\\s+a[ñn]o)\\s+(el\\s+)?${QuarterTermRegex}`; export const AllHalfYearRegex = `^[.]`; - export const EarlyPrefixRegex = `\\b(?((comienzos|inicios)\\s+(${OfPrepositionRegex})))\\b`; + export const EarlyPrefixRegex = `\\b(?((comienzos?|inicios?)\\s+(${OfPrepositionRegex})))\\b`; export const MidPrefixRegex = `\\b(?(mediados\\s+(${OfPrepositionRegex})))\\b`; export const LaterPrefixRegex = `\\b(?((fines|finales)\\s+(${OfPrepositionRegex})))\\b`; export const PrefixPeriodRegex = `(${EarlyPrefixRegex}|${MidPrefixRegex}|${LaterPrefixRegex})`; @@ -86,6 +86,7 @@ export namespace SpanishDateTime { export const OfMonthRegex = `^\\s*de\\s*${MonthSuffixRegex}`; export const MonthEndRegex = `(${MonthRegex}\\s*(el)?\\s*$)`; export const WeekDayEnd = `${WeekDayRegex}\\s*,?\\s*$`; + export const WeekDayStart = `^[\\.]`; export const DateYearRegex = `(?${YearRegex}|${TwoDigitYearRegex})`; export const DateExtractor1 = `\\b(${WeekDayRegex}(\\s+|\\s*,\\s*))?${DayRegex}?((\\s*(de)|[/\\\\\\.\\-])\\s*)?${MonthRegex}\\b`; export const DateExtractor2 = `\\b(${WeekDayRegex}(\\s+|\\s*,\\s*))?${DayRegex}\\s*([\\.\\-]|de)\\s*${MonthRegex}(\\s*,\\s*|\\s*(del?)\\s*)${DateYearRegex}\\b`; @@ -217,6 +218,7 @@ export namespace SpanishDateTime { export const NextPrefixRegex = `(pr[oó]xim[oa]|siguiente|${UpcomingPrefixRegex})\\b`; export const PastPrefixRegex = `.^`; export const PreviousPrefixRegex = `([uú]ltim[oa]|${PastPrefixRegex})\\b`; + export const PreviousSuffixRegex = `\\b(pasad[ao])\\b`; export const ThisPrefixRegex = `(est[ea])\\b`; export const RelativeDayRegex = `(?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\\s+días)|(días\\s+((que\\s+viene)|pasado)))\\b`; export const RestOfDateRegex = `\\bresto\\s+((del|de)\\s+)?((la|el|est[ae])\\s+)?(?semana|mes|año|decada)(\\s+actual)?\\b`; @@ -233,7 +235,7 @@ export namespace SpanishDateTime { export const CommonDatePrefixRegex = `^[\\.]`; export const DurationUnitRegex = `^[\\.]`; export const DurationConnectorRegex = `^[.]`; - export const SuffixAfterRegex = `^[.]`; + export const SuffixAfterRegex = `^[.](?!$)`; export const YearPeriodRegex = `^[.]`; export const FutureSuffixRegex = `\\b(despu[ée]s)\\b`; export const WrittenDecades: ReadonlyMap = new Map([["", 0]]); diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/age.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/age.ts index 9b05b5dadf..d13069563f 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/age.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/portuguese/age.ts @@ -20,7 +20,7 @@ export class PortugueseAgeExtractorConfiguration extends PortugueseNumberWithUni this.suffixList = PortugueseNumericWithUnit.AgeSuffixList; this.prefixList = new Map(); - this.ambiguousUnitList = new Array(); + this.ambiguousUnitList = PortugueseNumericWithUnit.AmbiguousAgeUnitList; } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/age.ts b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/age.ts index ece35ddbeb..3e822da6c0 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/age.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/numberWithUnit/spanish/age.ts @@ -20,7 +20,7 @@ export class SpanishAgeExtractorConfiguration extends SpanishNumberWithUnitExtra this.suffixList = SpanishNumericWithUnit.AgeSuffixList; this.prefixList = new Map(); - this.ambiguousUnitList = new Array(); + this.ambiguousUnitList = SpanishNumericWithUnit.AmbiguousAgeUnitList; } } diff --git a/JavaScript/packages/recognizers-number-with-unit/src/resources/portugueseNumericWithUnit.ts b/JavaScript/packages/recognizers-number-with-unit/src/resources/portugueseNumericWithUnit.ts index 20966f48f0..17aeb00346 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/resources/portugueseNumericWithUnit.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/resources/portugueseNumericWithUnit.ts @@ -12,6 +12,7 @@ import { BaseNumbers } from "./baseNumbers"; export namespace PortugueseNumericWithUnit { export const AgeSuffixList: ReadonlyMap = new Map([["Ano", "anos|ano"],["Mês", "meses|mes|mês"],["Semana", "semanas|semana"],["Dia", "dias|dia"]]); + export const AmbiguousAgeUnitList = [ "anos","ano","meses","mes","mês","semanas","semana","dias","dia" ]; export const AreaSuffixList: ReadonlyMap = new Map([["Quilômetro quadrado", "quilômetro quadrado|quilómetro quadrado|quilometro quadrado|quilômetros quadrados|quilómetros quadrados|quilomeros quadrados|km2|km^2|km²"],["Hectare", "hectômetro quadrado|hectómetro quadrado|hectômetros quadrados|hectómetros cuadrados|hm2|hm^2|hm²|hectare|hectares"],["Decâmetro quadrado", "decâmetro quadrado|decametro quadrado|decâmetros quadrados|decametro quadrado|dam2|dam^2|dam²|are|ares"],["Metro quadrado", "metro quadrado|metros quadrados|m2|m^2|m²"],["Decímetro quadrado", "decímetro quadrado|decimentro quadrado|decímetros quadrados|decimentros quadrados|dm2|dm^2|dm²"],["Centímetro quadrado", "centímetro quadrado|centimetro quadrado|centímetros quadrados|centrimetros quadrados|cm2|cm^2|cm²"],["Milímetro quadrado", "milímetro quadrado|milimetro quadrado|milímetros quadrados|militmetros quadrados|mm2|mm^2|mm²"],["Polegada quadrada", "polegada quadrada|polegadas quadradas|in2|in^2|in²"],["Pé quadrado", "pé quadrado|pe quadrado|pés quadrados|pes quadrados|pé2|pé^2|pé²|sqft|sq ft|ft2|ft^2|ft²"],["Jarda quadrada", "jarda quadrada|jardas quadradas|yd2|yd^2|yd²"],["Milha quadrada", "milha quadrada|milhas quadradas|mi2|mi^2|mi²"],["Acre", "acre|acres"]]); export const CurrencySuffixList: ReadonlyMap = new Map([["Dólar", "dólar|dolar|dólares|dolares"],["Peso", "peso|pesos"],["Coroa", "coroa|coroas"],["Rublo", "rublo|rublos"],["Libra", "libra|libras"],["Florim", "florim|florins|ƒ"],["Dinar", "dinar|dinares"],["Franco", "franco|francos"],["Rupia", "rúpia|rupia|rúpias|rupias"],["Escudo", "escudo|escudos"],["Xelim", "xelim|xelins|xelims"],["Lira", "lira|liras"],["Centavo", "centavo|cêntimo|centimo|centavos|cêntimos|centimo"],["Centésimo", "centésimo|centésimos"],["Pêni", "pêni|péni|peni|penies|pennies"],["Manat", "manat|manate|mánate|man|manats|manates|mánates"],["Euro", "euro|euros|€|eur"],["Centavo de Euro", "centavo de euro|cêntimo de euro|centimo de euro|centavos de euro|cêntimos de euro|centimos de euro"],["Dólar do Caribe Oriental", "dólar do Caribe Oriental|dolar do Caribe Oriental|dólares do Caribe Oriental|dolares do Caribe Oriental|dólar das Caraíbas Orientais|dolar das Caraibas Orientais|dólares das Caraíbas Orientais|dolares das Caraibas Orientais|ec$|xcd"],["Centavo do Caribe Oriental", "centavo do Caribe Oriental|centavo das Caraíbas Orientais|cêntimo do Caribe Oriental|cêntimo das Caraíbas Orientais|centavos do Caribe Oriental|centavos das Caraíbas Orientais|cêntimos do Caribe Oriental|cêntimos das Caraíbas Orientais"],["Franco CFA da África Ocidental", "franco CFA da África Ocidental|franco CFA da Africa Ocidental|francos CFA da África Occidental|francos CFA da Africa Occidental|franco CFA Ocidental|xof"],["Centavo de CFA da África Ocidental", "centavo de CFA da Africa Occidental|centavos de CFA da África Ocidental|cêntimo de CFA da Africa Occidental|cêntimos de CFA da África Ocidental"],["Franco CFA da África Central", "franco CFA da África Central|franco CFA da Africa Central|francos CFA da África Central|francos CFA da Africa Central|franco CFA central|xaf"],["Centavo de CFA da África Central", "centavo de CFA de África Central|centavos de CFA da África Central|cêntimo de CFA de África Central|cêntimos de CFA da África Central"],["Apsar abcásio", "apsar abcásio|apsar abecásio|apsar abcasio|apsar|apsares"],["Afegani afegão", "afegani afegão|afegane afegão|؋|afn|afegane|afgane|afegâni|afeganis|afeganes|afganes|afegânis"],["Pul", "pul|pules|puls"],["Lek albanês", "lek|lekë|lekes|lek albanês|leque|leques|all"],["Qindarke", "qindarka|qindarkë|qindarke|qindarkas"],["Kwanza angolano", "kwanza angolano|kwanzas angolanos|kwanza|kwanzas|aoa|kz"],["Cêntimo angolano", "cêntimo angolano"],["Florim das Antilhas Holandesas", "florim das antilhas holandesas|florim das antilhas neerlandesas|ang"],["Rial saudita", "rial saudita|riais sauditas|riyal saudita|riyals sauditas|riyal|riyals|sar"],["Halala saudita", "halala saudita|halala|hallalah"],["Dinar argelino", "dinar argelino|dinares argelinos|dzd"],["Cêntimo argelino", "centimo argelino|centimos argelinos|cêntimo argelino|cêntimos argelinos|centavo argelino|centavos argelinos"],["Peso argentino", "peso argentino|pesos argentinos|ar$|ars"],["Centavo argentino", "centavo argentino|centavos argentinos|ctvo.|ctvos."],["Dram armênio", "dram armênio|dram armênios|dram arménio|dram arménios|dram armenio|dram armenios|dram|drame|drames|դր."],["Luma armênio", "luma armênio|lumas armênios|luma arménio|lumas arménios|luma armenio|lumas armenios|luma|lumas"],["Florim arubano", "florín arubeño|florines arubeños|ƒ arubeños|aƒ|awg"],["Dólar australiano", "dólar australiano|dólares australianos|dolar australiano|dolares australianos|a$|aud"],["Centavo australiano", "centavo australiano|centavos australianos"],["Manat azeri", "manat azeri|manats azeris|azn|manat azerbaijanês|manat azerbaijano|manats azerbaijaneses|manats azerbaijanos"],["Qəpik azeri", "qəpik azeri|qəpik|qəpiks"],["Dólar bahamense", "dólar bahamense|dólares bahamense|dolar bahamense|dolares bahamense|dólar baamiano|dólares baamiano|dolar baamiano|dolares baamiano|b$|bsd"],["Centavo bahamense", "centavo bahamense|centavos bahamense"],["Dinar bareinita", "dinar bareinita|dinar baremita|dinares bareinitas|dinares baremitas|bhd"],["Fil bareinita", "fil bareinita|fil baremita|fils bareinitas|fils baremitas"],["Taka bengali", "taka bengali|takas bengalis|taca|tacas|taka|takas|bdt"],["Poisha bengali", "poisha bengali|poishas bengalis"],["Dólar de Barbados", "dólar de barbados|dólares de barbados|dolar de barbados|dolares de barbados|dólar dos barbados|dólares dos barbados|bbd"],["Centavo de Barbados", "centavo de barbados|centavos de barbados|centavo dos barbados|centavos dos barbados"],["Dólar de Belize", "dólar de belize|dólares de belize|dolar de belize|dolares de belize|dólar do belize|dólares do belize|dolar do belize|dolares do belize|bz$|bzd"],["Centavo de Belize", "centavo de belize|centavos de belize|cêntimo do belize|cêntimos do belize"],["Dólar bermudense", "dólar bermudense|dólares bermudenses|bd$|bmd"],["Centavo bermudense", "centavo bermudense|centavos bermudenses|cêntimo bermudense| cêntimos bermudenses"],["Rublo bielorrusso", "rublo bielorrusso|rublos bielorrussos|byr"],["Copeque bielorusso", "copeque bielorrusso|copeques bielorrussos|kopek bielorrusso|kopeks bielorrussos|kap"],["Quiate mianmarense", "quiate mianmarense|quiates mianmarenses|kyat mianmarense|kyates mianmarenses|quiate myanmarense|quiates myanmarenses|kyat myanmarense|kyates myanmarenses|quiate birmanês|quite birmanes|quiates birmaneses|kyat birmanês|kyat birmanes|kyates birmaneses|mmk"],["Pya mianmarense", "pya mianmarense|pyas mianmarenses|pya myanmarense|pyas myanmarenses|pya birmanês|pya birmanes|pyas birmaneses"],["Boliviano", "boliviano|bolivianos|bob|bs"],["Centavo Boliviano", "centavo boliviano|centavos bolivianos"],["Marco da Bósnia e Herzegovina", "marco conversível|marco conversivel|marco convertível|marco convertivel|marcos conversíveis|marcos conversiveis|marcos convertíveis|marcos convertivies|bam"],["Fening da Bósnia e Herzegovina", "fening conversível|fening conversivel|fening convertível|fening convertivel|fenings conversíveis|fenings conversiveis|fenings convertíveis|fenings convertiveis"],["Pula", "pula|pulas|bwp"],["Thebe", "thebe|thebes"],["Real brasileiro", "real brasileiro|real do brasil|real|reais brasileiros|reais do brasil|reais|r$|brl"],["Centavo brasileiro", "centavo de real|centavo brasileiro|centavos de real|centavos brasileiros"],["Dólar de Brunei", "dólar de brunei|dolar de brunei|dólar do brunei|dolar do brunei|dólares de brunéi|dolares de brunei|dólares do brunei|dolares do brunei|bnd"],["Sen de Brunei", "sen de brunei|sen do brunei|sens de brunei|sens do brunei"],["Lev búlgaro", "lev búlgaro|leve búlgaro|leves búlgaros|lev bulgaro|leve bulgaro|leves bulgaros|lv|bgn"],["Stotinka búlgaro", "stotinka búlgaro|stotinki búlgaros|stotinka bulgaro|stotinki bulgaros"],["Franco do Burundi", "franco do burundi|francos do burundi|fbu|fib"],["Centavo Burundi", "centavo burundi|cêntimo burundi|centimo burundi|centavos burundi|cêntimo burundi|centimo burundi"],["Ngultrum butanês", "ngultrum butanês|ngultrum butanes|ngúltrume butanês|ngultrume butanes|ngultrum butaneses|ngúltrumes butaneses|ngultrumes butaneses|btn"],["Chetrum butanês", "chetrum butanês|chetrum butanes|chetrum butaneses"],["Escudo cabo-verdiano", "escudo cabo-verdiano|escudos cabo-verdianos|cve"],["Riel cambojano", "riel cambojano|riéis cambojanos|rieis cambojanos|khr"],["Dólar canadense", "dólar canadense|dolar canadense|dólares canadenses|dolares canadenses|c$|cad"],["Centavo canadense", "centavo canadense|centavos canadenses"],["Peso chileno", "peso chileno|pesos chilenos|cpl"],["Yuan chinês", "yuan chinês|yuan chines|yuans chineses|yuan|yuans|renminbi|rmb|cny|¥"],["Peso colombiano", "peso colombiano|pesos colombianos|cop|col$"],["Centavo colombiano", "centavo colombiano|centavos colombianos"],["Franco comorense", "franco comorense|francos comorenses|kmf|₣"],["Franco congolês", "franco congolês|franco congoles|francos congoleses|cdf"],["Centavo congolês", "centavo congolês|centavo congoles|centavos congoleses|cêntimo congolês|centimo congoles|cêntimos congoleses|cêntimos congoleses"],["Won norte-coreano", "won norte-coreano|wŏn norte-coreano|won norte-coreanos|wŏn norte-coreanos|kpw"],["Chon norte-coreano", "chon norte-coreano|chŏn norte-coreano|chŏn norte-coreanos|chon norte-coreanos"],["Won sul-coreano", "wŏn sul-coreano|won sul-coreano|wŏnes sul-coreanos|wones sul-coreanos|krw"],["Jeon sul-coreano", "jeons sul-coreano|jeons sul-coreanos"],["Colón costarriquenho", "colón costarriquenho|colon costarriquenho|colons costarriquenho|colones costarriquenhos|crc"],["Kuna croata", "kuna croata|kunas croatas|hrk"],["Lipa croata", "lipa croata|lipas croatas"],["Peso cubano", "peso cubano|pesos cubanos|cup"],["Peso cubano convertível", "peso cubano conversível|pesos cubanos conversíveis|peso cubano conversivel|pesos cubanos conversiveis|peso cubano convertível|pesos cubanos convertíveis|peso cubano convertivel|pesos cubanos convertiveis|cuc"],["Coroa dinamarquesa", "coroa dinamarquesa|coroas dinamarquesas|dkk"],["Libra egípcia", "libra egípcia|libra egipcia|libras egípcias|libras egipcias|egp|l.e."],["Piastra egípcia", "piastra egípcia|piastra egipcia|pisastras egípcias|piastras egipcias"],["Dirham dos Emirados Árabes Unidos", "dirham|dirhams|dirham dos emirados arabes unidos|aed|dhs"],["Nakfa", "nakfa|nfk|ern"],["Centavo de Nakfa", "cêntimo de nakfa|cêntimos de nakfa|centavo de nafka|centavos de nafka"],["Peseta", "peseta|pesetas|pts.|ptas.|esp"],["Dólar estadunidense", "dólar dos estados unidos|dolar dos estados unidos|dólar estadunidense|dólar americano|dólares dos estados unidos|dolares dos estados unidos|dólares estadunidenses|dólares americanos|dolar estadunidense|dolar americano|dolares estadunidenses|dolares americanos|usd|u$d|us$|usd$"],["Coroa estoniana", "coroa estoniana|coroas estonianas|eek"],["Senti estoniano", "senti estoniano|senti estonianos"],["Birr etíope", "birr etíope|birr etiope|birr etíopes|birr etiopes|br|etb"],["Santim etíope", "santim etíope|santim etiope|santim etíopes|santim etiopes"],["Peso filipino", "peso filipino|pesos filipinos|php"],["Marco finlandês", "marco finlandês|marco finlandes|marcos finlandeses"],["Dólar fijiano", "dólar fijiano|dolar fijiano|dólares fijianos|dolares fijianos|fj$|fjd"],["Centavo fijiano", "centavo fijiano|centavos fijianos"],["Dalasi gambiano", "dalasi|gmd"],["Bututs", "butut|bututs"],["Lari georgiano", "lari georgiano|lari georgianos|gel"],["Tetri georgiano", "tetri georgiano|tetri georgianos"],["Cedi", "cedi|ghs|gh₵"],["Pesewa", "pesewa"],["Libra de Gibraltar", "libra de gibraltar|libras de gibraltar|gip"],["Peni de Gibraltar", "peni de gibraltar|penies de gibraltar"],["Quetzal guatemalteco", "quetzal guatemalteco|quetzales guatemaltecos|quetzal|quetzales|gtq"],["Centavo guatemalteco", "centavo guatemalteco|centavos guatemaltecos"],["Libra de Guernsey", "libra de Guernsey|libras de Guernsey|ggp"],["Peni de Guernsey", "peni de Guernsey|penies de Guernsey"],["Franco da Guiné", "franco da guiné|franco da guine| franco guineense|francos da guiné|francos da guine|francos guineense|gnf|fg"],["Centavo da Guiné", "cêntimo guineense|centimo guineense|centavo guineense|cêntimos guineenses|centimos guineenses|centavos guineenses"],["Dólar guianense", "dólar guianense|dólares guianense|dolar guianense|dolares guianense|gyd|gy"],["Gurde haitiano", "gurde haitiano|gourde|gurdes haitianos|htg"],["Centavo haitiano", "cêntimo haitiano|cêntimos haitianos|centavo haitiano|centavos haitianos"],["Lempira hondurenha", "lempira hondurenha|lempiras hondurenhas|lempira|lempiras|hnl"],["Centavo hondurenho", "centavo hondurenho|centavos hondurehos|cêntimo hondurenho|cêntimos hondurenhos"],["Dólar de Hong Kong", "dólar de hong kong|dolar de hong kong|dólares de hong kong|dolares de hong kong|hk$|hkd"],["Florim húngaro", "florim húngaro|florim hungaro|florins húngaros|florins hungaros|forinte|forintes|huf"],["Filér húngaro", "fillér|filér|filler|filer"],["Rupia indiana", "rúpia indiana|rupia indiana|rupias indianas|inr"],["Paisa indiana", "paisa indiana|paisas indianas"],["Rupia indonésia", "rupia indonesia|rupia indonésia|rupias indonesias|rupias indonésias|idr"],["Sen indonésio", "send indonésio|sen indonesio|sen indonésios|sen indonesios"],["Rial iraniano", "rial iraniano|riais iranianos|irr"],["Dinar iraquiano", "dinar iraquiano|dinares iraquianos|iqd"],["Fil iraquiano", "fil iraquiano|fils iraquianos|files iraquianos"],["Libra manesa", "libra manesa|libras manesas|imp"],["Peni manês", "peni manes|peni manês|penies maneses"],["Coroa islandesa", "coroa islandesa|coroas islandesas|isk|íkr"],["Aurar islandês", "aurar islandês|aurar islandes|aurar islandeses|eyrir"],["Dólar das Ilhas Cayman", "dólar das ilhas cayman|dolar das ilhas cayman|dólar das ilhas caimão|dólares das ilhas cayman|dolares das ilhas cayman|dólares das ilhas caimão|ci$|kyd"],["Dólar das Ilhas Cook", "dólar das ilhas cook|dolar das ilhas cook|dólares das ilhas cook|dolares das ilhas cook"],["Coroa feroesa", "coroa feroesa|coroas feroesas|fkr"],["Libra das Malvinas", "libra das malvinas|libras das malvinas|fk£|fkp"],["Dólar das Ilhas Salomão", "dólar das ilhas salomão|dolar das ilhas salomao|dólares das ilhas salomão|dolares das ilhas salomao|sbd"],["Novo shekel israelense", "novo shekel|novos shekeles|novo shequel|novo siclo|novo xéquel|shekeles novos|novos sheqalim|sheqalim novos|ils"],["Agora", "agora|agorot"],["Dólar jamaicano", "dólar jamaicano|dolar jamaicano|dólares jamaicanos|dolares jamaicanos|j$|ja$|jmd"],["Yen", "yen|iene|yenes|ienes|jpy"],["Libra de Jersey", "libra de Jersey|libras de Jersey|jep"],["Dinar jordaniano", "dinar jordaniano|dinar jordano|dinares jordanianos|dinares jordanos|jd|jod"],["Piastra jordaniana", "piastra jordaniana|piastra jordano|piastras jordanianas|piastra jordaniano|piastras jordanianos|piastras jordanos"],["Tengue cazaque", "tenge|tengue|tengué|tengue cazaque|kzt"],["Tiyin", "tiyin|tiyins"],["Xelim queniano", "xelim queniano|xelins quenianos|ksh|kes"],["Som quirguiz", "som quirguiz|som quirguizes|soms quirguizes|kgs"],["Tyiyn", "tyiyn|tyiyns"],["Dólar de Kiribati", "dólar de kiribati|dolar de kiribati|dólares de kiribati|dolares de kiribati"],["Dinar kuwaitiano", "dinar kuwaitiano|dinar cuaitiano|dinares kuwaitiano|dinares cuaitianos|kwd"],["Quipe laosiano", "quipe|quipes|kipe|kipes|kip|kip laosiano|kip laociano|kips laosianos|kips laocianos|lak"],["Att laosiano", "at|att|att laosiano|att laosianos"],["Loti do Lesoto", "loti|lóti|maloti|lotis|lótis|lsl"],["Sente", "sente|lisente"],["Libra libanesa", "libra libanesa|libras libanesas|lbp"],["Dólar liberiano", "dólar liberiano|dolar liberiano|dólares liberianos|dolares liberianos|l$|lrd"],["Dinar libio", "dinar libio|dinar líbio|dinares libios|dinares líbios|ld|lyd"],["Dirham libio", "dirham libio|dirhams libios|dirham líbio|dirhams líbios"],["Litas lituana", "litas lituana|litai lituanas|ltl"],["Pataca macaense", "pataca macaense|patacas macaenses|mop$|mop"],["Avo macaense", "avo macaense|avos macaenses"],["Ho macaense", "ho macaense|ho macaenses"],["Dinar macedônio", "denar macedonio|denare macedonios|denar macedônio|denar macedónio|denare macedônio|denare macedónio|dinar macedonio|dinar macedônio|dinar macedónio|dinares macedonios|dinares macedônios|dinares macedónios|den|mkd"],["Deni macedônio", "deni macedonio|deni macedônio|deni macedónio|denis macedonios|denis macedônios|denis macedónios"],["Ariary malgaxe", "ariai malgaxe|ariary malgaxe|ariary malgaxes|ariaris|mga"],["Iraimbilanja", "iraimbilanja|iraimbilanjas"],["Ringuite malaio", "ringgit malaio|ringgit malaios|ringgits malaios|ringuite malaio|ringuites malaios|rm|myr"],["Sen malaio", "sen malaio|sen malaios|centavo malaio|centavos malaios|cêntimo malaio|cêntimos malaios"],["Kwacha do Malawi", "kwacha|cuacha|quacha|mk|mwk"],["Tambala", "tambala|tambalas|tambala malawi"],["Rupia maldiva", "rupia maldiva|rupias maldivas|rupia das maldivas| rupias das maldivas|mvr"],["Dirame marroquino", "dirame marroquino|dirham marroquinho|dirhams marroquinos|dirames marroquinos|mad"],["Rupia maurícia", "rupia maurícia|rupia de Maurício|rupia mauricia|rupia de mauricio|rupias de mauricio|rupias de maurício|rupias mauricias|rupias maurícias|mur"],["Uguia", "uguia|uguias|oguia|ouguiya|oguias|mro"],["Kume", "kumes|kume|khoums"],["Peso mexicano", "peso mexicano|pesos mexicanos|mxn"],["Centavo mexicano", "centavo mexicano|centavos mexicanos"],["Leu moldávio", "leu moldavo|lei moldavos|leu moldávio|leu moldavio|lei moldávios|lei moldavios|leus moldavos|leus moldavios|leus moldávios|mdl"],["Ban moldávio", "ban moldavo|bani moldavos"],["Tugrik mongol", "tugrik mongol|tugrik|tugriks mongóis|tugriks mongois|tug|mnt"],["Metical moçambicao", "metical|metical moçambicano|metical mocambicano|meticais|meticais moçambicanos|meticais mocambicanos|mtn|mzn"],["Dólar namibiano", "dólar namibiano|dólares namibianos|dolar namibio|dolares namibios|n$|nad"],["Centavo namibiano", "centavo namibiano|centavos namibianos|centavo namibio|centavos namibianos"],["Rupia nepalesa", "rupia nepalesa|rupias nepalesas|npr"],["Paisa nepalesa", "paisa nepalesa|paisas nepalesas"],["Córdova nicaraguense", "córdova nicaraguense|cordova nicaraguense|cordova nicaraguana|córdoba nicaragüense|córdobas nicaragüenses|cordobas nicaraguenses|córdovas nicaraguenses|cordovas nicaraguenses|córdovas nicaraguanasc$|nio"],["Centavo nicaraguense", "centavo nicaragüense|centavos nicaraguenses|centavo nicaraguano|centavos nicaraguenses|centavo nicaraguano|centavos nicaraguanos"],["Naira", "naira|ngn"],["Kobo", "kobo"],["Coroa norueguesa", "coroa norueguesa|coroas norueguesas|nok"],["Franco CFP", "franco cfp|francos cfp|xpf"],["Dólar neozelandês", "dólar neozelandês|dolar neozelandes|dólares neozelandeses|dolares neozelandeses|dólar da nova zelândia|dolar da nova zelandia|dólares da nova zelândia|dolares da nova zelandia|nz$|nzd"],["Centavo neozelandês", "centavo neozelandês|centavo neozelandes|centavo da nova zelandia|centavo da nova zelândia|centavos da nova zelandia|centavos neozelandeses|centavos da nova zelândia"],["Rial omanense", "rial omani|riais omanis|rial omanense|riais omanenses|omr"],["Baisa omanense", "baisa omani|baisas omanis|baisa omanense|baisas omanenses"],["Florim holandês", "florim holandês|florim holandes|florins holandeses|nlg"],["Rupia paquistanesa", "rupia paquistanesa|rupias paquistanesas|pkr"],["Paisa paquistanesa", "paisa paquistanesa|paisas paquistanesasas"],["Balboa panamenho", "balboa panamenho|balboas panamenhos|balboa|pab|balboa panamense|balboas panamenses"],["Centavo panamenho", "centavo panamenho|cêntimo panamenho|centavos panamenhos|cêntimos panamenhos|cêntimo panamense|cêntimos panamenses"],["Kina", "kina|kina papuásia|kinas|kinas papuásias|pkg|pgk"],["Toea", "toea"],["Guarani", "guarani|guaranis|gs|pyg"],["Novo Sol", "novo sol peruano|novos sóis peruanos|sol|soles|sóis|nuevo sol|pen|s#."],["Centavo de sol", "cêntimo de sol|cêntimos de sol|centavo de sol|centavos de sol"],["Złoty", "złoty|złotys|zloty|zlotys|zloti|zlotis|zlóti|zlótis|zlote|zł|pln"],["Groszy", "groszy|grosz"],["Rial catariano", "rial qatari|riais qataris|rial catarense|riais catarenses|rial catariano|riais catarianos|qr|qar"],["Dirame catariano", "dirame catariano|dirames catarianos|dirame qatari|dirames qataris|dirame catarense|dirames catarenses|dirham qatari|dirhams qataris|dirham catarense|dirhams catarenses|dirham catariano|dirhams catariano"],["Libra esterlina", "libra esterlina|libras esterlinas|gbp"],["Coroa checa", "coroa checa|coroas checas|kc|czk"],["Peso dominicano", "peso dominicano|pesos dominicanos|rd$|dop"],["Centavo dominicano", "centavo dominicano|centavos dominicanos"],["Franco ruandês", "franco ruandês|franco ruandes|francos ruandeses|rf|rwf"],["Céntimo ruandês", "cêntimo ruandês|centimo ruandes|centavo ruandês|centavo ruandes|cêntimos ruandeses|centimos ruandeses|centavos ruandeses"],["Leu romeno", "leu romeno|lei romenos|leus romenos|ron"],["Ban romeno", "ban romeno|bani romeno|bans romenos"],["Rublo russo", "rublo russo|rublos russos|rub|р."],["Copeque ruso", "copeque russo|copeques russos|kopek ruso|kopeks rusos|copeque|copeques|kopek|kopeks"],["Tala samoano", "tala|tālā|talas|tala samonano|talas samoanos|ws$|sat|wst"],["Sene samoano", "sene"],["Libra de Santa Helena", "libra de santa helena|libras de santa helena|shp"],["Pêni de Santa Helena", "peni de santa helena|penies de santa helena"],["Dobra", "dobra|dobras|db|std"],["Dinar sérvio", "dinar sérvio|dinar servio|dinar serbio|dinares sérvios|dinares servios|dinares serbios|rsd"],["Para sérvio", "para sérvio|para servio|para serbio|paras sérvios|paras servios|paras serbios"],["Rupia seichelense", "rupia de seicheles|rupias de seicheles|rupia seichelense|rupias seichelenses|scr"],["Centavo seichelense", "centavo de seicheles|centavos de seicheles|centavo seichelense|centavos seichelenses"],["Leone serra-leonino", "leone|leones|leone serra-leonino|leones serra-leoninos|le|sll"],["Dólar de Cingapura", "dólar de singapura|dolar de singapura|dórar de cingapura|dolar de cingapura|dólares de singapura|dolares de singapura|dólares de cingapura|dolares de cingapura|sgb"],["Centavo de Cingapura", "centavo de singapura|centavos de singapura|centavo de cingapura|centavos de cingapura"],["Libra síria", "libra síria|libra siria|libras sírias|libras sirias|s£|syp"],["Piastra síria", "piastra siria|piastras sirias|piastra síria|piastras sírias"],["Xelim somali", "xelim somali|xelins somalis|xelim somaliano|xelins somalianos|sos"],["Centavo somali", "centavo somapli|centavos somalis|centavo somaliano|centavos somalianos"],["Xelim da Somalilândia", "xelim da somalilândia|xelins da somalilândia|xelim da somalilandia|xelins da somalilandia"],["Centavo da Somalilândia", "centavo da somalilândia|centavos da somalilândia|centavo da somalilandia|centavos da somalilandia"],["Rupia do Sri Lanka", "rupia do sri lanka|rupia do sri lanca|rupias do sri lanka|rupias do sri lanca|rupia cingalesa|rupias cingalesas|lkr"],["Lilangeni", "lilangeni|lilangenis|emalangeni|szl"],["Rand sul-africano", "rand|rand sul-africano|rands|rands sul-africanos|zar"],["Libra sudanesa", "libra sudanesa|libras sudanesas|sdg"],["Piastra sudanesa", "piastra sudanesa|piastras sudanesas"],["Libra sul-sudanesa", "libra sul-sudanesa|libras sul-sudanesas|ssp"],["Piastra sul-sudanesa", "piastra sul-sudanesa|piastras sul-sudanesas"],["Coroa sueca", "coroa sueca|coroas suecas|sek"],["Franco suíço", "franco suíço|franco suico|francos suíços|francos suicos|sfr|chf"],["Rappen suíço", "rappen suíço|rappen suico|rappens suíços|rappens suicos"],["Dólar surinamês", "dólar surinamês|dolar surinames|dólar do Suriname|dolar do Suriname|dólares surinameses|dolares surinameses|dólares do Suriname|dolares do Suriname|srd"],["Centavo surinamês", "centavo surinamês|centavo surinames|centavos surinameses"],["Baht tailandês", "baht tailandês|bath tailandes|baht tailandeses|thb"],["Satang tailandês", "satang tailandês|satang tailandes|satang tailandeses"],["Novo dólar taiwanês", "novo dólar taiwanês|novo dolar taiwanes|dólar taiwanês|dolar taiwanes|dólares taiwaneses|dolares taiwaneses|twd"],["Centavo taiwanês", "centavo taiwanês|centavo taiwanes|centavos taiwaneses"],["Xelim tanzaniano", "xelim tanzaniano|xelins tanzanianos|tzs"],["Centavo tanzaniano", "centavo tanzaniano|centavos tanzanianos"],["Somoni tajique", "somoni tajique|somoni|somonis tajiques|somonis|tjs"],["Diram tajique", "diram tajique|dirams tajiques|dirames tajiques"],["Paʻanga", "paanga|paangas|paʻanga|pa'anga|top"],["Seniti", "seniti"],["Rublo transdniestriano", "rublo transdniestriano|rublos transdniestriano"],["Copeque transdniestriano", "copeque transdniestriano|copeques transdniestriano"],["Dólar de Trinidade e Tobago", "dólar de trinidade e tobago|dólares trinidade e tobago|dolar de trinidade e tobago|dolares trinidade e tobago|dólar de trinidad e tobago|dólares trinidad e tobago|ttd"],["Centavo de Trinidade e Tobago", "centavo de trinidade e tobago|centavos de trinidade e tobago|centavo de trinidad e tobago|centavos de trinidad e tobago"],["Dinar tunisiano", "dinar tunisiano|dinares tunisianos|dinar tunisino|dinares tunisinos|tnd"],["Milim tunisiano", "milim tunisiano|milim tunesianos|millime tunisianos|millimes tunisianos|milim tunisino|milim tunisinos|millime tunisinos|millimes tunisinos"],["Lira turca", "lira turca|liras turcas|try"],["Kuruş turco", "kuruş turco|kuruş turcos"],["Manat turcomeno", "manat turcomeno|manats turcomenos|tmt"],["Tennesi turcomeno", "tennesi turcomeno|tennesis turcomenos|tenge turcomenos|tenges turcomenos"],["Dólar tuvaluano", "dólar tuvaluano|dolar tuvaluano|dólares tuvaluanos|dolares tuvaluanos"],["Centavo tuvaluano", "centavo tuvaluano|centavos tuvaluanos"],["Grívnia", "grívnia|grivnia|grívnias|grivnias|grivna|grivnas|uah"],["Copeque ucraniano", "kopiyka|copeque ucraniano|copeques ucranianos"],["Xelim ugandês", "xelim ugandês|xelim ugandes|xelins ugandeses|ugx"],["Centavo ugandês", "centavo ugandês|centavo ugandes|centavos ugandeses"],["Peso uruguaio", "peso uruguaio|pesos uruguayis|uyu"],["Centésimo uruguayo", "centésimo uruguaio|centesimo uruguaio|centésimos uruguaios|centesimos uruguaios"],["Som uzbeque", "som uzbeque|som uzbeques|soms uzbeques|somes uzbeques|som usbeque|som usbeques|soms usbeques|somes usbeques|uzs"],["Tiyin uzbeque", "tiyin uzbeque|tiyin uzbeques|tiyins uzbeques|tiyin usbeque|tiyin usbeques|tiyins usbeques"],["Vatu", "vatu|vatus|vuv"],["Bolívar forte venezuelano", "bolívar forte|bolivar forte|bolívar|bolivar|bolívares|bolivares|vef"],["Centavo de bolívar", "cêntimo de bolívar|cêntimos de bolívar|centavo de bolívar|centavo de bolivar|centavos de bolívar|centavos de bolivar"],["Dongue vietnamita", "dongue vietnamita|Đồng vietnamita|dong vietnamita|dongues vietnamitas|dongs vietnamitas|vnd"],["Hào vietnamita", "hào vietnamita|hao vietnamita|hào vietnamitas|hàos vietnamitas|haos vietnamitas"],["Rial iemenita", "rial iemenita|riais iemenitas|yer"],["Fils iemenita", "fils iemenita|fils iemenitas"],["Franco djibutiano", "franco djibutiano|francos djibutianos|franco jibutiano|francos jibutianos|djf"],["Dinar iugoslavo", "dinar iugoslavo|dinares iugoslavos|dinar jugoslavo|dinares jugoslavos|yud"],["Kwacha zambiano", "kwacha zambiano|kwacha zambianos|kwachas zambianos|zmw"],["Ngwee zambiano", "ngwee zambiano|ngwee zambianos|ngwees zambianos"]]); export const CompoundUnitConnectorRegex = `(?[^.])`; diff --git a/JavaScript/packages/recognizers-number-with-unit/src/resources/spanishNumericWithUnit.ts b/JavaScript/packages/recognizers-number-with-unit/src/resources/spanishNumericWithUnit.ts index a2eed91469..f9d7232962 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/resources/spanishNumericWithUnit.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/resources/spanishNumericWithUnit.ts @@ -12,6 +12,7 @@ import { BaseNumbers } from "./baseNumbers"; export namespace SpanishNumericWithUnit { export const AgeSuffixList: ReadonlyMap = new Map([["Año", "años|año"],["Mes", "meses|mes"],["Semana", "semanas|semana"],["Día", "dias|días|día|dia"]]); + export const AmbiguousAgeUnitList = [ "años","año","meses","mes","semanas","semana","dias","días","día","dia" ]; export const AreaSuffixList: ReadonlyMap = new Map([["Kilómetro cuadrado", "kilómetro cuadrado|kilómetros cuadrados|km2|km^2|km²"],["Hectómetro cuadrado", "hectómetro cuadrado|hectómetros cuadrados|hm2|hm^2|hm²|hectárea|hectáreas"],["Decámetro cuadrado", "decámetro cuadrado|decámetros cuadrados|dam2|dam^2|dam²|área|áreas"],["Metro cuadrado", "metro cuadrado|metros cuadrados|m2|m^2|m²"],["Decímetro cuadrado", "decímetro cuadrado|decímetros cuadrados|dm2|dm^2|dm²"],["Centímetro cuadrado", "centímetro cuadrado|centímetros cuadrados|cm2|cm^2|cm²"],["Milímetro cuadrado", "milímetro cuadrado|milímetros cuadrados|mm2|mm^2|mm²"],["Pulgada cuadrado", "pulgada cuadrada|pulgadas cuadradas"],["Pie cuadrado", "pie cuadrado|pies cuadrados|pie2|pie^2|pie²|ft2|ft^2|ft²"],["Yarda cuadrado", "yarda cuadrada|yardas cuadradas|yd2|yd^2|yd²"],["Acre", "acre|acres"]]); export const AreaAmbiguousValues = [ "área","áreas" ]; export const CurrencySuffixList: ReadonlyMap = new Map([["Dólar", "dólar|dólares"],["Peso", "peso|pesos"],["Rublo", "rublo|rublos"],["Libra", "libra|libras"],["Florín", "florín|florines"],["Dinar", "dinar|dinares"],["Franco", "franco|francos"],["Rupia", "rupia|rupias"],["Escudo", "escudo|escudos"],["Chelín", "chelín|chelines"],["Lira", "lira|liras"],["Centavo", "centavo|centavos"],["Céntimo", "céntimo|céntimos"],["Centésimo", "centésimo|centésimos"],["Penique", "penique|peniques"],["Euro", "euro|euros|€|eur"],["Céntimo de Euro", "céntimo de euro|céntimos de euros"],["Dólar del Caribe Oriental", "dólar del caribe oriental|dólares del caribe oriental|ec$|xcd"],["Centavo del Caribe Oriental", "centavo del caribe oriental|centavos del caribe oriental"],["Franco CFA de África Occidental", "franco cfa de África occidental|francos cfa de África occidental|fcfa|xof"],["Céntimo de CFA de África Occidental", "céntimo de cfa de África occidental|céntimos de cfa de África occidental"],["Franco CFA de África Central", "franco cfa de África central|francos cfa de África central|xaf"],["Céntimo de CFA de África Central", "céntimo de cfa de África central|céntimos de cfa de África central"],["Apsar", "apsar|apsares"],["Afgani afgano", "afgani afgano|؋|afn|afganis|afgani"],["Pul", "pul|puls"],["Lek albanés", "lek|lekë|lekes|lek albanés"],["Qindarka", "qindarka|qindarkë|qindarkas"],["Kwanza angoleño", "kwanza angoleño|kwanzas angoleños|kwanza angoleños|kwanzas angoleño|kwanzas|aoa|kz"],["Cêntimo angoleño", "cêntimo angoleño|cêntimo|cêntimos"],["Florín antillano neerlandés", "florín antillano neerlandés|florínes antillano neerlandés|ƒ antillano neerlandés|ang|naƒ"],["Cent antillano neerlandés", "cent|centen"],["Riyal saudí", "riyal saudí|riyales saudí|sar"],["Halalá saudí", "halalá saudí|hallalah"],["Dinar argelino", "dinar argelino|dinares argelinos|dzd"],["Céntimo argelino", "centimo argelino|centimos argelinos"],["Peso argentino", "peso argentino|pesos argentinos|ar$|ars"],["Centavo argentino", "centavo argentino|centavos argentinos|ctvo.|ctvos."],["Dram armenio", "dram armenio|dram armenios|dram|դր."],["Luma armenio", "luma armenio|luma armenios"],["Florín arubeño", "florín arubeño|florines arubeños|ƒ arubeños|aƒ|awg"],["Yotin arubeño", "yotin arubeño|yotines arubeños"],["Dólar australiano", "dólar australiano|dólares australianos|a$|aud"],["Centavo australiano", "centavo australiano|centavos australianos"],["Manat azerí", "manat azerí|man|azn"],["Qəpik azerí", "qəpik azerí|qəpik"],["Dólar bahameño", "dólar bahameño|dólares bahameños|b$|bsd"],["Centavo bahameño", "centavo bahameño|centavos bahameños"],["Dinar bahreiní", "dinar bahreiní|dinares bahreinies|bhd"],["Fil bahreiní", "fil bahreiní|fils bahreinies"],["Taka bangladeshí", "taka bangladeshí|takas bangladeshí|bdt"],["Poisha bangladeshí", "poisha bangladeshí|poishas bangladeshí"],["Dólar de Barbados", "dólar de barbados|dólares de barbados|bbd"],["Centavo de Barbados", "centavo de barbados|centavos de barbados"],["Dólar beliceño", "dólar beliceño|dólares beliceños|bz$|bzd"],["Centavo beliceño", "centavo beliceño|centavos beliceños"],["Dólar bermudeño", "dólar bermudeño|dólares bermudeños|bd$|bmd"],["Centavo bermudeño", "centavo bermudeño|centavos bermudeños"],["Rublo bielorruso", "rublo bielorruso|rublos bielorrusos|byr"],["Kópek bielorruso", "kópek bielorruso|kópeks bielorrusos|kap"],["Kyat birmano", "kyat birmano|kyats birmanos|mmk"],["Pya birmano", "pya birmano|pyas birmanos"],["Boliviano", "boliviano|bolivianos|bob|bs"],["Centésimo Boliviano", "centésimo boliviano|centésimos bolivianos"],["Marco bosnioherzegovino", "marco convertible|marco bosnioherzegovino|marcos convertibles|marcos bosnioherzegovinos|bam"],["Feningas bosnioherzegovino", "feninga convertible|feninga bosnioherzegovina|feningas convertibles"],["Pula", "pula|bwp"],["Thebe", "thebe"],["Real brasileño", "real brasileño|reales brasileños|r$|brl"],["Centavo brasileño", "centavo brasileño|centavos brasileños"],["Dólar de Brunéi", "dólar de brunei|dólares de brunéi|bnd"],["Sen de Brunéi", "sen|sen de brunéi"],["Lev búlgaro", "lev búlgaro|leva búlgaros|lv|bgn"],["Stotinki búlgaro", "stotinka búlgaro|stotinki búlgaros"],["Franco de Burundi", "franco de burundi|francos de burundi|fbu|fib"],["Céntimo Burundi", "céntimo burundi|céntimos burundies"],["Ngultrum butanés", "ngultrum butanés|ngultrum butaneses|btn"],["Chetrum butanés", "chetrum butanés|chetrum butaneses"],["Escudo caboverdiano", "escudo caboverdiano|escudos caboverdianos|cve"],["Riel camboyano", "riel camboyano|rieles camboyanos|khr"],["Dólar canadiense", "dólar canadiense|dólares canadienses|c$|cad"],["Centavo canadiense", "centavo canadiense|centavos canadienses"],["Peso chileno", "peso chileno|pesos chilenos|cpl"],["Yuan chino", "yuan chino|yuanes chinos|yuan|yuanes|renminbi|rmb|cny|¥"],["Peso colombiano", "peso colombiano|pesos colombianos|cop|col$"],["Centavo colombiano", "centavo colombiano|centavos colombianos"],["Franco comorano", "franco comorano|francos comoranos|kmf|₣"],["Franco congoleño", "franco congoleño|francos congoleños|cdf"],["Céntimo congoleño", "céntimo congoleño|céntimos congoleños"],["Won norcoreano", "won norcoreano|wŏn norcoreano|wŏn norcoreanos|kpw"],["Chon norcoreano", "chon norcoreano|chŏn norcoreano|chŏn norcoreanos|chon norcoreanos"],["Won surcoreano", "wŏn surcoreano|won surcoreano|wŏnes surcoreanos|wones surcoreanos|krw"],["Chon surcoreano", "chon surcoreano|chŏn surcoreano|chŏn surcoreanos|chon surcoreanos"],["Colón costarricense", "colón costarricense|colones costarricenses|crc"],["Kuna croata", "kuna croata|kuna croatas|hrk"],["Lipa croata", "lipa croata|lipa croatas"],["Peso cubano", "peso cubano|pesos cubanos|cup"],["Peso cubano convertible", "peso cubano convertible|pesos cubanos convertible|cuc"],["Corona danesa", "corona danesa|coronas danesas|dkk"],["Libra egipcia", "libra egipcia|libras egipcias|egp|l.e."],["Piastra egipcia", "piastra egipcia|piastras egipcias"],["Colón salvadoreño", "colón salvadoreño|colones salvadoreños|svc"],["Dirham de los Emiratos Árabes Unidos", "dirham|dirhams|dirham de los emiratos Árabes unidos|aed|dhs"],["Nakfa", "nakfa|nfk|ern"],["Céntimo de Nakfa", "céntimo de nakfa|céntimos de nakfa"],["Peseta", "peseta|pesetas|pts.|ptas.|esp"],["Dólar estadounidense", "dólar estadounidense|dólares estadounidenses|usd|u$d|us$"],["Corona estonia", "corona estonia|coronas estonias|eek"],["Senti estonia", "senti estonia|senti estonias"],["Birr etíope", "birr etíope|birr etíopes|br|etb"],["Santim etíope", "santim etíope|santim etíopes"],["Peso filipino", "peso filipino|pesos filipinos|php"],["Marco finlandés", "marco finlandés|marcos finlandeses"],["Dólar fiyiano", "dólar fiyiano|dólares fiyianos|fj$|fjd"],["Centavo fiyiano", "centavo fiyiano|centavos fiyianos"],["Dalasi", "dalasi|gmd"],["Bututs", "butut|bututs"],["Lari georgiano", "lari georgiano|lari georgianos|gel"],["Tetri georgiano", "tetri georgiano|tetri georgianos"],["Cedi", "cedi|ghs|gh₵"],["Pesewa", "pesewa"],["Libra gibraltareña", "libra gibraltareña|libras gibraltareñas|gip"],["Penique gibraltareña", "penique gibraltareña|peniques gibraltareñas"],["Quetzal guatemalteco", "quetzal guatemalteco|quetzales guatemaltecos|quetzal|quetzales|gtq"],["Centavo guatemalteco", "centavo guatemalteco|centavos guatemaltecos"],["Libra de Guernsey", "libra de guernsey|libras de guernsey|ggp"],["Penique de Guernsey", "penique de guernsey|peniques de guernsey"],["Franco guineano", "franco guineano|francos guineanos|gnf|fg"],["Céntimo guineano", "céntimo guineano|céntimos guineanos"],["Dólar guyanés", "dólar guyanés|dólares guyaneses|gyd|gy"],["Gourde haitiano", "gourde haitiano|gourde haitianos|htg"],["Céntimo haitiano", "céntimo haitiano|céntimos haitianos"],["Lempira hondureño", "lempira hondureño|lempira hondureños|hnl"],["Centavo hondureño", "centavo hondureño|centavos hondureño"],["Dólar de Hong Kong", "dólar de hong kong|dólares de hong kong|hk$|hkd"],["Forinto húngaro", "forinto húngaro|forinto húngaros|huf"],["Rupia india", "rupia india|rupias indias|inr"],["Paisa india", "paisa india|paise indias"],["Rupia indonesia", "rupia indonesia|rupias indonesias|idr"],["Sen indonesia", "sen indonesia|sen indonesias"],["Rial iraní", "rial iraní|rial iranies|irr"],["Dinar iraquí", "dinar iraquí|dinares iraquies|iqd"],["Fil iraquí", "fil iraquí|fils iraquies"],["Libra manesa", "libra manesa|libras manesas|imp"],["Penique manes", "penique manes|peniques maneses"],["Corona islandesa", "corona islandesa|coronas islandesas|isk|íkr"],["Aurar islandes", "aurar islandes|aurar islandeses"],["Dólar de las Islas Caimán", "dólar de las islas caimán|dólares de las islas caimán|ci$|kyd"],["Dólar de las Islas Cook", "dólar de las islas cook|dólares de las islas cook"],["Corona feroesa", "corona feroesa|coronas feroesas|fkr"],["Libra malvinense", "libra malvinense|libras malvinenses|fk£|fkp"],["Dólar de las Islas Salomón", "dólar de las islas salomón|dólares de las islas salomón|sbd"],["Nuevo shéquel", "nuevo shéquel|nuevos shéquel|ils"],["Agorot", "agorot"],["Dólar jamaiquino", "dólar jamaiquino|dólares jamaiquinos|j$|ja$|jmd"],["Yen", "yen|yenes|jpy"],["Libra de Jersey", "libra de jersey|libras de jersey|jep"],["Dinar jordano", "dinar jordano|dinares jordanos|jd|jod"],["Piastra jordano", "piastra jordano|piastras jordanos"],["Tenge kazajo", "tenge|tenge kazajo|kzt"],["Chelín keniano", "chelín keniano|chelines kenianos|ksh|kes"],["Som kirguís", "som kirguís|kgs"],["Tyiyn", "tyiyn"],["Dólar de Kiribati", "dólar de kiribati|dólares de kiribati"],["Dinar kuwaití", "dinar kuwaití|dinares kuwaití"],["Kip laosiano", "kip|kip laosiano|kip laosianos|lak"],["Att laosiano", "att|att laosiano|att laosianos"],["Loti", "loti|maloti|lsl"],["Sente", "sente|lisente"],["Libra libanesa", "libra libanesa|libras libanesas|lbp"],["Dólar liberiano", "dólar liberiano|dólares liberianos|l$|lrd"],["Dinar libio", "dinar libio|dinares libios|ld|lyd"],["Dirham libio", "dirham libio|dirhams libios"],["Litas lituana", "litas lituana|litai lituanas|ltl"],["Pataca macaense", "pataca macaense|patacas macaenses|mop$|mop"],["Avo macaense", "avo macaense|avos macaenses"],["Ho macaense", "ho macaense|ho macaenses"],["Denar macedonio", "denar macedonio|denare macedonios|den|mkd"],["Deni macedonio", "deni macedonio|deni macedonios"],["Ariary malgache", "ariary malgache|ariary malgaches|mga"],["Iraimbilanja malgache", "iraimbilanja malgache|iraimbilanja malgaches"],["Ringgit malayo", "ringgit malayo|ringgit malayos|rm|myr"],["Sen malayo", "sen malayo|sen malayos"],["Kwacha malauí", "kwacha malauí|mk|mwk"],["Támbala malauí", "támbala malauí"],["Rupia de Maldivas", "rupia de maldivas|rupias de maldivas|mvr"],["Dirham marroquí", "dirham marroquí|dirhams marroquies|mad"],["Rupia de Mauricio", "rupia de Mauricio|rupias de Mauricio|mur"],["Uguiya", "uguiya|uguiyas|mro"],["Jum", "jum|jums"],["Peso mexicano", "peso mexicano|pesos mexicanos|mxn"],["Centavo mexicano", "centavo mexicano|centavos mexicanos"],["Leu moldavo", "leu moldavo|lei moldavos|mdl"],["Ban moldavo", "ban moldavo|bani moldavos"],["Tugrik mongol", "tugrik mongol|tugrik|tugrik mongoles|tug|mnt"],["Metical mozambiqueño", "metical|metical mozambiqueño|meticales|meticales mozambiqueños|mtn|mzn"],["Dram de Nagorno Karabaj", "dram de nagorno karabaj|drams de nagorno karabaj"],["Luma de Nagorno Karabaj", "luma de nagorno karabaj"],["Dólar namibio", "dólar namibio|dólares namibios|n$|nad"],["Centavo namibio", "centavo namibio|centavos namibios"],["Rupia nepalí", "rupia nepalí|rupias nepalies|npr"],["Paisa nepalí", "paisa nepalí|paisas nepalies"],["Córdoba nicaragüense", "córdoba nicaragüense|córdobas nicaragüenses|nio"],["Centavo nicaragüense", "centavo nicaragüense|centavos nicaragüenses"],["Naira", "naira|ngn"],["Kobo", "kobo"],["Corona noruega", "corona noruega|coronas noruegas|nok"],["Franco CFP", "franco cfp|francos cfp|xpf"],["Dólar neozelandés", "dólar neozelandés|dólares neozelandeses|dólar de nueva zelanda|dólares de nueva zelanda|nz$|nzd"],["Centavo neozelandés", "centavo neozelandés|centavo de nueva zelanda|centavos de nueva zelanda|centavos neozelandeses"],["Rial omaní", "rial omaní|riales omanies|omr"],["Baisa omaní", "baisa omaní|baisa omanies"],["Florín neerlandés", "florín neerlandés|florines neerlandeses|nlg"],["Rupia pakistaní", "rupia pakistaní|rupias pakistanies|pkr"],["Paisa pakistaní", "paisa pakistaní|paisas pakistanies"],["Balboa panameño", "balboa panameño|balboa panameños|pab"],["Centésimo panameño", "centésimo panameño|centésimos panameños"],["Kina", "kina|pkg|pgk"],["Toea", "toea"],["Guaraní", "guaraní|guaranies|gs|pyg"],["Sol", "sol|soles|nuevo sol|pen|s#."],["Céntimo de sol", "céntimo de sol|céntimos de sol"],["Złoty", "złoty|esloti|eslotis|zł|pln"],["Groszy", "groszy"],["Riyal qatarí", "riyal qatarí|riyal qataries|qr|qar"],["Dirham qatarí", "dirham qatarí|dirhams qataries"],["Libra esterlina", "libra esterlina|libras esterlinas|gbp"],["Corona checa", "corona checa|coronas checas|kc|czk"],["Peso dominicano", "peso dominicano|pesos dominicanos|rd$|dop"],["Centavo dominicano", "centavo dominicano|centavos dominicanos"],["Franco ruandés", "franco ruandés|francos ruandeses|rf|rwf"],["Céntimo ruandés", "céntimo ruandés|céntimos ruandeses"],["Leu rumano", "leu rumano|lei rumanos|ron"],["Ban rumano", "ban rumano|bani rumanos"],["Rublo ruso", "rublo ruso|rublos rusos|rub"],["Kopek ruso", "kopek ruso|kopeks rusos"],["Tala", "tala|tālā|ws$|sat|wst"],["Sene", "sene"],["Libra de Santa Helena", "libra de santa helena|libras de santa helena|shp"],["Penique de Santa Helena", "penique de santa helena|peniques de santa helena"],["Dobra", "dobra|db|std"],["Dinar serbio", "dinar serbio|dinares serbios|rsd"],["Para serbio", "para serbio|para serbios"],["Rupia de Seychelles", "rupia de seychelles|rupias de seychelles|scr"],["Centavo de Seychelles", "centavo de seychelles|centavos de seychelles"],["Leone", "leone|le|sll"],["Dólar de Singapur", "dólar de singapur|dólares de singapur|sgb"],["Centavo de Singapur", "centavo de Singapur|centavos de Singapur"],["Libra siria", "libra siria|libras sirias|s£|syp"],["Piastra siria", "piastra siria|piastras sirias"],["Chelín somalí", "chelín somalí|chelines somalies|sos"],["Centavo somalí", "centavo somalí|centavos somalies"],["Chelín somalilandés", "chelín somalilandés|chelines somalilandeses"],["Centavo somalilandés", "centavo somalilandés|centavos somalilandeses"],["Rupia de Sri Lanka", "rupia de Sri Lanka|rupias de Sri Lanka|lkr"],["Céntimo de Sri Lanka", "céntimo de Sri Lanka|céntimos de Sri Lanka"],["Lilangeni", "lilangeni|emalangeni|szl"],["Rand sudafricano", "rand|rand sudafricano|zar"],["Libra sudanesa", "libra sudanesa|libras sudanesas|sdg"],["Piastra sudanesa", "piastra sudanesa|piastras sudanesas"],["Libra sursudanesa", "libra sursudanesa|libras sursudanesa|ssp"],["Piastra sursudanesa", "piastra sursudanesa|piastras sursudanesas"],["Corona sueca", "corona sueca|coronas suecas|sek"],["Franco suizo", "franco suizo|francos suizos|sfr|chf"],["Rappen suizo", "rappen suizo|rappens suizos"],["Dólar surinamés", "óolar surinamés|dólares surinameses|srd"],["Centavo surinamés", "centavo surinamés|centavos surinamés"],["Baht tailandés", "baht tailandés|baht tailandeses|thb"],["Satang tailandés", "satang tailandés|satang tailandeses"],["Nuevo dólar taiwanés", "nuevo dólar taiwanés|dólar taiwanés|dólares taiwaneses|twd"],["Centavo taiwanés", "centavo taiwanés|centavos taiwaneses"],["Chelín tanzano", "chelín tanzano|chelines tanzanos|tzs"],["Centavo tanzano", "centavo tanzano|centavos tanzanos"],["Somoni tayiko", "somoni tayiko|somoni|tjs"],["Diram", "diram|dirams"],["Paʻanga", "dólar tongano|dólares tonganos|paʻanga|pa'anga|top"],["Seniti", "seniti"],["Rublo de Transnistria", "rublo de transnistria|rublos de transnistria"],["Kopek de Transnistria", "kopek de transnistria|kopeks de transnistria"],["Dólar trinitense", "dólar trinitense|dólares trinitenses|ttd"],["Centavo trinitense", "centavo trinitense|centavos trinitenses"],["Dinar tunecino", "dinar tunecino|dinares tunecinos|tnd"],["Millime tunecino", "millime tunecino|millimes tunecinos"],["Lira turca", "lira turca|liras turcas|try"],["Kuruş turca", "kuruş turca|kuruş turcas"],["Manat turkmeno", "manat turkmeno|manat turkmenos|tmt"],["Tennesi turkmeno", "tennesi turkmeno|tenge turkmeno"],["Dólar tuvaluano", "dólar tuvaluano|dólares tuvaluanos"],["Centavo tuvaluano", "centavo tuvaluano|centavos tuvaluanos"],["Grivna", "grivna|grivnas|uah"],["Kopiyka", "kopiyka|kópeks"],["Chelín ugandés", "chelín ugandés|chelines ugandeses|ugx"],["Centavo ugandés", "centavo ugandés|centavos ugandeses"],["Peso uruguayo", "peso uruguayo|pesos uruguayos|uyu"],["Centésimo uruguayo", "centésimo uruguayo|centésimos uruguayos"],["Som uzbeko", "som uzbeko|som uzbekos|uzs"],["Tiyin uzbeko", "tiyin uzbeko|tiyin uzbekos"],["Vatu", "vatu|vuv"],["Bolívar fuerte", "bolívar fuerte|bolívar|bolívares|vef"],["Céntimo de bolívar", "céntimo de bolívar|céntimos de bolívar"],["Đồng vietnamita", "Đồng vietnamita|dong vietnamita|dong vietnamitas|vnd"],["Hào vietnamita", "Hào vietnamita|hao vietnamita|hao vietnamitas"],["Rial yemení", "rial yemení|riales yemenies|yer"],["Fils yemení", "fils yemení|fils yemenies"],["Franco yibutiano", "franco yibutiano|francos yibutianos|djf"],["Dinar yugoslavo", "dinar yugoslavo|dinares yugoslavos|yud"],["Kwacha zambiano", "kwacha zambiano|kwacha zambianos|zmw"],["Ngwee zambiano", "ngwee zambiano|ngwee zambianos"]]); diff --git a/Patterns/German/German-NumbersWithUnit.yaml b/Patterns/German/German-NumbersWithUnit.yaml index e098d2c197..bd2c2b8b0d 100644 --- a/Patterns/German/German-NumbersWithUnit.yaml +++ b/Patterns/German/German-NumbersWithUnit.yaml @@ -7,6 +7,18 @@ AgeSuffixList: !dictionary Month: monat alt|monate alt|monaten|monate Week: woche alt|wochen alt|wochen|woche Day: tag alt|tage alt|tagen|tage +# @TODO: This is a workaround. A redesign of Age is necessary to properly address the issue. +AmbiguousAgeUnitList: !list + types: [ string ] + entries: + - jahren + - jahre + - monaten + - monate + - wochen + - woche + - tagen + - tage #AreaExtractorConfiguration AreaSuffixList: !dictionary types: [ string, string ] diff --git a/Patterns/Portuguese/Portuguese-NumbersWithUnit.yaml b/Patterns/Portuguese/Portuguese-NumbersWithUnit.yaml index bc644272bf..d782669ad6 100644 --- a/Patterns/Portuguese/Portuguese-NumbersWithUnit.yaml +++ b/Patterns/Portuguese/Portuguese-NumbersWithUnit.yaml @@ -7,6 +7,19 @@ AgeSuffixList: !dictionary Mês: meses|mes|mês Semana: semanas|semana Dia: dias|dia +# @TODO: This is a workaround. A redesign of Age is necessary to properly address the issue. +AmbiguousAgeUnitList: !list + types: [ string ] + entries: + - anos + - ano + - meses + - mes + - mês + - semanas + - semana + - dias + - dia #AreaExtractorConfiguration AreaSuffixList: !dictionary types: [ string, string ] diff --git a/Patterns/Spanish/Spanish-DateTime.yaml b/Patterns/Spanish/Spanish-DateTime.yaml index 1840064d05..e1cbd133a2 100644 --- a/Patterns/Spanish/Spanish-DateTime.yaml +++ b/Patterns/Spanish/Spanish-DateTime.yaml @@ -72,7 +72,7 @@ DayBetweenRegex: !nestedRegex def: \b((entre|entre\s+el)\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{AndRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b references: [ DayRegex, AndRegex, MonthSuffixRegex, YearRegex ] OneWordPeriodRegex: !nestedRegex - def: \b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex}) + def: \b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex}) references: [MonthRegex, RelativeRegex, OfPrepositionRegex, AfterNextSuffixRegex,DateUnitRegex] MonthWithYearRegex: !nestedRegex def: \b(((pr[oó]xim[oa](s)?|este|esta|[uú]ltim[oa]?)\s+)?({MonthRegex})(\s+|(\s*[,-]\s*))((de|del|de la)\s+)?({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año))\b @@ -104,7 +104,7 @@ AllHalfYearRegex: !simpleRegex # TODO: modify below regex according to the counterpart in English def: ^[.] EarlyPrefixRegex: !nestedRegex - def: \b(?((comienzos|inicios)\s+({OfPrepositionRegex})))\b + def: \b(?((comienzos?|inicios?)\s+({OfPrepositionRegex})))\b references: [OfPrepositionRegex] MidPrefixRegex: !nestedRegex def: \b(?(mediados\s+({OfPrepositionRegex})))\b @@ -799,6 +799,8 @@ PastPrefixRegex: !simpleRegex PreviousPrefixRegex: !nestedRegex def: ([uú]ltim[oa]|{PastPrefixRegex})\b references: [ PastPrefixRegex ] +PreviousSuffixRegex: !simpleRegex + def: \b(pasad[ao])\b ThisPrefixRegex: !simpleRegex def: (est[ea])\b RelativeDayRegex: !simpleRegex @@ -842,7 +844,7 @@ DurationConnectorRegex: !simpleRegex def: ^[.] SuffixAfterRegex: !simpleRegex # TODO: modify below regex according to the counterpart in English - def: ^[.] + def: ^[.](?!$) YearPeriodRegex: !simpleRegex # TODO: modify below regex according to the counterpart in English def: ^[.] diff --git a/Patterns/Spanish/Spanish-NumbersWithUnit.yaml b/Patterns/Spanish/Spanish-NumbersWithUnit.yaml index 29767f58ba..438388629a 100644 --- a/Patterns/Spanish/Spanish-NumbersWithUnit.yaml +++ b/Patterns/Spanish/Spanish-NumbersWithUnit.yaml @@ -7,6 +7,20 @@ AgeSuffixList: !dictionary Mes: meses|mes Semana: semanas|semana Día: dias|días|día|dia +# @TODO: This is a workaround. A redesign of Age is necessary to properly address the issue. +AmbiguousAgeUnitList: !list + types: [ string ] + entries: + - años + - año + - meses + - mes + - semanas + - semana + - dias + - días + - día + - dia #AreaExtractorConfiguration AreaSuffixList: !dictionary types: [ string, string ] diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py index 9c6898df1d..a9ea7f155b 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py @@ -122,6 +122,7 @@ class EnglishDateTime: OfMonth = f'^\\s*of\\s*{MonthRegex}' MonthEnd = f'{MonthRegex}\\s*(the)?\\s*$' WeekDayEnd = f'(this\\s+)?{WeekDayRegex}\\s*,?\\s*$' + WeekDayStart = f'^[\\.]' RangeUnitRegex = f'\\b(?years?|months?|weeks?)\\b' HourNumRegex = f'\\b(?zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b' MinuteNumRegex = f'(?ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py index f06cd945e7..6d578d4e33 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py @@ -86,6 +86,7 @@ class FrenchDateTime: OfMonth = f'^\\s*de\\s*{MonthRegex}' MonthEnd = f'{MonthRegex}\\s*(le)?\\s*$' WeekDayEnd = f'{WeekDayRegex}\\s*,?\\s*$' + WeekDayStart = f'^[\\.]' RangeUnitRegex = f'\\b(?(l\')?ann[eé]e(s)?|mois|semaines?)\\b' HourNumRegex = f'\\b(?zero|[aá]\\s+une?|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|dix-sept|dix-huit|dix-neuf|vingt|vingt-et-un|vingt-deux|vingt-trois|dix)\\b' MinuteNumRegex = f'(?un|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|seize|dix-sept|dix-huit|dix-neuf|vingt|trente|quarante|cinquante|dix)' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py index 42eba7b03c..da9f7013bb 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py @@ -79,6 +79,7 @@ class PortugueseDateTime: OfMonthRegex = f'^\\s*de\\s*{MonthSuffixRegex}' MonthEndRegex = f'({MonthRegex}\\s*(o)?\\s*$)' WeekDayEnd = f'{WeekDayRegex}\\s*,?\\s*$' + WeekDayStart = f'^[\\.]' DateYearRegex = f'(?{YearRegex}|{TwoDigitYearRegex})' DateExtractor1 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}?((\\s*(de)|[/\\\\\\.\\-])\\s*)?{MonthRegex}\\b' DateExtractor2 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}\\s*([\\.\\-]|de)?\\s*{MonthRegex}?(\\s*(,|de)\\s*){YearRegex}\\b' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py index 037ed58bfc..3342d722cd 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/spanish_date_time.py @@ -43,7 +43,7 @@ class SpanishDateTime: MonthFrontSimpleCasesRegex = f'\\b{MonthSuffixRegex}\\s+((desde\\s+el|desde|del)\\s+)?({DayRegex})\\s*{TillRegex}\\s*({DayRegex})((\\s+|\\s*,\\s*)(en\\s+|del\\s+|de\\s+)?{YearRegex})?\\b' MonthFrontBetweenRegex = f'\\b{MonthSuffixRegex}\\s+((entre|entre\\s+el)\\s+)({DayRegex})\\s*{AndRegex}\\s*({DayRegex})((\\s+|\\s*,\\s*)(en\\s+|del\\s+|de\\s+)?{YearRegex})?\\b' DayBetweenRegex = f'\\b((entre|entre\\s+el)\\s+)({DayRegex})(\\s+{MonthSuffixRegex})?\\s*{AndRegex}\\s*({DayRegex})\\s+{MonthSuffixRegex}((\\s+|\\s*,\\s*)(en\\s+|del\\s+|de\\s+)?{YearRegex})?\\b' - OneWordPeriodRegex = f'\\b(((((la|el)\\s+)?mes\\s+(({OfPrepositionRegex})\\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?({MonthRegex})|((la|el)\\s+)?((({RelativeRegex}\\s+){DateUnitRegex}(\\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\\s+{AfterNextSuffixRegex}))|va\\s+de\\s+{DateUnitRegex})' + OneWordPeriodRegex = f'\\b(((((la|el)\\s+)?mes\\s+(({OfPrepositionRegex})\\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?({MonthRegex})|((la|el)\\s+)?((({RelativeRegex}\\s+){DateUnitRegex}(\\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\\s+{AfterNextSuffixRegex}))|va\\s+de\\s+{DateUnitRegex})' MonthWithYearRegex = f'\\b(((pr[oó]xim[oa](s)?|este|esta|[uú]ltim[oa]?)\\s+)?({MonthRegex})(\\s+|(\\s*[,-]\\s*))((de|del|de la)\\s+)?({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\\s+año))\\b' MonthNumWithYearRegex = f'({YearRegex}(\\s*?)[/\\-\\.~](\\s*?){MonthNumRegex})|({MonthNumRegex}(\\s*?)[/\\-\\.~](\\s*?){YearRegex})' WeekOfMonthRegex = f'(?(la\\s+)?(?primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima)\\s+semana\\s+{MonthSuffixRegex})' @@ -54,7 +54,7 @@ class SpanishDateTime: QuarterRegex = f'(el\\s+)?{QuarterTermRegex}((\\s+del?|\\s*,\\s*)?\\s+({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\\s+a[ñn]o|a[ñn]o(\\s+{AfterNextSuffixRegex})))?' QuarterRegexYearFront = f'({YearRegex}|(?pr[oó]ximo(s)?|[uú]ltimo?|este)\\s+a[ñn]o)\\s+(el\\s+)?{QuarterTermRegex}' AllHalfYearRegex = f'^[.]' - EarlyPrefixRegex = f'\\b(?((comienzos|inicios)\\s+({OfPrepositionRegex})))\\b' + EarlyPrefixRegex = f'\\b(?((comienzos?|inicios?)\\s+({OfPrepositionRegex})))\\b' MidPrefixRegex = f'\\b(?(mediados\\s+({OfPrepositionRegex})))\\b' LaterPrefixRegex = f'\\b(?((fines|finales)\\s+({OfPrepositionRegex})))\\b' PrefixPeriodRegex = f'({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})' @@ -89,6 +89,7 @@ class SpanishDateTime: OfMonthRegex = f'^\\s*de\\s*{MonthSuffixRegex}' MonthEndRegex = f'({MonthRegex}\\s*(el)?\\s*$)' WeekDayEnd = f'{WeekDayRegex}\\s*,?\\s*$' + WeekDayStart = f'^[\\.]' DateYearRegex = f'(?{YearRegex}|{TwoDigitYearRegex})' DateExtractor1 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}?((\\s*(de)|[/\\\\\\.\\-])\\s*)?{MonthRegex}\\b' DateExtractor2 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}\\s*([\\.\\-]|de)\\s*{MonthRegex}(\\s*,\\s*|\\s*(del?)\\s*){DateYearRegex}\\b' @@ -418,6 +419,7 @@ class SpanishDateTime: NextPrefixRegex = f'(pr[oó]xim[oa]|siguiente|{UpcomingPrefixRegex})\\b' PastPrefixRegex = f'.^' PreviousPrefixRegex = f'([uú]ltim[oa]|{PastPrefixRegex})\\b' + PreviousSuffixRegex = f'\\b(pasad[ao])\\b' ThisPrefixRegex = f'(est[ea])\\b' RelativeDayRegex = f'(?((este|pr[oó]ximo|([uú]ltim(o|as|os)))\\s+días)|(días\\s+((que\\s+viene)|pasado)))\\b' RestOfDateRegex = f'\\bresto\\s+((del|de)\\s+)?((la|el|est[ae])\\s+)?(?semana|mes|año|decada)(\\s+actual)?\\b' @@ -434,7 +436,7 @@ class SpanishDateTime: CommonDatePrefixRegex = f'^[\\.]' DurationUnitRegex = f'^[\\.]' DurationConnectorRegex = f'^[.]' - SuffixAfterRegex = f'^[.]' + SuffixAfterRegex = f'^[.](?!$)' YearPeriodRegex = f'^[.]' FutureSuffixRegex = f'\\b(despu[ée]s)\\b' WrittenDecades = dict([("", 0)]) diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/portuguese/extractors.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/portuguese/extractors.py index 41f2b39c88..2c2075fc76 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/portuguese/extractors.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/portuguese/extractors.py @@ -83,7 +83,7 @@ def __init__(self, culture_info: CultureInfo = None): super().__init__(culture_info) self._suffix_list = PortugueseNumericWithUnit.AgeSuffixList self._prefix_list = dict() - self._ambiguous_unit_list = list() + self._ambiguous_unit_list = PortugueseNumericWithUnit.AmbiguousAgeUnitList class PortugueseCurrencyExtractorConfiguration(PortugueseNumberWithUnitExtractorConfiguration): diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/spanish/extractors.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/spanish/extractors.py index f0c6b48f4a..bc8f896008 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/spanish/extractors.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/number_with_unit/spanish/extractors.py @@ -82,7 +82,7 @@ def __init__(self, culture_info: CultureInfo = None): super().__init__(culture_info) self._suffix_list = SpanishNumericWithUnit.AgeSuffixList self._prefix_list = dict() - self._ambiguous_unit_list = list() + self._ambiguous_unit_list = SpanishNumericWithUnit.AmbiguousAgeUnitList class SpanishCurrencyExtractorConfiguration(SpanishNumberWithUnitExtractorConfiguration): diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/portuguese_numeric_with_unit.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/portuguese_numeric_with_unit.py index 327b2ce207..43c5c11dbb 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/portuguese_numeric_with_unit.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/portuguese_numeric_with_unit.py @@ -18,6 +18,7 @@ class PortugueseNumericWithUnit: ("Mês", "meses|mes|mês"), ("Semana", "semanas|semana"), ("Dia", "dias|dia")]) + AmbiguousAgeUnitList = [r'anos', r'ano', r'meses', r'mes', r'mês', r'semanas', r'semana', r'dias', r'dia'] AreaSuffixList = dict([("Quilômetro quadrado", "quilômetro quadrado|quilómetro quadrado|quilometro quadrado|quilômetros quadrados|quilómetros quadrados|quilomeros quadrados|km2|km^2|km²"), ("Hectare", "hectômetro quadrado|hectómetro quadrado|hectômetros quadrados|hectómetros cuadrados|hm2|hm^2|hm²|hectare|hectares"), ("Decâmetro quadrado", "decâmetro quadrado|decametro quadrado|decâmetros quadrados|decametro quadrado|dam2|dam^2|dam²|are|ares"), diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/spanish_numeric_with_unit.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/spanish_numeric_with_unit.py index 0f9b9794f2..02948a8150 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/spanish_numeric_with_unit.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/spanish_numeric_with_unit.py @@ -18,6 +18,7 @@ class SpanishNumericWithUnit: ("Mes", "meses|mes"), ("Semana", "semanas|semana"), ("Día", "dias|días|día|dia")]) + AmbiguousAgeUnitList = [r'años', r'año', r'meses', r'mes', r'semanas', r'semana', r'dias', r'días', r'día', r'dia'] AreaSuffixList = dict([("Kilómetro cuadrado", "kilómetro cuadrado|kilómetros cuadrados|km2|km^2|km²"), ("Hectómetro cuadrado", "hectómetro cuadrado|hectómetros cuadrados|hm2|hm^2|hm²|hectárea|hectáreas"), ("Decámetro cuadrado", "decámetro cuadrado|decámetros cuadrados|dam2|dam^2|dam²|área|áreas"), diff --git a/Specs/DateTime/Spanish/DatePeriodExtractor.json b/Specs/DateTime/Spanish/DatePeriodExtractor.json index 48e91441a5..f782e4396d 100644 --- a/Specs/DateTime/Spanish/DatePeriodExtractor.json +++ b/Specs/DateTime/Spanish/DatePeriodExtractor.json @@ -4611,5 +4611,41 @@ "Length": 14 } ] + }, + { + "Input": "semana pasada", + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "semana pasada", + "Type": "daterange", + "Start": 0, + "Length": 13 + } + ] + }, + { + "Input": "la semana pasada", + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "la semana pasada", + "Type": "daterange", + "Start": 0, + "Length": 16 + } + ] + }, + { + "Input": "el mes pasado", + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "el mes pasado", + "Type": "daterange", + "Start": 0, + "Length": 13 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Spanish/DatePeriodParser.json b/Specs/DateTime/Spanish/DatePeriodParser.json index 3822d62ffa..da1d99722f 100644 --- a/Specs/DateTime/Spanish/DatePeriodParser.json +++ b/Specs/DateTime/Spanish/DatePeriodParser.json @@ -6821,11 +6821,10 @@ "Context": { "ReferenceDateTime": "2018-08-08T00:00:00" }, - "NotSupported": "dotnet, java", "NotSupportedByDesign": "javascript, python", "Results": [ { - "Text": "semana del 18", + "Text": "la semana del 18", "Type": "daterange", "Value": { "Timex": "XXXX-XX-18", @@ -6838,8 +6837,8 @@ "endDate": "2018-07-23" } }, - "Start": 31, - "Length": 13 + "Start": 28, + "Length": 16 } ] }, @@ -6950,5 +6949,83 @@ "Length": 15 } ] + }, + { + "Input": "la semana pasada", + "Context": { + "ReferenceDateTime": "2019-08-21T17:00:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "la semana pasada", + "Type": "daterange", + "Value": { + "Timex": "2019-W33", + "FutureResolution": { + "startDate": "2019-08-12", + "endDate": "2019-08-19" + }, + "PastResolution": { + "startDate": "2019-08-12", + "endDate": "2019-08-19" + } + }, + "Start": 0, + "Length": 16 + } + ] + }, + { + "Input": "semana pasada", + "Context": { + "ReferenceDateTime": "2019-08-21T17:00:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "semana pasada", + "Type": "daterange", + "Value": { + "Timex": "2019-W33", + "FutureResolution": { + "startDate": "2019-08-12", + "endDate": "2019-08-19" + }, + "PastResolution": { + "startDate": "2019-08-12", + "endDate": "2019-08-19" + } + }, + "Start": 0, + "Length": 13 + } + ] + }, + { + "Input": "el mes pasado", + "Context": { + "ReferenceDateTime": "2019-08-21T17:00:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "el mes pasado", + "Type": "daterange", + "Value": { + "Timex": "2019-07", + "FutureResolution": { + "startDate": "2019-07-01", + "endDate": "2019-08-01" + }, + "PastResolution": { + "startDate": "2019-07-01", + "endDate": "2019-08-01" + } + }, + "Start": 0, + "Length": 13 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Spanish/DateTimeModel.json b/Specs/DateTime/Spanish/DateTimeModel.json index e72181e782..b0ab61c06a 100644 --- a/Specs/DateTime/Spanish/DateTimeModel.json +++ b/Specs/DateTime/Spanish/DateTimeModel.json @@ -486,5 +486,105 @@ }, "NotSupported": "python", "Results": [] + }, + { + "Input": "semana pasada.", + "Context": { + "ReferenceDateTime": "2019-08-23T01:00:00" + }, + "NotSupported": "python, javascript", + "Results": [ + { + "Text": "semana pasada", + "Start": 0, + "End": 12, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "2019-W33", + "type": "daterange", + "start": "2019-08-12", + "end": "2019-08-19" + } + ] + } + } + ] + }, + { + "Input": "la semana pasada.", + "Context": { + "ReferenceDateTime": "2019-08-23T01:00:00" + }, + "NotSupported": "python, javascript", + "Results": [ + { + "Text": "la semana pasada", + "Start": 0, + "End": 15, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "2019-W33", + "type": "daterange", + "start": "2019-08-12", + "end": "2019-08-19" + } + ] + } + } + ] + }, + { + "Input": "el mes pasado", + "Context": { + "ReferenceDateTime": "2019-08-23T01:00:00" + }, + "NotSupported": "python, javascript", + "Results": [ + { + "Text": "el mes pasado", + "Start": 0, + "End": 12, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "2019-07", + "type": "daterange", + "start": "2019-07-01", + "end": "2019-08-01" + } + ] + } + } + ] + }, + { + "Input": "mes pasado", + "Context": { + "ReferenceDateTime": "2019-08-23T01:00:00" + }, + "NotSupported": "python, javascript", + "Results": [ + { + "Text": "mes pasado", + "Start": 0, + "End": 9, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "2019-07", + "type": "daterange", + "start": "2019-07-01", + "end": "2019-08-01" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/German/AgeModel.json b/Specs/NumberWithUnit/German/AgeModel.json index c66f5ae4d4..c370ff480e 100644 --- a/Specs/NumberWithUnit/German/AgeModel.json +++ b/Specs/NumberWithUnit/German/AgeModel.json @@ -219,5 +219,11 @@ } } ] + }, + { + "Input": "Woche oder Wochen", + "NotSupportedByDesign": "python", + "NotSupported": "javascript", + "Results": [] } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Portuguese/AgeModel.json b/Specs/NumberWithUnit/Portuguese/AgeModel.json index 8941dfe290..a33700aee6 100644 --- a/Specs/NumberWithUnit/Portuguese/AgeModel.json +++ b/Specs/NumberWithUnit/Portuguese/AgeModel.json @@ -254,5 +254,9 @@ } } ] + }, + { + "Input": "Semana ou semanas", + "Results": [] } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Spanish/AgeModel.json b/Specs/NumberWithUnit/Spanish/AgeModel.json index 5f949554a7..ba5381114a 100644 --- a/Specs/NumberWithUnit/Spanish/AgeModel.json +++ b/Specs/NumberWithUnit/Spanish/AgeModel.json @@ -254,5 +254,9 @@ } } ] + }, + { + "Input": "semana o semanas", + "Results": [] } ] \ No newline at end of file