Skip to content

Commit

Permalink
[ES DateTimeV2] Fix problem recognizing entities like "la semana pasada" (#1818)
Browse files Browse the repository at this point in the history

Also, add a workaround for Age false positives in DE/ES/PT.
  • Loading branch information
Grey0202 authored and tellarin committed Aug 26, 2019
1 parent a640186 commit 6f9a886
Show file tree
Hide file tree
Showing 46 changed files with 407 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ public static class NumbersWithUnitDefinitions
{ @"Week", @"woche alt|wochen alt|wochen|woche" },
{ @"Day", @"tag alt|tage alt|tagen|tage" }
};
// German time-unit words (years, months, weeks, days) listed as ambiguous age units.
// The commit description on this page calls this a workaround for Age false positives.
// The German AgeExtractorConfiguration exposes this list through its AmbiguousUnitList override.
// NOTE(review): how the base extractor treats an ambiguous unit is not visible here; presumably a
// bare match on one of these words is not reported as an age on its own. Confirm.
public static readonly IList<string> AmbiguousAgeUnitList = new List<string>
{
@"jahren",
@"jahre",
@"monaten",
@"monate",
@"wochen",
@"woche",
@"tagen",
@"tage"
};
public static readonly Dictionary<string, string> AreaSuffixList = new Dictionary<string, string>
{
{ @"Square kilometer", @"qkm|quadratkilometer|km^2|km²" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ public static class NumbersWithUnitDefinitions
{ @"Semana", @"semanas|semana" },
{ @"Dia", @"dias|dia" }
};
// Portuguese time-unit words (years, months, weeks, days) listed as ambiguous age units.
// Both the unaccented "mes" and the accented "mês" spellings are included.
// The commit description on this page calls this a workaround for Age false positives.
// The Portuguese AgeExtractorConfiguration exposes this list through its AmbiguousUnitList override.
// NOTE(review): how the base extractor treats an ambiguous unit is not visible here. Confirm.
public static readonly IList<string> AmbiguousAgeUnitList = new List<string>
{
@"anos",
@"ano",
@"meses",
@"mes",
@"mês",
@"semanas",
@"semana",
@"dias",
@"dia"
};
public static readonly Dictionary<string, string> AreaSuffixList = new Dictionary<string, string>
{
{ @"Quilômetro quadrado", @"quilômetro quadrado|quilómetro quadrado|quilometro quadrado|quilômetros quadrados|quilómetros quadrados|quilomeros quadrados|km2|km^2|km²" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public static class DateTimeDefinitions
public static readonly string MonthFrontSimpleCasesRegex = $@"\b{MonthSuffixRegex}\s+((desde\s+el|desde|del)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b";
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+((entre|entre\s+el)\s+)({DayRegex})\s*{AndRegex}\s*({DayRegex})((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b";
public static readonly string DayBetweenRegex = $@"\b((entre|entre\s+el)\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{AndRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*)(en\s+|del\s+|de\s+)?{YearRegex})?\b";
public static readonly string OneWordPeriodRegex = $@"\b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex})";
public static readonly string OneWordPeriodRegex = $@"\b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((la|el)\s+)?((({RelativeRegex}\s+){DateUnitRegex}(\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\s+{AfterNextSuffixRegex}))|va\s+de\s+{DateUnitRegex})";
public static readonly string MonthWithYearRegex = $@"\b(((pr[oó]xim[oa](s)?|este|esta|[uú]ltim[oa]?)\s+)?({MonthRegex})(\s+|(\s*[,-]\s*))((de|del|de la)\s+)?({YearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año))\b";
public static readonly string MonthNumWithYearRegex = $@"({YearRegex}(\s*?)[/\-\.~](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-\.~](\s*?){YearRegex})";
public static readonly string WeekOfMonthRegex = $@"(?<wom>(la\s+)?(?<cardinal>primera?|1ra|segunda|2da|tercera?|3ra|cuarta|4ta|quinta|5ta|[uú]ltima)\s+semana\s+{MonthSuffixRegex})";
Expand All @@ -61,7 +61,7 @@ public static class DateTimeDefinitions
public static readonly string QuarterRegex = $@"(el\s+)?{QuarterTermRegex}((\s+del?|\s*,\s*)?\s+({YearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o|a[ñn]o(\s+{AfterNextSuffixRegex})))?";
public static readonly string QuarterRegexYearFront = $@"({YearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|este)\s+a[ñn]o)\s+(el\s+)?{QuarterTermRegex}";
public const string AllHalfYearRegex = @"^[.]";
public static readonly string EarlyPrefixRegex = $@"\b(?<EarlyPrefix>((comienzos|inicios)\s+({OfPrepositionRegex})))\b";
public static readonly string EarlyPrefixRegex = $@"\b(?<EarlyPrefix>((comienzos?|inicios?)\s+({OfPrepositionRegex})))\b";
public static readonly string MidPrefixRegex = $@"\b(?<MidPrefix>(mediados\s+({OfPrepositionRegex})))\b";
public static readonly string LaterPrefixRegex = $@"\b(?<LatePrefix>((fines|finales)\s+({OfPrepositionRegex})))\b";
public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})";
Expand Down Expand Up @@ -462,6 +462,7 @@ public static class DateTimeDefinitions
public static readonly string NextPrefixRegex = $@"(pr[oó]xim[oa]|siguiente|{UpcomingPrefixRegex})\b";
public const string PastPrefixRegex = @".^";
public static readonly string PreviousPrefixRegex = $@"([uú]ltim[oa]|{PastPrefixRegex})\b";
public const string PreviousSuffixRegex = @"\b(pasad[ao])\b";
public const string ThisPrefixRegex = @"(est[ea])\b";
public const string RelativeDayRegex = @"(?<relday>((este|pr[oó]ximo|([uú]ltim(o|as|os)))\s+días)|(días\s+((que\s+viene)|pasado)))\b";
public const string RestOfDateRegex = @"\bresto\s+((del|de)\s+)?((la|el|est[ae])\s+)?(?<duration>semana|mes|año|decada)(\s+actual)?\b";
Expand All @@ -478,7 +479,7 @@ public static class DateTimeDefinitions
public const string CommonDatePrefixRegex = @"^[\.]";
public const string DurationUnitRegex = @"^[\.]";
public const string DurationConnectorRegex = @"^[.]";
public const string SuffixAfterRegex = @"^[.]";
public const string SuffixAfterRegex = @"^[.](?!$)";
public const string YearPeriodRegex = @"^[.]";
public const string FutureSuffixRegex = @"\b(despu[ée]s)\b";
public static readonly Dictionary<string, int> WrittenDecades = new Dictionary<string, int>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ public static class NumbersWithUnitDefinitions
{ @"Semana", @"semanas|semana" },
{ @"Día", @"dias|días|día|dia" }
};
// Spanish time-unit words (years, months, weeks, days) listed as ambiguous age units.
// The day unit appears in both accented ("días", "día") and unaccented ("dias", "dia") spellings.
// The commit description on this page calls this a workaround for Age false positives.
// The Spanish AgeExtractorConfiguration exposes this list through its AmbiguousUnitList override.
// NOTE(review): how the base extractor treats an ambiguous unit is not visible here. Confirm.
public static readonly IList<string> AmbiguousAgeUnitList = new List<string>
{
@"años",
@"año",
@"meses",
@"mes",
@"semanas",
@"semana",
@"dias",
@"días",
@"día",
@"dia"
};
public static readonly Dictionary<string, string> AreaSuffixList = new Dictionary<string, string>
{
{ @"Kilómetro cuadrado", @"kilómetro cuadrado|kilómetros cuadrados|km2|km^2|km²" },
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using System.Collections.Immutable;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;

Expand All @@ -15,9 +17,15 @@ public class SpanishDatePeriodParserConfiguration : BaseDateTimeOptionsConfigura
public static readonly Regex PreviousPrefixRegex =
new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags);

// Regex built from DateTimeDefinitions.PreviousSuffixRegex (pattern \b(pasad[ao])\b) with the shared RegexFlags.
// GetSwiftDayOrMonth checks it alongside PreviousPrefixRegex; a match on either sets swift to -1.
public static readonly Regex PreviousSuffixRegex =
new Regex(DateTimeDefinitions.PreviousSuffixRegex, RegexFlags);

public static readonly Regex ThisPrefixRegex =
new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags);

// Regex built from DateTimeDefinitions.AfterNextSuffixRegex with the shared RegexFlags.
// IsMonthOnly, IsWeekend, IsWeekOnly and IsYearOnly use it so that a unit term followed by a suffix
// (i.e. not at the very end of the text) is still accepted.
// NOTE(review): the pattern itself is defined outside the visible hunks. Confirm which suffixes it covers.
public static readonly Regex AfterNextSuffixRegex =
new Regex(DateTimeDefinitions.AfterNextSuffixRegex, RegexFlags);

public static readonly Regex RelativeRegex =
new Regex(DateTimeDefinitions.RelativeRegex, RegexFlags);

Expand Down Expand Up @@ -223,7 +231,7 @@ public int GetSwiftDayOrMonth(string text)
swift = 1;
}

if (PreviousPrefixRegex.IsMatch(trimmedText))
if (PreviousPrefixRegex.IsMatch(trimmedText) || PreviousSuffixRegex.IsMatch(trimmedText))
{
swift = -1;
}
Expand Down Expand Up @@ -267,7 +275,8 @@ public bool IsLastCardinal(string text)
// Checks the trimmed text against DateTimeDefinitions.MonthTerms.
// Post-change logic: true when the text ends with one of the terms, or when it contains one of the
// terms and AfterNextSuffixRegex matches the text.
public bool IsMonthOnly(string text)
{
var trimmedText = text.Trim();
// NOTE(review): this page renders the diff without +/- markers. The single-line return directly below
// appears to be the removed pre-change statement, and the two-line return after it its replacement.
// Confirm against the repository before treating both as live code.
return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o));
return DateTimeDefinitions.MonthTerms.Any(o => trimmedText.EndsWith(o)) ||
(DateTimeDefinitions.MonthTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText));
}

public bool IsMonthToDate(string text)
Expand All @@ -279,20 +288,23 @@ public bool IsMonthToDate(string text)
// Checks the trimmed text against DateTimeDefinitions.WeekendTerms.
// Post-change logic: true when the text ends with one of the terms, or when it contains one of the
// terms and AfterNextSuffixRegex matches the text.
public bool IsWeekend(string text)
{
var trimmedText = text.Trim();
// NOTE(review): this page renders the diff without +/- markers. The single-line return directly below
// appears to be the removed pre-change statement, and the two-line return after it its replacement.
// Confirm against the repository before treating both as live code.
return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o));
return DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o)) ||
(DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText));
}

// Checks the trimmed text against DateTimeDefinitions.WeekTerms while excluding weekend expressions.
// Post-change logic: true when the text ends with a week term, or contains one and AfterNextSuffixRegex
// matches, provided the text does not contain any of DateTimeDefinitions.WeekendTerms.
public bool IsWeekOnly(string text)
{
var trimmedText = text.Trim();
// NOTE(review): this page renders the diff without +/- markers. The first return (two lines, using
// EndsWith for the weekend exclusion) appears to be the removed pre-change statement; the second
// return (three lines, using Contains for the weekend exclusion) appears to be its replacement.
// Confirm against the repository before treating both as live code.
return DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) &&
!DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.EndsWith(o));
return (DateTimeDefinitions.WeekTerms.Any(o => trimmedText.EndsWith(o)) ||
(DateTimeDefinitions.WeekTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText))) &&
!DateTimeDefinitions.WeekendTerms.Any(o => trimmedText.Contains(o));
}

// Checks the trimmed text against DateTimeDefinitions.YearTerms.
// Post-change logic: true when the text ends with one of the terms, or when it contains one of the
// terms and AfterNextSuffixRegex matches the text.
public bool IsYearOnly(string text)
{
var trimmedText = text.Trim();
// NOTE(review): this page renders the diff without +/- markers. The single-line return directly below
// appears to be the removed pre-change statement, and the two-line return after it its replacement.
// Confirm against the repository before treating both as live code.
return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o));
return DateTimeDefinitions.YearTerms.Any(o => trimmedText.EndsWith(o)) ||
(DateTimeDefinitions.YearTerms.Any(o => trimmedText.Contains(o)) && AfterNextSuffixRegex.IsMatch(trimmedText));
}

public bool IsYearToDate(string text)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ public class AgeExtractorConfiguration : GermanNumberWithUnitExtractorConfigurat
{
public static readonly ImmutableDictionary<string, string> AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary();

public static readonly ImmutableList<string> AmbiguousAgeUnitList = NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList();

public AgeExtractorConfiguration()
: this(new CultureInfo(Culture.German))
{
Expand All @@ -23,7 +25,7 @@ public AgeExtractorConfiguration(CultureInfo ci)

public override ImmutableDictionary<string, string> PrefixList => null;

public override ImmutableList<string> AmbiguousUnitList => null;
public override ImmutableList<string> AmbiguousUnitList => AmbiguousAgeUnitList;

public override string ExtractType => Constants.SYS_UNIT_AGE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ public class AgeExtractorConfiguration : PortugueseNumberWithUnitExtractorConfig
{
public static readonly ImmutableDictionary<string, string> AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary();

public static readonly ImmutableList<string> AmbiguousAgeUnitList = NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList();

public AgeExtractorConfiguration()
: this(new CultureInfo(Culture.Portuguese))
{
Expand All @@ -23,7 +25,7 @@ public AgeExtractorConfiguration(CultureInfo ci)

public override ImmutableDictionary<string, string> PrefixList => null;

public override ImmutableList<string> AmbiguousUnitList => null;
public override ImmutableList<string> AmbiguousUnitList => AmbiguousAgeUnitList;

public override string ExtractType => Constants.SYS_UNIT_AGE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ public class AgeExtractorConfiguration : SpanishNumberWithUnitExtractorConfigura
{
public static readonly ImmutableDictionary<string, string> AgeSuffixList = NumbersWithUnitDefinitions.AgeSuffixList.ToImmutableDictionary();

public static readonly ImmutableList<string> AmbiguousAgeUnitList = NumbersWithUnitDefinitions.AmbiguousAgeUnitList.ToImmutableList();

public AgeExtractorConfiguration()
: this(new CultureInfo(Culture.Spanish))
{
Expand All @@ -23,7 +25,7 @@ public AgeExtractorConfiguration(CultureInfo ci)

public override ImmutableDictionary<string, string> PrefixList => null;

public override ImmutableList<string> AmbiguousUnitList => null;
public override ImmutableList<string> AmbiguousUnitList => AmbiguousAgeUnitList;

public override string ExtractType => Constants.SYS_UNIT_AGE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

using Microsoft.Recognizers.Definitions;
using Microsoft.Recognizers.Definitions.Spanish;
using Microsoft.Recognizers.Definitions.Utilities;
using Microsoft.Recognizers.Text.Number;
using Microsoft.Recognizers.Text.Number.Spanish;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ public class EnglishDateTime {
public static final String WeekDayEnd = "(this\\s+)?{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";

public static final String RangeUnitRegex = "\\b(?<unit>years?|months?|weeks?)\\b";

public static final String HourNumRegex = "\\b(?<hournum>zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\\b";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ public class FrenchDateTime {
public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";

public static final String RangeUnitRegex = "\\b(?<unit>(l')?ann[eé]e(s)?|mois|semaines?)\\b";

public static final String HourNumRegex = "\\b(?<hournum>zero|[aá]\\s+une?|deux|trois|quatre|cinq|six|sept|huit|neuf|onze|douze|treize|quatorze|quinze|dix-six|dix-sept|dix-huit|dix-neuf|vingt|vingt-et-un|vingt-deux|vingt-trois|dix)\\b";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ public class PortugueseDateTime {
public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";

public static final String DateYearRegex = "(?<year>{YearRegex}|{TwoDigitYearRegex})"
.replace("{YearRegex}", YearRegex)
.replace("{TwoDigitYearRegex}", TwoDigitYearRegex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public class SpanishDateTime {
.replace("{MonthSuffixRegex}", MonthSuffixRegex)
.replace("{YearRegex}", YearRegex);

public static final String OneWordPeriodRegex = "\\b(((((la|el)\\s+)?mes\\s+(({OfPrepositionRegex})\\s+))|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?({MonthRegex})|((la|el)\\s+)?((({RelativeRegex}\\s+){DateUnitRegex}(\\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\\s+{AfterNextSuffixRegex}))|va\\s+de\\s+{DateUnitRegex})"
public static final String OneWordPeriodRegex = "\\b(((((la|el)\\s+)?mes\\s+(({OfPrepositionRegex})\\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\\s+))?({MonthRegex})|((la|el)\\s+)?((({RelativeRegex}\\s+){DateUnitRegex}(\\s+{AfterNextSuffixRegex})?)|{DateUnitRegex}(\\s+{AfterNextSuffixRegex}))|va\\s+de\\s+{DateUnitRegex})"
.replace("{MonthRegex}", MonthRegex)
.replace("{RelativeRegex}", RelativeRegex)
.replace("{OfPrepositionRegex}", OfPrepositionRegex)
Expand Down Expand Up @@ -148,7 +148,7 @@ public class SpanishDateTime {

public static final String AllHalfYearRegex = "^[.]";

public static final String EarlyPrefixRegex = "\\b(?<EarlyPrefix>((comienzos|inicios)\\s+({OfPrepositionRegex})))\\b"
public static final String EarlyPrefixRegex = "\\b(?<EarlyPrefix>((comienzos?|inicios?)\\s+({OfPrepositionRegex})))\\b"
.replace("{OfPrepositionRegex}", OfPrepositionRegex);

public static final String MidPrefixRegex = "\\b(?<MidPrefix>(mediados\\s+({OfPrepositionRegex})))\\b"
Expand Down Expand Up @@ -238,6 +238,8 @@ public class SpanishDateTime {
public static final String WeekDayEnd = "{WeekDayRegex}\\s*,?\\s*$"
.replace("{WeekDayRegex}", WeekDayRegex);

public static final String WeekDayStart = "^[\\.]";

public static final String DateYearRegex = "(?<year>{YearRegex}|{TwoDigitYearRegex})"
.replace("{YearRegex}", YearRegex)
.replace("{TwoDigitYearRegex}", TwoDigitYearRegex);
Expand Down Expand Up @@ -890,6 +892,8 @@ public class SpanishDateTime {
public static final String PreviousPrefixRegex = "([uú]ltim[oa]|{PastPrefixRegex})\\b"
.replace("{PastPrefixRegex}", PastPrefixRegex);

public static final String PreviousSuffixRegex = "\\b(pasad[ao])\\b";

public static final String ThisPrefixRegex = "(est[ea])\\b";

public static final String RelativeDayRegex = "(?<relday>((este|pr[oó]ximo|([uú]ltim(o|as|os)))\\s+días)|(días\\s+((que\\s+viene)|pasado)))\\b";
Expand Down Expand Up @@ -924,7 +928,7 @@ public class SpanishDateTime {

public static final String DurationConnectorRegex = "^[.]";

public static final String SuffixAfterRegex = "^[.]";
public static final String SuffixAfterRegex = "^[.](?!$)";

public static final String YearPeriodRegex = "^[.]";

Expand Down
Loading

0 comments on commit 6f9a886

Please sign in to comment.