Skip to content

Commit

Permalink
[EN DateTimeV2] Avoid some incorrect recognitions, like "3 this" (#1790)
Browse files Browse the repository at this point in the history
  • Loading branch information
songwenhao1 authored and tellarin committed Aug 8, 2019
1 parent 246cd02 commit 79c92e9
Show file tree
Hide file tree
Showing 17 changed files with 295 additions and 60 deletions.
Original file line number Diff line number Diff line change
@@ -1,66 +1,68 @@
using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Dutch;

namespace Microsoft.Recognizers.Text.DateTime.Dutch
{
public class DutchDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration
{
public static readonly Regex ThisPrefixRegex =
new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags);

public static readonly Regex PreviousPrefixRegex =
new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags);

public static readonly Regex NextPrefixRegex =
new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags);

public static readonly Regex AmRegex =
new Regex(DateTimeDefinitions.AmRegex, RegexFlags);

public static readonly Regex PmRegex =
new Regex(DateTimeDefinitions.PmRegex, RegexFlags);

public static readonly Regex RangePrefixRegex =
new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags);

public static readonly Regex[] RelativePrefixList =
{
ThisPrefixRegex, PreviousPrefixRegex, NextPrefixRegex,
};

public static readonly Regex[] AmPmRegexList =
{
AmRegex, PmRegex,
};

using System.Collections.Generic;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Definitions.Dutch;

namespace Microsoft.Recognizers.Text.DateTime.Dutch
{
public class DutchDateTimeAltExtractorConfiguration : BaseDateTimeOptionsConfiguration, IDateTimeAltExtractorConfiguration
{
public static readonly Regex ThisPrefixRegex =
new Regex(DateTimeDefinitions.ThisPrefixRegex, RegexFlags);

public static readonly Regex PreviousPrefixRegex =
new Regex(DateTimeDefinitions.PreviousPrefixRegex, RegexFlags);

public static readonly Regex NextPrefixRegex =
new Regex(DateTimeDefinitions.NextPrefixRegex, RegexFlags);

public static readonly Regex AmRegex =
new Regex(DateTimeDefinitions.AmRegex, RegexFlags);

public static readonly Regex PmRegex =
new Regex(DateTimeDefinitions.PmRegex, RegexFlags);

public static readonly Regex RangePrefixRegex =
new Regex(DateTimeDefinitions.RangePrefixRegex, RegexFlags);

public static readonly Regex[] RelativePrefixList =
{
ThisPrefixRegex, PreviousPrefixRegex, NextPrefixRegex,
};

public static readonly Regex[] AmPmRegexList =
{
AmRegex, PmRegex,
};

private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

private static readonly Regex OrRegex =
new Regex(DateTimeDefinitions.OrRegex, RegexFlags);

private static readonly Regex DayRegex =
new Regex(DateTimeDefinitions.OrRegex, RegexFlags);

private static readonly Regex DayRegex =
new Regex(DateTimeDefinitions.DayRegex, RegexFlags);

public DutchDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration config)
: base(config)
{
DateExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this));
DatePeriodExtractor = new BaseDatePeriodExtractor(new DutchDatePeriodExtractorConfiguration(this));
: base(config)
{
DateExtractor = new BaseDateExtractor(new DutchDateExtractorConfiguration(this));
DatePeriodExtractor = new BaseDatePeriodExtractor(new DutchDatePeriodExtractorConfiguration(this));
}

public IDateExtractor DateExtractor { get; }

public IDateTimeExtractor DatePeriodExtractor { get; }

IEnumerable<Regex> IDateTimeAltExtractorConfiguration.RelativePrefixList => RelativePrefixList;

IEnumerable<Regex> IDateTimeAltExtractorConfiguration.AmPmRegexList => AmPmRegexList;

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
}
public IDateTimeExtractor DatePeriodExtractor { get; }

IEnumerable<Regex> IDateTimeAltExtractorConfiguration.RelativePrefixList => RelativePrefixList;

IEnumerable<Regex> IDateTimeAltExtractorConfiguration.AmPmRegexList => AmPmRegexList;

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public EnglishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration co

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,12 @@ private void ResolveImplicitRelativeDatePeriod(List<ExtractResult> ers, string t
relativeTermsMatches.AddRange(regex.Matches(text).Cast<Match>());
}

// Bail out early when there is no relative term at all, or when the only relative term
// found is "this" (e.g. the input "3 this"); in both cases there is nothing to resolve.
if (relativeTermsMatches.Count == 0 || (relativeTermsMatches.Count == 1 && config.ThisPrefixRegex.IsMatch(relativeTermsMatches[0].Value)))
{
return;
}

// Remove overlapping matches
relativeTermsMatches.RemoveAll(m =>
ers.Any(e => e.Start <= m.Index && e.Start + e.Length >= m.Index + m.Length));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ public interface IDateTimeAltExtractorConfiguration

Regex OrRegex { get; }

Regex ThisPrefixRegex { get; }

Regex DayRegex { get; }

Regex RangePrefixRegex { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public FrenchDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration con

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public GermanDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration con

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public ItalianDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration co

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public PortugueseDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public SpanishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration co

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public TurkishDateTimeAltExtractorConfiguration(IDateTimeOptionsConfiguration co

Regex IDateTimeAltExtractorConfiguration.OrRegex => OrRegex;

Regex IDateTimeAltExtractorConfiguration.ThisPrefixRegex => ThisPrefixRegex;

Regex IDateTimeAltExtractorConfiguration.DayRegex => DayRegex;

Regex IDateTimeAltExtractorConfiguration.RangePrefixRegex => RangePrefixRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ public Pattern getOrRegex() {
return OrRegex;
}

/**
 * Returns the pattern stored in {@code ThisPrefixRegex}.
 *
 * @return the "this" prefix pattern of this configuration
 */
@Override
public Pattern getThisPrefixRegex() {
return ThisPrefixRegex;
}

@Override
public Pattern getDayRegex() {
return DayRegex;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ private List<ExtractResult> resolveImplicitRelativeDatePeriod(List<ExtractResult
relativeTermsMatches.addAll(Arrays.asList(RegExpUtility.getMatches(regex, text)));
}

// Holds the extraction results that are handed back to the caller.
List<ExtractResult> results = new ArrayList<>();

// Nothing to resolve when there is no relative term, or when the only one found is "this"
// (e.g. "3 this"). The single match has to be tested against ThisPrefixRegex (not OrRegex),
// matching the .NET implementation; otherwise a lone "this" is never filtered out.
if (relativeTermsMatches.isEmpty() || (relativeTermsMatches.size() == 1 &&
RegExpUtility.getMatches(config.getThisPrefixRegex(), relativeTermsMatches.get(0).value).length > 0)) {
// Return the incoming results untouched.
results.addAll(ers);
return results;
}

List<ExtractResult> relativeDatePeriodErs = new ArrayList<>();
int i = 0;
for (ExtractResult result : ers.toArray(new ExtractResult[0])) {
Expand Down Expand Up @@ -255,12 +264,11 @@ private List<ExtractResult> resolveImplicitRelativeDatePeriod(List<ExtractResult
i++;
}

List<ExtractResult> result = new ArrayList<>();
result.addAll(ers);
result.addAll(relativeDatePeriodErs);
result.sort(Comparator.comparingInt(er -> er.getStart()));
results.addAll(ers);
results.addAll(relativeDatePeriodErs);
results.sort(Comparator.comparingInt(er -> er.getStart()));

return result;
return results;
}

private boolean isConnectorOrWhiteSpace(int start, int end, String text) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ public interface IDateTimeAltExtractorConfiguration {

Pattern getOrRegex();

Pattern getThisPrefixRegex();

Pattern getDayRegex();

Pattern getRangePrefixRegex();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ public Pattern getOrRegex() {
return OrRegex;
}

/**
 * Returns the pattern stored in {@code ThisPrefixRegex}.
 *
 * @return the "this" prefix pattern of this configuration
 */
@Override
public Pattern getThisPrefixRegex() {
return ThisPrefixRegex;
}

@Override
public Pattern getDayRegex() {
return DayRegex;
Expand Down
51 changes: 51 additions & 0 deletions Specs/DateTime/English/DateTimeModel.json
Original file line number Diff line number Diff line change
Expand Up @@ -13851,5 +13851,56 @@
},
"NotSupported": "javascript, python",
"Results": []
},
{
"Input": "ABC-12345-A1B2C3 this is yet to be submitted",
"Context": {
"ReferenceDateTime": "2019-08-08T00:00:00"
},
"Results": []
},
{
"Input": "mar3 this week or next",
"Context": {
"ReferenceDateTime": "2019-08-08T00:00:00"
},
"Results": [
{
"Text": "mar3",
"Start": 0,
"End": 3,
"TypeName": "datetimeV2.date",
"Resolution": {
"values": [
{
"timex": "XXXX-03-03",
"type": "date",
"value": "2019-03-03"
},
{
"timex": "XXXX-03-03",
"type": "date",
"value": "2020-03-03"
}
]
}
},
{
"Text": "this week",
"Start": 5,
"End": 13,
"TypeName": "datetimeV2.daterange",
"Resolution": {
"values": [
{
"timex": "2019-W32",
"type": "daterange",
"start": "2019-08-05",
"end": "2019-08-12"
}
]
}
}
]
}
]
Loading

0 comments on commit 79c92e9

Please sign in to comment.