Skip to content

Commit

Permalink
Revise BaseNumberExtractor to avoid double extract (#1647)
Browse files Browse the repository at this point in the history
* Revise BaseNumberExtractor to avoid double extract
  • Loading branch information
Sanxing Chen authored and tellarin committed Jun 19, 2019
1 parent ce8ed99 commit 1b81155
Show file tree
Hide file tree
Showing 8 changed files with 13 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ public static class URLDefinitions
public static readonly string UrlRegex = $@"{UrlPrefixRegex}(?<Tld>[a-zA-Z]{{2,18}}){BaseURL.UrlSuffixRegex}";
public static readonly string IpUrlRegex = $@"(?<IPurl>({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){BaseURL.UrlSuffixRegex}))";
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static class NumbersDefinitions
public static readonly string AllIntRegexWithDozenSuffixLocks = $@"(?<=\b)(((half\s+)?a\s+dozen)|({AllIntRegex}\s+dozen(s)?))(?=\b)";
public const string RoundNumberOrdinalRegex = @"(hundredth|thousandth|millionth|billionth|trillionth)";
public const string NumberOrdinalRegex = @"(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)";
public const string RelativeOrdinalRegex = @"((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)";
public const string RelativeOrdinalRegex = @"(?<relativeOrdinal>(next|previous)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous)";
public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}|{RelativeOrdinalRegex})";
public static readonly string SuffixBasicOrdinalRegex = $@"((((({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-\s*))?{BasicOrdinalRegex})";
public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s+){RoundNumberOrdinalRegex})";
Expand Down
3 changes: 3 additions & 0 deletions .NET/Microsoft.Recognizers.Text.Number/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ public static class Constants
public const string RELATIVE_END = "end";
public const string RELATIVE_CURRENT = "current";

// Names of the named capture groups used in regexes
public const string RelativeOrdinalGroupName = "relativeOrdinal";

// Number subtypes
public const string INTEGER = "integer";
public const string DECIMAL = "decimal";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public virtual List<ExtractResult> Extract(string source)
}

// If SuppressExtendedTypes is on, cases like "last", "next" should be skipped
if ((Options & NumberOptions.SuppressExtendedTypes) != 0 && IsRelativeOrdinal(m.Value))
if ((Options & NumberOptions.SuppressExtendedTypes) != 0 && m.Groups[Constants.RelativeOrdinalGroupName].Success)
{
continue;
}
Expand Down Expand Up @@ -83,8 +83,8 @@ public virtual List<ExtractResult> Extract(string source)

if (matchSource.Keys.Any(o => o.Index == start && o.Length == length))
{
var type = matchSource.Where(p => p.Key.Index == start && p.Key.Length == length)
.Select(p => (p.Value.Priority, p.Value.Name)).Min().Item2;
var (_, type, originalMatch) = matchSource.Where(p => p.Key.Index == start && p.Key.Length == length)
.Select(p => (p.Value.Priority, p.Value.Name, p.Key)).Min();

// Extract negative numbers
if (NegativeNumberTermsRegex != null)
Expand All @@ -111,7 +111,7 @@ public virtual List<ExtractResult> Extract(string source)
if (ExtractType.Contains(Constants.MODEL_ORDINAL))
{
er.Metadata = new Metadata();
if (IsRelativeOrdinal(substr))
if (originalMatch.Groups[Constants.RelativeOrdinalGroupName].Success)
{
er.Metadata.IsOrdinalRelative = true;
}
Expand Down Expand Up @@ -144,16 +144,6 @@ protected static Regex GenerateLongFormatNumberRegexes(LongFormatType type, stri
return new Regex(regexDefinition, RegexOptions.Singleline);
}

// Tests matchValue against RelativeReferenceRegex.
// Returns false when no RelativeReferenceRegex is configured (null); otherwise
// returns whether the regex finds a match anywhere in matchValue.
private bool IsRelativeOrdinal(string matchValue) =>
    RelativeReferenceRegex != null && RelativeReferenceRegex.Match(matchValue).Success;

private List<ExtractResult> FilterAmbiguity(List<ExtractResult> ers, string text)
{
if (AmbiguityFiltersDict != null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public static String NumbersWithPlaceHolder(String placeholder) {

public static final String NumberOrdinalRegex = "(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)";

public static final String RelativeOrdinalRegex = "((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)";
public static final String RelativeOrdinalRegex = "(?<relativeOrdinal>(next|previous)\\s+one|(the\\s+second|next)\\s+to\\s+last|the\\s+one\\s+before\\s+the\\s+last(\\s+one)?|the\\s+last\\s+but\\s+one|(ante)?penultimate|last|next|previous)";

public static final String BasicOrdinalRegex = "({NumberOrdinalRegex}|{RelativeOrdinalRegex})"
.replace("{NumberOrdinalRegex}", NumberOrdinalRegex)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export namespace EnglishNumeric {
export const AllIntRegexWithDozenSuffixLocks = `(?<=\\b)(((half\\s+)?a\\s+dozen)|(${AllIntRegex}\\s+dozen(s)?))(?=\\b)`;
export const RoundNumberOrdinalRegex = `(hundredth|thousandth|millionth|billionth|trillionth)`;
export const NumberOrdinalRegex = `(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)`;
export const RelativeOrdinalRegex = `((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)`;
export const RelativeOrdinalRegex = `(?<relativeOrdinal>(next|previous)\\s+one|(the\\s+second|next)\\s+to\\s+last|the\\s+one\\s+before\\s+the\\s+last(\\s+one)?|the\\s+last\\s+but\\s+one|(ante)?penultimate|last|next|previous)`;
export const BasicOrdinalRegex = `(${NumberOrdinalRegex}|${RelativeOrdinalRegex})`;
export const SuffixBasicOrdinalRegex = `(((((${TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*)${ZeroToNineIntegerRegex})|${TensNumberIntegerRegex}|${ZeroToNineIntegerRegex}|${AnIntRegex})(\\s+${RoundNumberIntegerRegex})+)\\s+(and\\s+)?)*(${TensNumberIntegerRegex}(\\s+|\\s*-\\s*))?${BasicOrdinalRegex})`;
export const SuffixRoundNumberOrdinalRegex = `((${AllIntRegex}\\s+)${RoundNumberOrdinalRegex})`;
Expand Down
2 changes: 1 addition & 1 deletion Patterns/English/English-Numbers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RoundNumberOrdinalRegex: !simpleRegex
NumberOrdinalRegex: !simpleRegex
def: (first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)
RelativeOrdinalRegex: !simpleRegex
def: ((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)
def: (?<relativeOrdinal>(next|previous)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous)
BasicOrdinalRegex: !nestedRegex
def: ({NumberOrdinalRegex}|{RelativeOrdinalRegex})
references: [ NumberOrdinalRegex, RelativeOrdinalRegex ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class EnglishNumeric:
AllIntRegexWithDozenSuffixLocks = f'(?<=\\b)(((half\\s+)?a\\s+dozen)|({AllIntRegex}\\s+dozen(s)?))(?=\\b)'
RoundNumberOrdinalRegex = f'(hundredth|thousandth|millionth|billionth|trillionth)'
NumberOrdinalRegex = f'(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)'
RelativeOrdinalRegex = f'((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)'
RelativeOrdinalRegex = f'(?<relativeOrdinal>(next|previous)\\s+one|(the\\s+second|next)\\s+to\\s+last|the\\s+one\\s+before\\s+the\\s+last(\\s+one)?|the\\s+last\\s+but\\s+one|(ante)?penultimate|last|next|previous)'
BasicOrdinalRegex = f'({NumberOrdinalRegex}|{RelativeOrdinalRegex})'
SuffixBasicOrdinalRegex = f'((((({TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\\s+{RoundNumberIntegerRegex})+)\\s+(and\\s+)?)*({TensNumberIntegerRegex}(\\s+|\\s*-\\s*))?{BasicOrdinalRegex})'
SuffixRoundNumberOrdinalRegex = f'(({AllIntRegex}\\s+){RoundNumberOrdinalRegex})'
Expand Down

0 comments on commit 1b81155

Please sign in to comment.