diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/URLDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/URLDefinitions.cs index d2e6ec979f..af47044c37 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/URLDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/URLDefinitions.cs @@ -26,4 +26,4 @@ public static class URLDefinitions public static readonly string UrlRegex = $@"{UrlPrefixRegex}(?[a-zA-Z]{{2,18}}){BaseURL.UrlSuffixRegex}"; public static readonly string IpUrlRegex = $@"(?({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){BaseURL.UrlSuffixRegex}))"; } -} +} \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs index b7cfd19ea2..cfbc31fbe7 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs @@ -44,7 +44,7 @@ public static class NumbersDefinitions public static readonly string AllIntRegexWithDozenSuffixLocks = $@"(?<=\b)(((half\s+)?a\s+dozen)|({AllIntRegex}\s+dozen(s)?))(?=\b)"; public const string RoundNumberOrdinalRegex = @"(hundredth|thousandth|millionth|billionth|trillionth)"; public const string NumberOrdinalRegex = @"(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)"; - public const string RelativeOrdinalRegex = @"((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)"; + public const string RelativeOrdinalRegex = 
@"(?<relativeOrdinal>(next|previous)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous)"; public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}|{RelativeOrdinalRegex})"; public static readonly string SuffixBasicOrdinalRegex = $@"((((({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-\s*))?{BasicOrdinalRegex})"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s+){RoundNumberOrdinalRegex})"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Constants.cs b/.NET/Microsoft.Recognizers.Text.Number/Constants.cs index f166ab9132..4f94706d12 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Constants.cs @@ -54,6 +54,9 @@ public static class Constants public const string RELATIVE_END = "end"; public const string RELATIVE_CURRENT = "current"; + // Groups' names for named groups in regexes + public const string RelativeOrdinalGroupName = "relativeOrdinal"; + // Number subtypes public const string INTEGER = "integer"; public const string DECIMAL = "decimal"; diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs index bff8b1b8c5..98e4e42377 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs @@ -55,7 +55,7 @@ public virtual List Extract(string source) } // If SuppressExtendedTypes is on, cases like "last", "next" should be skipped - if ((Options & NumberOptions.SuppressExtendedTypes) != 0 && IsRelativeOrdinal(m.Value)) + if ((Options & NumberOptions.SuppressExtendedTypes) != 0 && 
m.Groups[Constants.RelativeOrdinalGroupName].Success) { continue; } @@ -83,8 +83,8 @@ public virtual List Extract(string source) if (matchSource.Keys.Any(o => o.Index == start && o.Length == length)) { - var type = matchSource.Where(p => p.Key.Index == start && p.Key.Length == length) - .Select(p => (p.Value.Priority, p.Value.Name)).Min().Item2; + var (_, type, originalMatch) = matchSource.Where(p => p.Key.Index == start && p.Key.Length == length) + .Select(p => (p.Value.Priority, p.Value.Name, p.Key)).Min(); // Extract negative numbers if (NegativeNumberTermsRegex != null) @@ -111,7 +111,7 @@ public virtual List Extract(string source) if (ExtractType.Contains(Constants.MODEL_ORDINAL)) { er.Metadata = new Metadata(); - if (IsRelativeOrdinal(substr)) + if (originalMatch.Groups[Constants.RelativeOrdinalGroupName].Success) { er.Metadata.IsOrdinalRelative = true; } @@ -144,16 +144,6 @@ protected static Regex GenerateLongFormatNumberRegexes(LongFormatType type, stri return new Regex(regexDefinition, RegexOptions.Singleline); } - private bool IsRelativeOrdinal(string matchValue) - { - if (RelativeReferenceRegex == null) - { - return false; - } - - return RelativeReferenceRegex.Match(matchValue).Success; - } - private List FilterAmbiguity(List ers, string text) { if (AmbiguityFiltersDict != null) diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java index ba9ce28cae..1dc47beba0 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java @@ -84,7 +84,7 @@ public static String NumbersWithPlaceHolder(String placeholder) { public static final String NumberOrdinalRegex = 
"(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)"; - public static final String RelativeOrdinalRegex = "((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)"; + public static final String RelativeOrdinalRegex = "(?<relativeOrdinal>(next|previous)\\s+one|(the\\s+second|next)\\s+to\\s+last|the\\s+one\\s+before\\s+the\\s+last(\\s+one)?|the\\s+last\\s+but\\s+one|(ante)?penultimate|last|next|previous)"; public static final String BasicOrdinalRegex = "({NumberOrdinalRegex}|{RelativeOrdinalRegex})" .replace("{NumberOrdinalRegex}", NumberOrdinalRegex) diff --git a/JavaScript/packages/recognizers-number/src/resources/englishNumeric.ts b/JavaScript/packages/recognizers-number/src/resources/englishNumeric.ts index 3b96c4594c..21cc26e643 100644 --- a/JavaScript/packages/recognizers-number/src/resources/englishNumeric.ts +++ b/JavaScript/packages/recognizers-number/src/resources/englishNumeric.ts @@ -34,7 +34,7 @@ export namespace EnglishNumeric { export const AllIntRegexWithDozenSuffixLocks = `(?<=\\b)(((half\\s+)?a\\s+dozen)|(${AllIntRegex}\\s+dozen(s)?))(?=\\b)`; export const RoundNumberOrdinalRegex = `(hundredth|thousandth|millionth|billionth|trillionth)`; export const NumberOrdinalRegex = `(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)`; - export const RelativeOrdinalRegex = `((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)`; + export const RelativeOrdinalRegex = 
`(?<relativeOrdinal>(next|previous)\\s+one|(the\\s+second|next)\\s+to\\s+last|the\\s+one\\s+before\\s+the\\s+last(\\s+one)?|the\\s+last\\s+but\\s+one|(ante)?penultimate|last|next|previous)`; export const BasicOrdinalRegex = `(${NumberOrdinalRegex}|${RelativeOrdinalRegex})`; export const SuffixBasicOrdinalRegex = `(((((${TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*)${ZeroToNineIntegerRegex})|${TensNumberIntegerRegex}|${ZeroToNineIntegerRegex}|${AnIntRegex})(\\s+${RoundNumberIntegerRegex})+)\\s+(and\\s+)?)*(${TensNumberIntegerRegex}(\\s+|\\s*-\\s*))?${BasicOrdinalRegex})`; export const SuffixRoundNumberOrdinalRegex = `((${AllIntRegex}\\s+)${RoundNumberOrdinalRegex})`; diff --git a/Patterns/English/English-Numbers.yaml b/Patterns/English/English-Numbers.yaml index cdfe1eeb1b..892df7ca1e 100644 --- a/Patterns/English/English-Numbers.yaml +++ b/Patterns/English/English-Numbers.yaml @@ -55,7 +55,7 @@ RoundNumberOrdinalRegex: !simpleRegex NumberOrdinalRegex: !simpleRegex def: (first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth) RelativeOrdinalRegex: !simpleRegex - def: ((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous) + def: (?<relativeOrdinal>(next|previous)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous) BasicOrdinalRegex: !nestedRegex def: ({NumberOrdinalRegex}|{RelativeOrdinalRegex}) references: [ NumberOrdinalRegex, RelativeOrdinalRegex ] diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py index 19c11e3684..ca98f18d22 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py 
+++ b/Python/libraries/recognizers-number/recognizers_number/resources/english_numeric.py @@ -35,7 +35,7 @@ class EnglishNumeric: AllIntRegexWithDozenSuffixLocks = f'(?<=\\b)(((half\\s+)?a\\s+dozen)|({AllIntRegex}\\s+dozen(s)?))(?=\\b)' RoundNumberOrdinalRegex = f'(hundredth|thousandth|millionth|billionth|trillionth)' NumberOrdinalRegex = f'(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)' - RelativeOrdinalRegex = f'((next|previous) one|(the second|next) to last|the one before the last( one)?|the last but one|(ante)?penultimate|last|next|previous)' + RelativeOrdinalRegex = f'(?<relativeOrdinal>(next|previous)\\s+one|(the\\s+second|next)\\s+to\\s+last|the\\s+one\\s+before\\s+the\\s+last(\\s+one)?|the\\s+last\\s+but\\s+one|(ante)?penultimate|last|next|previous)' BasicOrdinalRegex = f'({NumberOrdinalRegex}|{RelativeOrdinalRegex})' SuffixBasicOrdinalRegex = f'((((({TensNumberIntegerRegex}(\\s+(and\\s+)?|\\s*-\\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\\s+{RoundNumberIntegerRegex})+)\\s+(and\\s+)?)*({TensNumberIntegerRegex}(\\s+|\\s*-\\s*))?{BasicOrdinalRegex})' SuffixRoundNumberOrdinalRegex = f'(({AllIntRegex}\\s+){RoundNumberOrdinalRegex})'