From d78923608053f42bf4fb345c1172a7b6fd719cb4 Mon Sep 17 00:00:00 2001 From: Wenhao Song <32570730+songwenhao1@users.noreply.github.com> Date: Wed, 19 Jun 2019 17:58:57 +0800 Subject: [PATCH] Fix regression bug in ordinal.relative (#1648) --- .../English/NumbersDefinitions.cs | 12 +++- .../Extractors/BaseNumberExtractor.cs | 2 +- .../Parsers/BaseNumberParser.cs | 11 +-- Patterns/English/English-Numbers.yaml | 12 +++- Specs/Number/English/OrdinalModel.json | 68 +++++++++++++++++++ .../OrdinalModelSuppressExtendedTypes.json | 20 ++++++ 6 files changed, 114 insertions(+), 11 deletions(-) diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs index cfbc31fbe7..2607d4c28d 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/NumbersDefinitions.cs @@ -44,19 +44,19 @@ public static class NumbersDefinitions public static readonly string AllIntRegexWithDozenSuffixLocks = $@"(?<=\b)(((half\s+)?a\s+dozen)|({AllIntRegex}\s+dozen(s)?))(?=\b)"; public const string RoundNumberOrdinalRegex = @"(hundredth|thousandth|millionth|billionth|trillionth)"; public const string NumberOrdinalRegex = @"(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)"; - public const string RelativeOrdinalRegex = @"(?(next|previous)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous)"; + public const string RelativeOrdinalRegex = @"(?(next|previous|current)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous|current)"; public static readonly string BasicOrdinalRegex = $@"({NumberOrdinalRegex}|{RelativeOrdinalRegex})"; public static readonly string SuffixBasicOrdinalRegex = $@"((((({TensNumberIntegerRegex}(\s+(and\s+)?|\s*-\s*){ZeroToNineIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex}|{AnIntRegex})(\s+{RoundNumberIntegerRegex})+)\s+(and\s+)?)*({TensNumberIntegerRegex}(\s+|\s*-\s*))?{BasicOrdinalRegex})"; public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s+){RoundNumberOrdinalRegex})"; public static readonly string AllOrdinalRegex = $@"({SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; public const string OrdinalSuffixRegex = @"(?<=\b)((\d*(1st|2nd|3rd|4th|5th|6th|7th|8th|9th|0th))|(11th|12th))(?=\b)"; public const string OrdinalNumericRegex = @"(?<=\b)(\d{1,3}(\s*,\s*\d{3})*\s*th)(?=\b)"; - public static readonly string OrdinalRoundNumberRegex = $@"(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; public static readonly string FractionPrepositionWithinPercentModeRegex = $@"(?<=\b)(?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b)"; public static readonly string AllPointRegex = $@"((\s+{ZeroToNineIntegerRegex})+|(\s+{SeparaIntRegex}))"; @@ -254,9 +254,12 @@ public static class NumbersDefinitions { { @"last", @"0" }, { @"next one", @"1" }, + { @"current", @"0" }, + { @"current one", @"0" }, { @"previous one", @"-1" }, { @"the second to last", @"-1" }, { @"the one before the last one", @"-1" }, + { @"the one before the last", @"-1" }, { @"next to last", @"-1" }, { @"penultimate", @"-1" }, { @"the last but one", @"-1" }, @@ -269,8 +272,11 @@ public static class NumbersDefinitions { @"last", @"end" }, { @"next one", @"current" }, { @"previous one", @"current" }, + { @"current", @"current" }, + { @"current one", @"current" }, { @"the second to last", @"end" }, { @"the one before the last one", @"end" }, + { @"the one before the last", @"end" }, { @"next to last", @"end" }, { @"penultimate", @"end" }, { @"the last but one", @"end" }, diff --git a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs index 98e4e42377..2f45f0c7a6 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Extractors/BaseNumberExtractor.cs @@ -111,7 +111,7 @@ public virtual List Extract(string source) if (ExtractType.Contains(Constants.MODEL_ORDINAL)) { er.Metadata = new Metadata(); - if (originalMatch.Groups[Constants.RelativeOrdinalGroupName].Success) + if ((Options & NumberOptions.SuppressExtendedTypes) == 0 && originalMatch.Groups[Constants.RelativeOrdinalGroupName].Success) { er.Metadata.IsOrdinalRelative = true; } diff --git a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs index d7388e1d84..53309c4ba1 100644 --- a/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs +++ b/.NET/Microsoft.Recognizers.Text.Number/Parsers/BaseNumberParser.cs @@ -161,7 +161,7 @@ public virtual ParseResult Parse(ExtractResult extResult) // Add "offset" and "relativeTo" for ordinal if (!string.IsNullOrEmpty(ret.Type) && ret.Type.Contains(Constants.MODEL_ORDINAL)) { - if (Config.RelativeReferenceOffsetMap.ContainsKey(extResult.Text) && + if ((this.Config.Options & NumberOptions.SuppressExtendedTypes) == 0 && Config.RelativeReferenceOffsetMap.ContainsKey(extResult.Text) && Config.RelativeReferenceRelativeToMap.ContainsKey(extResult.Text)) { ret.Metadata.Offset = Config.RelativeReferenceOffsetMap[extResult.Text]; @@ -302,10 +302,13 @@ protected ParseResult TextNumberParse(ExtractResult extResult) handle = Config.HalfADozenRegex.Replace(handle, Config.HalfADozenText); // Handling cases like "last", "next one", "previous one" - if (Config.RelativeReferenceOffsetMap.ContainsKey(extResult.Text) && - Config.RelativeReferenceRelativeToMap.ContainsKey(extResult.Text)) + if ((this.Config.Options & NumberOptions.SuppressExtendedTypes) == 0) { - return result; + if (Config.RelativeReferenceOffsetMap.ContainsKey(extResult.Text) && + Config.RelativeReferenceRelativeToMap.ContainsKey(extResult.Text)) + { + return result; + } } var numGroup = handle.Split(Config.WrittenDecimalSeparatorTexts.ToArray(), StringSplitOptions.RemoveEmptyEntries); diff --git a/Patterns/English/English-Numbers.yaml b/Patterns/English/English-Numbers.yaml index 892df7ca1e..0c16ba8c36 100644 --- a/Patterns/English/English-Numbers.yaml +++ b/Patterns/English/English-Numbers.yaml @@ -55,7 +55,7 @@ RoundNumberOrdinalRegex: !simpleRegex NumberOrdinalRegex: !simpleRegex def: (first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth) RelativeOrdinalRegex: !simpleRegex - def: (?(next|previous)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous) + def: (?(next|previous|current)\s+one|(the\s+second|next)\s+to\s+last|the\s+one\s+before\s+the\s+last(\s+one)?|the\s+last\s+but\s+one|(ante)?penultimate|last|next|previous|current) BasicOrdinalRegex: !nestedRegex def: ({NumberOrdinalRegex}|{RelativeOrdinalRegex}) references: [ NumberOrdinalRegex, RelativeOrdinalRegex ] @@ -73,7 +73,7 @@ OrdinalSuffixRegex: !simpleRegex OrdinalNumericRegex: !simpleRegex def: (?<=\b)(\d{1,3}(\s*,\s*\d{3})*\s*th)(?=\b) OrdinalRoundNumberRegex: !nestedRegex - def: (?({AllIntRegex})|((?({AllIntRegex})|(\d+)(?![\.,]))(?=\b) @@ -365,9 +365,12 @@ RelativeReferenceOffsetMap: !dictionary entries: last: 0 next one: 1 + current: 0 + current one: 0 previous one: -1 the second to last: -1 the one before the last one: -1 + the one before the last: -1 next to last: -1 penultimate: -1 the last but one: -1 @@ -380,8 +383,11 @@ RelativeReferenceRelativeToMap: !dictionary last: end next one: current previous one: current + current: current + current one: current the second to last: end the one before the last one: end + the one before the last: end next to last: end penultimate: end the last but one: end diff --git a/Specs/Number/English/OrdinalModel.json b/Specs/Number/English/OrdinalModel.json index 7e595c0407..4a4eefdd63 100644 --- a/Specs/Number/English/OrdinalModel.json +++ b/Specs/Number/English/OrdinalModel.json @@ -678,5 +678,73 @@ } } ] + }, + { + "Input": "The one before the last one is the right one", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "the one before the last one", + "Start": 0, + "End": 26, + "TypeName": "ordinal.relative", + "Resolution": { + "offset": "-1", + "relativeTo": "end", + "value": "end-1" + } + } + ] + }, + { + "Input": "I meant the one before the last", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "the one before the last", + "Start": 8, + "End": 30, + "TypeName": "ordinal.relative", + "Resolution": { + "offset": "-1", + "relativeTo": "end", + "value": "end-1" + } + } + ] + }, + { + "Input": "I meant the current one", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "current one", + "Start": 12, + "End": 22, + "TypeName": "ordinal.relative", + "Resolution": { + "offset": "0", + "relativeTo": "current", + "value": "current+0" + } + } + ] + }, + { + "Input": "Look at the current page", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "current", + "Start": 12, + "End": 18, + "TypeName": "ordinal.relative", + "Resolution": { + "offset": "0", + "relativeTo": "current", + "value": "current+0" + } + } + ] } ] \ No newline at end of file diff --git a/Specs/Number/English/OrdinalModelSuppressExtendedTypes.json b/Specs/Number/English/OrdinalModelSuppressExtendedTypes.json index 32e9693dd8..5cae324b47 100644 --- a/Specs/Number/English/OrdinalModelSuppressExtendedTypes.json +++ b/Specs/Number/English/OrdinalModelSuppressExtendedTypes.json @@ -129,5 +129,25 @@ "NotSupported": "javascript, python, java", "Results": [ ] + }, + { + "Input": "The one before the last one is the right one", + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "I meant the one before the last", + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "I meant the current one", + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "Look at the current page", + "NotSupported": "javascript, python, java", + "Results": [] } ] \ No newline at end of file