Skip to content

Commit

Permalink
Support for Italian Number in .NET (#1604)
Browse files Browse the repository at this point in the history
* Support for Italian Number in .NET

* Skipping 3 test cases that currently don't pass

* Fix review feedback

* Removed commented out code, added Italian-specific comment
  • Loading branch information
aitelintII authored and tellarin committed Jun 19, 2019
1 parent 4655cf7 commit ce8ed99
Show file tree
Hide file tree
Showing 8 changed files with 1,218 additions and 623 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ public static class URLDefinitions
public static readonly string UrlRegex = $@"{UrlPrefixRegex}(?<Tld>[a-zA-Z]{{2,18}}){BaseURL.UrlSuffixRegex}";
public static readonly string IpUrlRegex = $@"(?<IPurl>({ExtractionRestrictionRegex}{BaseURL.ProtocolRegex}({BaseIp.Ipv4Regex}|localhost){BaseURL.UrlSuffixRegex}))";
}
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

Expand All @@ -10,6 +11,12 @@ namespace Microsoft.Recognizers.Text.Number.Italian
{
public class ItalianNumberParserConfiguration : BaseNumberParserConfiguration
{
/// <summary>
/// Initializes a new instance of the <see cref="ItalianNumberParserConfiguration"/> class
/// by running the parameterless constructor and then storing the supplied options.
/// </summary>
/// <param name="options">Value assigned to the <c>Options</c> member after default initialization.</param>
public ItalianNumberParserConfiguration(NumberOptions options)
    : this() => this.Options = options;

public ItalianNumberParserConfiguration()
: this(new CultureInfo(Culture.Italian))
{
Expand All @@ -34,20 +41,56 @@ public ItalianNumberParserConfiguration(CultureInfo ci)
this.WrittenFractionSeparatorTexts = NumbersDefinitions.WrittenFractionSeparatorTexts;

this.CardinalNumberMap = NumbersDefinitions.CardinalNumberMap.ToImmutableDictionary();
this.OrdinalNumberMap = NumberMapGenerator.InitOrdinalNumberMap(NumbersDefinitions.OrdinalNumberMap, NumbersDefinitions.PrefixCardinalMap, NumbersDefinitions.SuffixOrdinalMap);
RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary();
RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary();
this.OrdinalNumberMap = NumbersDefinitions.OrdinalNumberMap.ToImmutableDictionary();
this.RelativeReferenceOffsetMap = NumbersDefinitions.RelativeReferenceOffsetMap.ToImmutableDictionary();
this.RelativeReferenceRelativeToMap = NumbersDefinitions.RelativeReferenceRelativeToMap.ToImmutableDictionary();
this.RoundNumberMap = NumbersDefinitions.RoundNumberMap.ToImmutableDictionary();
this.HalfADozenRegex = new Regex(NumbersDefinitions.HalfADozenRegex, RegexOptions.Singleline);
this.DigitalNumberRegex = new Regex(NumbersDefinitions.DigitalNumberRegex, RegexOptions.Singleline);
this.NegativeNumberSignRegex = new Regex(NumbersDefinitions.NegativeNumberSignRegex, RegexOptions.Singleline);
this.FractionPrepositionRegex = new Regex(NumbersDefinitions.FractionPrepositionRegex, RegexOptions.Singleline);
this.OneToNineOrdinalRegex = new Regex(NumbersDefinitions.OneToNineOrdinalRegex, RegexOptions.Singleline);
}

/// <summary>
/// Gets the non-decimal separator text.
/// NOTE(review): no assignment to this property is visible in this part of the file - confirm where it is set.
/// </summary>
public string NonDecimalSeparatorText { get; private set; }

/// <summary>
/// Gets the regex built in the constructor from <c>NumbersDefinitions.OneToNineOrdinalRegex</c>
/// with <c>RegexOptions.Singleline</c>. <c>NormalizeTokenSet</c> tests the last token against it;
/// presumably it matches the stand-alone Italian ordinals below ten (verify against the definitions file).
/// </summary>
public Regex OneToNineOrdinalRegex { get; }

/// <summary>
/// Normalizes the tokens of a written number before they are resolved to a value.
/// </summary>
/// <remarks>
/// Two passes are applied:
/// 1. Every "a", "-", "b" triple is merged into the single token "a-b".
/// 2. Italian-specific: when the last token is an isolated one-to-nine ordinal (a fraction
///    denominator), the two tokens before it are concatenated so that the denominator stays isolated.
/// </remarks>
/// <param name="tokens">Tokens to normalize; enumerated exactly once.</param>
/// <param name="context">Parse result the tokens belong to; not used by this implementation.</param>
/// <returns>A new list holding the normalized tokens.</returns>
public override IEnumerable<string> NormalizeTokenSet(IEnumerable<string> tokens, ParseResult context)
{
    var tokenList = tokens.ToList();
    var tokenLen = tokenList.Count;
    var fracWords = new List<string>(tokenLen);

    for (var i = 0; i < tokenLen; i++)
    {
        // The bound guarantees that both i + 1 and i + 2 are valid indices.
        if ((i < tokenLen - 2) && tokenList[i + 1] == "-")
        {
            fracWords.Add(tokenList[i] + tokenList[i + 1] + tokenList[i + 2]);

            // Skip the hyphen and the right-hand word that were just merged.
            i += 2;
        }
        else
        {
            fracWords.Add(tokenList[i]);
        }
    }

    // The following step is needed in Italian to correctly compute some fraction patterns,
    // e.g. 'due milioni duemiladuecento quinti' (= 2002200/5), which is otherwise interpreted as
    // 2000000/2205. In Italian, isolated ordinals below ten have a different form from the one
    // they take when concatenated to other numbers, so the lines below keep the trailing ordinal
    // isolated by concatenating the two tokens that precede it.
    var fracLen = fracWords.Count;
    if (fracLen > 2
        && this.OneToNineOrdinalRegex.IsMatch(fracWords[fracLen - 1])
        && fracWords[fracLen - 3] != "e"
        && fracWords[fracLen - 2] != "e")
    {
        // Leave the tokens alone when either of them is the conjunction "e".
        fracWords[fracLen - 3] += fracWords[fracLen - 2];
        fracWords.RemoveAt(fracLen - 2);
    }

    return fracWords;
}

public override long ResolveCompositeNumber(string numberStr)
Expand All @@ -63,6 +106,8 @@ public override long ResolveCompositeNumber(string numberStr)
}

long value = 0;
long prevValue = 0;

long finalValue = 0;
var strBuilder = new StringBuilder();
int lastGoodChar = 0;
Expand All @@ -78,7 +123,22 @@ public override long ResolveCompositeNumber(string numberStr)

if ((i + 1) == numberStr.Length)
{
if (prevValue > 0 && value > prevValue)
{
value = (prevValue * value) - prevValue;
}

if (prevValue < 1000)
{
prevValue = value + prevValue;
}
else
{
prevValue = value;
}

finalValue += value;

strBuilder.Clear();
i = lastGoodChar++;
value = 0;
Expand Down
3 changes: 1 addition & 2 deletions .NET/Microsoft.Recognizers.Text.Number/NumberRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using Microsoft.Recognizers.Text.Number.English;
using Microsoft.Recognizers.Text.Number.French;
using Microsoft.Recognizers.Text.Number.German;
using Microsoft.Recognizers.Text.Number.Italian;
using Microsoft.Recognizers.Text.Number.Japanese;
using Microsoft.Recognizers.Text.Number.Korean;
using Microsoft.Recognizers.Text.Number.Portuguese;
Expand Down Expand Up @@ -204,7 +205,6 @@ protected override void InitializeConfiguration()
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new GermanNumberParserConfiguration()),
new German.PercentageExtractor()));

/*
RegisterModel<NumberModel>(
Culture.Italian,
(options) => new NumberModel(
Expand All @@ -222,7 +222,6 @@ protected override void InitializeConfiguration()
(options) => new PercentModel(
AgnosticNumberParserFactory.GetParser(AgnosticNumberParserType.Percentage, new ItalianNumberParserConfiguration()),
new Italian.PercentageExtractor()));
*/

RegisterModel<NumberModel>(
Culture.Japanese,
Expand Down
Loading

0 comments on commit ce8ed99

Please sign in to comment.