Skip to content

Commit

Permalink
Maintenance update of auto-gen resources and minor changes (#2524)
Browse files Browse the repository at this point in the history
* - Removed unnecessary half-width digits in ZH and JA regexes;
- Fix merge issue in FR/ES/PT for time-of-day + time (additional cases for #2482);
- Add NumberRange patterns to be localized in French YAML;
- Re-gen resources.

* - Fixing incorrect ET timezone offset;
- Adding non-standard speed units;
- Add bitcoin and its Unicode symbol as currency unit;
- Breaking clustered units into separate entries;
- Fixing incorrect French timezone spec file name;
- Re-gen resources across platforms.

* - Adding specs;
- Adding more verbosity to assertion failures in Python DateTime test code;
- Re-gen resources across platforms.

* Disabling Unicode fraction case in JavaScript units.
  • Loading branch information
tellarin authored Mar 15, 2021
1 parent ca59ab0 commit c70ff6f
Show file tree
Hide file tree
Showing 95 changed files with 1,746 additions and 580 deletions.
10 changes: 7 additions & 3 deletions .NET/Microsoft.Recognizers.Definitions.Common/BaseCurrency.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ public static class BaseCurrency
{ @"UYU", @"CENTESIMO" },
{ @"VEF", @"CENTIMO" },
{ @"YER", @"FILS" },
{ @"ZMW", @"NGWEE" }
{ @"ZMW", @"NGWEE" },
{ @"_XBT", @"MILLIBITCOIN|SATOSHI" }
};
public static readonly Dictionary<string, long> CurrencyFractionalRatios = new Dictionary<string, long>
{
Expand Down Expand Up @@ -269,7 +270,9 @@ public static class BaseCurrency
{ @"Ngwee", 100 },
{ @"Kwartje", 4 },
{ @"Dubbeltje", 10 },
{ @"Stuiver", 20 }
{ @"Stuiver", 20 },
{ @"Millibitcoin", 1000 },
{ @"Satoshi", 100000000 }
};
public static readonly Dictionary<string, long> NonStandardFractionalSubunits = new Dictionary<string, long>
{
Expand All @@ -279,7 +282,8 @@ public static class BaseCurrency
{ @"OMR", 1000 },
{ @"YDD", 1000 },
{ @"TND", 1000 },
{ @"MRO", 5 }
{ @"MRO", 5 },
{ @"_XBT", 1000 }
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public static class NumbersDefinitions
'拾'
};
public static readonly string DigitalNumberRegex = $@"((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))";
public const string ZeroToNineFullHalfRegex = @"[\d1234567890]";
public const string ZeroToNineFullHalfRegex = @"[\d]";
public static readonly string DigitNumRegex = $@"{ZeroToNineFullHalfRegex}+";
public const string DozenRegex = @".*打$";
public const string PercentageRegex = @"(?<=(((?<![十百千拾佰仟])[十百千拾佰仟])|([万亿兆萬億]))\s*分\s*之).+|.+(?=个\s*(((?<![十百千拾佰仟])[十百千拾佰仟])|([万亿兆萬億]))\s*分\s*点)|.*(?=[%%])";
Expand Down Expand Up @@ -201,8 +201,8 @@ public static class NumbersDefinitions
public static readonly string DoubleWithMultiplierRegex = $@"({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+[\..]{ZeroToNineFullHalfRegex}+\s*{BaseNumbers.NumberMultiplierRegex}";
public static readonly string DoubleWithThousandsRegex = $@"{NegativeNumberTermsRegex}?(({ZeroToNineFullHalfRegex}+)|({ZeroToNineFullHalfRegex}{{1,3}}(,{ZeroToNineFullHalfRegex}{{3}})+))([\..]{ZeroToNineFullHalfRegex}+)?\s*[多几幾余]?[万亿萬億]{{1,2}}";
public static readonly string DoubleAllFloatRegex = $@"(?<![百佰]\s*分\s*之\s*(({AllIntRegex}[点點]*)|{AllFloatRegex})*){AllFloatRegex}(?!{ZeroToNineIntegerRegex}*\s*[个個]\s*[百佰]\s*分\s*[点點])";
public static readonly string DoubleExponentialNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?e(([--++]*[1-9123456789]{ZeroToNineFullHalfRegex}*)|[00](?!{ZeroToNineFullHalfRegex}+))";
public static readonly string DoubleScientificNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?({ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?)\^([--++]*[1-9123456789]{ZeroToNineFullHalfRegex}*)";
public static readonly string DoubleExponentialNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?{ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?e(([--++]*[1-9]{ZeroToNineFullHalfRegex}*)|0(?!{ZeroToNineFullHalfRegex}+))";
public static readonly string DoubleScientificNotationRegex = $@"(?<!{ZeroToNineFullHalfRegex}+[\..])({NegativeNumberTermsRegexNum}\s*)?({ZeroToNineFullHalfRegex}+([\..]{ZeroToNineFullHalfRegex}+)?)\^([--++]*[1-9]{ZeroToNineFullHalfRegex}*)";
public static readonly string OrdinalRegex = $@"{AllIntRegex}";
public static readonly string OrdinalNumbersRegex = $@"{ZeroToNineFullHalfRegex}+";
public static readonly string AllFractionNumber = $@"{NegativeNumberTermsRegex}?(({ZeroToNineFullHalfRegex}+|{AllIntRegex})\s*又\s*)?{NegativeNumberTermsRegex}?({ZeroToNineFullHalfRegex}+|{AllIntRegex})\s*分\s*之\s*{NegativeNumberTermsRegex}?({ZeroToNineFullHalfRegex}+|{AllIntRegex})({PointRegexStr}{AllIntRegex}*)?";
Expand All @@ -227,7 +227,7 @@ public static class NumbersDefinitions
public static readonly string FoldsPercentageRegex = $@"{ZeroToNineIntegerRegex}(\s*[点點]?\s*{ZeroToNineIntegerRegex})?\s*折";
public static readonly string SimpleFoldsPercentageRegex = $@"{ZeroToNineFullHalfRegex}\s*成(\s*(半|{ZeroToNineFullHalfRegex}))?";
public static readonly string SpecialsPercentageRegex = $@"({ZeroToNineIntegerRegex}|[十拾])\s*成(\s*(半|{ZeroToNineIntegerRegex}))?";
public static readonly string NumbersSpecialsPercentageRegex = $@"({ZeroToNineFullHalfRegex}[\..]{ZeroToNineFullHalfRegex}|[11][00])\s*成";
public static readonly string NumbersSpecialsPercentageRegex = $@"({ZeroToNineFullHalfRegex}[\..]{ZeroToNineFullHalfRegex}|10)\s*成";
public static readonly string SimpleSpecialsPercentageRegex = $@"{ZeroToNineIntegerRegex}\s*[点點]\s*{ZeroToNineIntegerRegex}\s*成";
public const string SpecialsFoldsPercentageRegex = @"半\s*成|(?<=打)[对對]\s*折|半\s*折";
public const string SpeicalCharBeforeNumber = @"(有|是|为)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ public static class NumbersWithUnitDefinitions
{ @"Fen", @"分钱|分" },
{ @"Jiao", @"毛钱|毛|角钱|角" },
{ @"Finnish markka", @"芬兰马克" },
{ @"Penni", @"盆尼" }
{ @"Penni", @"盆尼" },
{ @"Bitcoin", @"₿|btc|xbt|个比特币|比特币" }
};
public static readonly Dictionary<string, string> CurrencyNameToIsoCodeMap = new Dictionary<string, string>
{
Expand Down Expand Up @@ -536,7 +537,8 @@ public static class NumbersWithUnitDefinitions
{ @"Turkish lira", @"₺" },
{ @"Euro", @"€" },
{ @"Pound", @"£" },
{ @"Costa Rican colón", @"₡" }
{ @"Costa Rican colón", @"₡" },
{ @"Bitcoin", @"₿|btc|xbt" }
};
public static readonly IList<string> CurrencyAmbiguousValues = new List<string>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,10 @@ public static class NumbersWithUnitDefinitions
{ @"Fen", @"fen" },
{ @"Jiao", @"jiao|mao" },
{ @"Finnish markka", @"finse mark|finse markka|suomen markka|finnish markka|finsk mark|fim|markkaa|markka" },
{ @"Penni", @"penniä|penni" }
{ @"Penni", @"penniä|penni" },
{ @"Bitcoin", @"bitcoin|bitcoins|btc|xbt|₿" },
{ @"Millibitcoin", @"millibitcoin|millibitcoins|milibitcoin|milibitcoins" },
{ @"Satoshi", @"satoshi|satoshis" }
};
public static readonly Dictionary<string, string> CurrencyNameToIsoCodeMap = new Dictionary<string, string>
{
Expand Down Expand Up @@ -609,7 +612,8 @@ public static class NumbersWithUnitDefinitions
{ @"British Virgin Islands dollar", @"_BD" },
{ @"Ascension pound", @"_AP" },
{ @"Alderney pound", @"_ALP" },
{ @"Abkhazian apsar", @"_AA" }
{ @"Abkhazian apsar", @"_AA" },
{ @"Bitcoin", @"_XBT" }
};
public static readonly Dictionary<string, string> FractionalUnitNameToCodeMap = new Dictionary<string, string>
{
Expand Down Expand Up @@ -688,7 +692,9 @@ public static class NumbersWithUnitDefinitions
{ @"Ngwee", @"NGWEE" },
{ @"Kwartje", @"KWARTJE" },
{ @"Dubbeltje", @"DUBBELTJE" },
{ @"Stuiver", @"STUIVER" }
{ @"Stuiver", @"STUIVER" },
{ @"Millibitcoin", @"MILLIBITCOIN" },
{ @"Satoshi", @"SATOSHI" }
};
public const string CompoundUnitConnectorRegex = @"(?<spacer>en)";
public static readonly Dictionary<string, string> CurrencyPrefixList = new Dictionary<string, string>
Expand Down Expand Up @@ -732,7 +738,8 @@ public static class NumbersWithUnitDefinitions
{ @"Euro", @"€|eur" },
{ @"Pound", @"£" },
{ @"Costa Rican colón", @"₡" },
{ @"Turkish lira", @"₺" }
{ @"Turkish lira", @"₺" },
{ @"Bitcoin", @"₿|btc|xbt" }
};
public static readonly IList<string> AmbiguousCurrencyUnitList = new List<string>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ public static class DateTimeDefinitions
public const string HourNumRegex = @"\b(?<hournum>zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\b";
public const string MinuteNumRegex = @"(?<minnum>ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)";
public const string DeltaMinuteNumRegex = @"(?<deltaminnum>ten|eleven|twelve|thirteen|fifteen|eighteen|(four|six|seven|nine)(teen)?|twenty|thirty|forty|fifty|one|two|three|five|eight)";
public const string PmRegex = @"(?<pm>(((?:at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))";
public const string PmRegexFull = @"(?<pm>((?:at|in|around|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime))";
public const string AmRegex = @"(?<am>((?:at|in|around|on|for)\s+(the\s+)?)?(morning))";
public const string PmRegex = @"(?<pm>(((?:at|in|around|circa|on|for)\s+(the\s+)?)?(afternoon|evening|midnight|lunchtime))|((at|in|around|on|for)\s+(the\s+)?night))";
public const string PmRegexFull = @"(?<pm>((?:at|in|around|circa|on|for)\s+(the\s+)?)?(afternoon|evening|(mid)?night|lunchtime))";
public const string AmRegex = @"(?<am>((?:at|in|around|circa|on|for)\s+(the\s+)?)?(morning))";
public const string LunchRegex = @"\blunchtime\b";
public const string NightRegex = @"\b(mid)?night\b";
public const string CommonDatePrefixRegex = @"^[\.]";
Expand All @@ -156,7 +156,7 @@ public static class DateTimeDefinitions
public const string MidafternoonRegex = @"(?<midafternoon>mid\s*(-\s*)?afternoon)";
public const string MiddayRegex = @"(?<midday>mid\s*(-\s*)?day|((12\s)?noon))";
public static readonly string MidTimeRegex = $@"(?<mid>({MidnightRegex}|{MidmorningRegex}|{MidafternoonRegex}|{MiddayRegex}))";
public static readonly string AtRegex = $@"\b(?:(?:(?<=\bat\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?<iam>a)|(?<ipm>p)))?|{MidTimeRegex}))|{MidTimeRegex})\b";
public static readonly string AtRegex = $@"\b(?:(?:(?<=\b(at|(at)?\s*around|circa)\s+)(?:{WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex}(?!\.\d)(\s*((?<iam>a)|(?<ipm>p)))?|{MidTimeRegex}))|{MidTimeRegex})\b";
public static readonly string IshRegex = $@"\b({BaseDateTime.HourRegex}(-|——)?ish|noon(ish)?)\b";
public const string TimeUnitRegex = @"([^A-Za-z]{1,}|\b)(?<unit>h(ou)?rs?|h|min(ute)?s?|sec(ond)?s?)\b";
public const string RestrictedTimeUnitRegex = @"(?<unit>hour|minute)\b";
Expand Down Expand Up @@ -198,9 +198,9 @@ public static class DateTimeDefinitions
public const string DateTimeTimeOfDayRegex = @"\b(?<timeOfDay>morning|(?<pm>afternoon|night|evening))\b";
public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})\b|\btoni(ght|te))\b";
public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}";
public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|in|on))?\s*$";
public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|circa|in|on))?\s*$";
public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?<!{NonTimeContextTokens}\s*)\b({HourNumRegex}|{BaseDateTime.HourRegex})\s*(,\s*)?(in\s+)?{DateTimeSpecificTimeOfDayRegex}\b";
public static readonly string SimpleTimeOfTodayBeforeRegex = $@"\b{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})\b";
public static readonly string SimpleTimeOfTodayBeforeRegex = $@"\b{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(at|around|circa))?\s*({HourNumRegex}|{BaseDateTime.HourRegex})\b";
public const string SpecificEndOfRegex = @"(the\s+)?end of(\s+the)?\s*$";
public const string UnspecificEndOfRegex = @"\b(the\s+)?(eod|(end\s+of\s+day))\b";
public const string UnspecificEndOfRangeRegex = @"\b(eoy)\b";
Expand Down Expand Up @@ -256,12 +256,12 @@ public static class DateTimeDefinitions
public static readonly string RelativeTimeUnitRegex = $@"(?:(?:(?:{NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+({TimeUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))";
public static readonly string RelativeDurationUnitRegex = $@"(?:(?:(?<=({NextPrefixRegex}|{PreviousPrefixRegex}|{ThisPrefixRegex})\s+)({DurationUnitRegex}))|((the|my))\s+({RestrictedTimeUnitRegex}))";
public static readonly string ReferenceDatePeriodRegex = $@"\b{ReferencePrefixRegex}\s+(?<duration>week|month|year|decade|weekend)\b";
public const string ConnectorRegex = @"^(-|,|for|t|around|@)$";
public const string ConnectorRegex = @"^(-|,|for|t|around|circa|@)$";
public const string FromToRegex = @"(\b(from).+(to|and|or)\b.+)";
public const string SingleAmbiguousMonthRegex = @"^(the\s+)?(may|march)$";
public const string SingleAmbiguousTermsRegex = @"^(the\s+)?(day|week|month|year)$";
public const string UnspecificDatePeriodRegex = @"^(week|month|year)$";
public const string PrepositionSuffixRegex = @"\b(on|in|at|around|from|to)$";
public const string PrepositionSuffixRegex = @"\b(on|in|at|around|circa|from|to)$";
public const string FlexibleDayRegex = @"(?<DayOfMonth>([A-Za-z]+\s)?[A-Za-z\d]+)";
public static readonly string ForTheRegex = $@"\b((((?<=for\s+)the\s+{FlexibleDayRegex})|((?<=on\s+)(the\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?<end>\s*(,|\.(?!\d)|!|\?|$)))";
public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(the\s+{FlexibleDayRegex})\b";
Expand Down
Loading

0 comments on commit c70ff6f

Please sign in to comment.