diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java index 72b07f559b..84a3108642 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/EnglishDateTime.java @@ -884,7 +884,7 @@ public class EnglishDateTime { public static final String FlexibleDayRegex = "(?([A-Za-z]+\\s)?[A-Za-z\\d]+)"; - public static final String ForTheRegex = "\\b((((?<=for\\s+)the\\s+{FlexibleDayRegex})|((?<=on\\s+)(the\\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.(?!\\d)|!|\\?|$)))" + public static final String ForTheRegex = "\\b((((?<=\\bfor\\s+)the\\s+{FlexibleDayRegex})|((?<=\\bon\\s+)(the\\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.(?!\\d)|!|\\?|$)))" .replace("{FlexibleDayRegex}", FlexibleDayRegex); public static final String WeekDayAndDayOfMonthRegex = "\\b{WeekDayRegex}\\s+(the\\s+{FlexibleDayRegex})\\b" diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java index c74074f047..23c0420cda 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/FrenchDateTime.java @@ -23,9 +23,9 @@ public class FrenchDateTime { public static final Boolean CheckBothBeforeAfter = false; - public static final String TillRegex = "(?au|et|(jusqu')?[aà]|avant|--|-|—|——)"; + public static final String TillRegex = "(?\\b(au|et|(jusqu')?a|avant)\\b|(jusqu')?à|--|-|—|——)"; - public static final String RangeConnectorRegex = "(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)"; + public static final String RangeConnectorRegex = "(?\\b(de\\s+la|au|(jusqu')?a|et(\\s*la)?)\\b|(jusqu')?à|--|-|—|——)"; public static final String RelativeRegex = "(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))"; @@ -147,7 +147,7 @@ public class FrenchDateTime { .replace("{PastSuffixRegex}", PastSuffixRegex) .replace("{NextSuffixRegex}", NextSuffixRegex); - public static final String OneWordPeriodRegex = "\\b(({RelativeRegex}\\s+)?{WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\\s+{StrictRelativeRegex}|{RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|(mois|l'année))\\b" + public static final String OneWordPeriodRegex = "\\b(({RelativeRegex}\\s+)?{WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\\s+{StrictRelativeRegex}|{RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|mois|l'année|an)\\b" .replace("{WrittenMonthRegex}", WrittenMonthRegex) .replace("{RelativeRegex}", RelativeRegex) .replace("{StrictRelativeRegex}", StrictRelativeRegex); @@ -703,7 +703,7 @@ public class FrenchDateTime { public static final String SingleAmbiguousMonthRegex = "^(le\\s+)?(may|march)$"; - public static final String UnspecificDatePeriodRegex = "^\\b$"; + public static final String UnspecificDatePeriodRegex = "^(semaine|mois|an(n[eé]e)?)$"; public static final String PrepositionSuffixRegex = "\\b(du|de|[àa]|vers|dans)$"; diff --git a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java index f1d7172c01..402a3f4499 100644 --- a/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java +++ b/Java/libraries/recognizers-text-date-time/src/main/java/com/microsoft/recognizers/text/datetime/resources/PortugueseDateTime.java @@ -376,7 +376,7 @@ public class PortugueseDateTime { .replace("{MiddayRegex}", MiddayRegex) .replace("{MidEarlyMorning}", MidEarlyMorning); - public static final String AtRegex = "\\b(((?<=\\b([aà]s?)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b)?|(?<=\\b(s(er)?[aã]o|v[aã]o\\s+ser|^[eé]h?)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b))(\\s+{OclockRegex})?|{MidTimeRegex})\\b" + public static final String AtRegex = "\\b(((?<=\\b([aà]s?)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b)?|(?<=\\b(s(er)?[aã]o|v[aã]o\\s+ser|^[eé]h?)\\s+|^\\s*)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b))(\\s+{OclockRegex})?|{MidTimeRegex})\\b" .replace("{HourNumRegex}", HourNumRegex) .replace("{BaseDateTime.HourRegex}", BaseDateTime.HourRegex) .replace("{WrittenTimeRegex}", WrittenTimeRegex) @@ -1042,6 +1042,10 @@ public class PortugueseDateTime { .put("^(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)$", "([$%£&!?@#])(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)|(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)([$%£&@#])") .build(); + public static final ImmutableMap AmbiguityTimeFiltersDict = ImmutableMap.builder() + .put("horas?$", "\\b((por|duração\\s+de|durante)\\s+(\\S+\\s+){1,2}horas?|horas?\\s+(\\S+\\s+){0,2}dur(ação|ou|a(rá|va)?))\\b") + .build(); + public static final List EarlyMorningTermList = Arrays.asList("madrugada"); public static final List MorningTermList = Arrays.asList("manha", "manhã"); diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java index cc3ec1ccf8..f054143338 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/BaseNumbers.java @@ -34,7 +34,9 @@ public static String DoubleRegexDefinition(String placeholder, String thousandsm .replace("{decimalmark}", decimalmark); } - public static final String PlaceHolderDefault = "\\D|\\b"; + public static final String PlaceHolderDefault = "(?=\\D)|\\b"; + + public static final String PlaceHolderMixed = "\\D|\\b"; public static final String CaseSensitiveTerms = "(?<=(\\s|\\d))(kB|K[Bb]?|M[BbM]?|G[Bb]?|B)\\b"; diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java index 916ea07654..5298224693 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/EnglishNumeric.java @@ -127,8 +127,15 @@ public static String NumbersWithPlaceHolder(String placeholder) { public static final String FractionNotationRegex = "{BaseNumbers.FractionNotationRegex}" .replace("{BaseNumbers.FractionNotationRegex}", BaseNumbers.FractionNotationRegex); - public static final String RoundMultiplierRegex = "\\b\\s*((of\\s+)?a\\s+)?(?{RoundNumberIntegerRegex})$" - .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); + public static final String FractionMultiplierRegex = "(?\\s+and\\s+(a|one|{TwoToNineIntegerRegex})\\s+(half|quarter|third|fourth|fifth|sixth|seventh|eighth|nine?th|tenth)s?)" + .replace("{TwoToNineIntegerRegex}", TwoToNineIntegerRegex); + + public static final String RoundMultiplierWithFraction = "(?<=(?(?:million|mln|billion|bln|trillion|tln)s?)(?={FractionMultiplierRegex}?$)" + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex) + .replace("{FractionMultiplierRegex}", FractionMultiplierRegex); + + public static final String RoundMultiplierRegex = "\\b\\s*((of\\s+)?a\\s+)?({RoundMultiplierWithFraction}|(?(?:hundred|thousand|lakh|crore)s?)$)" + .replace("{RoundMultiplierWithFraction}", RoundMultiplierWithFraction); public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+(and\\s+)?)?(({AllIntRegex})(\\s+|\\s*-\\s*)((({AllOrdinalRegex})|({RoundNumberOrdinalRegex}))s|halves|quarters)((\\s+of\\s+a)?\\s+{RoundNumberIntegerRegex})?|(half(\\s+a)?|quarter(\\s+of\\s+a)?)\\s+{RoundNumberIntegerRegex})(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) @@ -136,11 +143,12 @@ public static String NumbersWithPlaceHolder(String placeholder) { .replace("{RoundNumberOrdinalRegex}", RoundNumberOrdinalRegex) .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); - public static final String FractionNounWithArticleRegex = "(?<=\\b)((({AllIntRegex}\\s+(and\\s+)?)?(an?|one)(\\s+|\\s*-\\s*)(?!\\bfirst\\b|\\bsecond\\b)(({AllOrdinalRegex})|({RoundNumberOrdinalRegex})|(half|quarter)(((\\s+of)?\\s+a)?\\s+{RoundNumberIntegerRegex})?))|(half))(?=\\b)" + public static final String FractionNounWithArticleRegex = "(?<=\\b)(((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(and\\s+)?)?(an?|one)(\\s+|\\s*-\\s*)(?!\\bfirst\\b|\\bsecond\\b)(({AllOrdinalRegex})|({RoundNumberOrdinalRegex})|(half|quarter)(((\\s+of)?\\s+a)?\\s+{RoundNumberIntegerRegex})?))|(half))(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) .replace("{RoundNumberOrdinalRegex}", RoundNumberOrdinalRegex) - .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex) + .replace("{RoundNumberIntegerRegexWithLocks}", RoundNumberIntegerRegexWithLocks); public static final String FractionPrepositionRegex = "(?({AllIntRegex})|((?in|out\\s+of))\\s+(?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) @@ -369,6 +377,13 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { .put("tln", 1000000000000L) .put("lakh", 100000L) .put("crore", 10000000L) + .put("hundreds", 100L) + .put("thousands", 1000L) + .put("millions", 1000000L) + .put("billions", 1000000000L) + .put("trillions", 1000000000000L) + .put("lakhs", 100000L) + .put("crores", 10000000L) .build(); public static final ImmutableMap OrdinalNumberMap = ImmutableMap.builder() @@ -455,6 +470,13 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { .put("tln", 1000000000000L) .put("lakh", 100000L) .put("crore", 10000000L) + .put("hundreds", 100L) + .put("thousands", 1000L) + .put("millions", 1000000L) + .put("billions", 1000000000L) + .put("trillions", 1000000000000L) + .put("lakhs", 100000L) + .put("crores", 10000000L) .put("hundredth", 100L) .put("thousandth", 1000L) .put("millionth", 1000000L) diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java index 02b9a1630e..7dddd4500f 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/FrenchNumeric.java @@ -29,6 +29,8 @@ public class FrenchNumeric { public static final String ZeroToNineIntegerRegex = "(une?|deux|trois|quatre|cinq|six|sept|huit|neuf|z[ée]ro)"; + public static final String TwoToNineIntegerRegex = "(deux|trois|quatre|cinq|six|sept|huit|neuf)"; + public static final String TenToNineteenIntegerRegex = "((seize|quinze|quatorze|treize|douze|onze)|dix(\\Wneuf|\\Whuit|\\Wsept)?)"; public static final String TensNumberIntegerRegex = "(quatre\\Wvingt(s|\\Wdix)?|soixante(\\Wdix)?|vingt|trente|quarante|cinquante|septante|octante|huitante|nonante)"; @@ -146,15 +148,26 @@ public static String NumbersWithPlaceHolder(String placeholder) { public static final String FractionNotationRegex = "{BaseNumbers.FractionNotationRegex}" .replace("{BaseNumbers.FractionNotationRegex}", BaseNumbers.FractionNotationRegex); - public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+((et)\\s+)?)?({AllIntRegex})(\\s+((et)\\s)?)((({AllOrdinalRegex})s?|({SuffixOrdinalRegex})s?)|demi[es]?|tiers?|quarts?)(?=\\b)" + public static final String FractionMultiplierRegex = "(?\\s+et\\s+(demi[es]?|(une?|{TwoToNineIntegerRegex})\\s+(demie?|tier|quart|(cinqui|sixi|septi|hui[tr]i|neuvi|dixi)[eè]me)s?))" + .replace("{TwoToNineIntegerRegex}", TwoToNineIntegerRegex); + + public static final String RoundMultiplierWithFraction = "(?(millions?|milliards?|billions?))(?={FractionMultiplierRegex}?$)" + .replace("{FractionMultiplierRegex}", FractionMultiplierRegex); + + public static final String RoundMultiplierRegex = "\\b\\s*({RoundMultiplierWithFraction}|(?(cent|mille))$)" + .replace("{RoundMultiplierWithFraction}", RoundMultiplierWithFraction); + + public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+((et)\\s+)?)?({AllIntRegex}(\\s+((et)\\s)?)(({AllOrdinalRegex}s?|{SuffixOrdinalRegex}s?)|(demi[es]?|tiers?|quarts?))|(un\\s+)?(demi|tier|quart)(\\s+(de\\s+)?|\\s*-\\s*){RoundNumberIntegerRegex})(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) - .replace("{SuffixOrdinalRegex}", SuffixOrdinalRegex); + .replace("{SuffixOrdinalRegex}", SuffixOrdinalRegex) + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); - public static final String FractionNounWithArticleRegex = "(?<=\\b)({AllIntRegex}\\s+(et\\s+)?)?(une?)(\\s+)(({AllOrdinalRegex})|({SuffixOrdinalRegex})|(et\\s+)?demi[es]?)(?=\\b)" + public static final String FractionNounWithArticleRegex = "(?<=\\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(et\\s+)?)?((une?)(\\s+)(({AllOrdinalRegex})|({SuffixOrdinalRegex})|(et\\s+)?demi[es]?)|demi[es]?)(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) - .replace("{SuffixOrdinalRegex}", SuffixOrdinalRegex); + .replace("{SuffixOrdinalRegex}", SuffixOrdinalRegex) + .replace("{RoundNumberIntegerRegexWithLocks}", RoundNumberIntegerRegexWithLocks); public static final String FractionPrepositionRegex = "(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) @@ -316,6 +329,8 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { public static final List WrittenFractionSeparatorTexts = Arrays.asList("et", "sur"); + public static final List OneHalfTokens = Arrays.asList("un", "demi"); + public static final String HalfADozenRegex = "(?<=\\b)demie?\\s+douzaine"; public static final String DigitalNumberRegex = "((?<=\\b)(cent|mille|millions?|milliards?|billions?|douzaines?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))" diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/GermanNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/GermanNumeric.java index 7b0e311413..ab5b5bc453 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/GermanNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/GermanNumeric.java @@ -27,7 +27,9 @@ public class GermanNumeric { public static final String ZeroToNineIntegerRegex = "(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\\.|,|!|\\?)))|eine[rn]?|zwei|zwo|sechs)"; - public static final String RoundNumberIntegerRegex = "((ein)?hundert|tausend|(\\s*(million(en)?|mio|milliarden?|mrd|billion(en)?)\\s*))"; + public static final String TwoToNineIntegerRegex = "(drei|sieben|acht|vier|fuenf|fünf|neun|zwei|zwo|sechs)"; + + public static final String RoundNumberIntegerRegex = "((ein)?hundert|tausend|((million(en)?|mio|milliarden?|mrd|billion(en)?)))"; public static final String AnIntRegex = "(eine?)(?=\\s)"; @@ -47,7 +49,7 @@ public class GermanNumeric { .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex) .replace("{AnIntRegex}", AnIntRegex); - public static final String AllIntRegex = "(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\\s*{RoundNumberIntegerRegex})))*{SeparaIntRegex})" + public static final String AllIntRegex = "(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\\s*{RoundNumberIntegerRegex}\\s*)))*{SeparaIntRegex})" .replace("{TenToNineteenIntegerRegex}", TenToNineteenIntegerRegex) .replace("{TensNumberIntegerRegex}", TensNumberIntegerRegex) .replace("{ZeroToNineIntegerRegex}", ZeroToNineIntegerRegex) @@ -121,17 +123,30 @@ public static String NumbersWithPlaceHolder(String placeholder) { public static final List OneHalfTokens = Arrays.asList("ein", "halb", "halbes"); - public static final String FractionNounRegex = "(?<=\\b)(({AllIntRegex})(\\s*|\\s*-\\s*)((({AllOrdinalRegex})|({RoundNumberOrdinalRegex}))|halb(e[rs]?)?|hälfte)|{FractionUnitsRegex})(?=\\b)" + public static final String FractionMultiplierRegex = "(?(\\s+und\\s+)?(anderthalb|einundhalb|dreiviertel)|(\\s+und\\s+)?(eine?|{TwoToNineIntegerRegex})\\s*(halbe?|(dritt|viert|fünft|fuenft|sechst|siebt|acht|neunt|zehnt)(er|es|en|el|e)?))" + .replace("{TwoToNineIntegerRegex}", TwoToNineIntegerRegex); + + public static final String RoundMultiplierWithFraction = "(?<=(?(million(en)?|mio|milliarden?|mrd|billion(en)?))(?={FractionMultiplierRegex}?$)" + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex) + .replace("{FractionMultiplierRegex}", FractionMultiplierRegex); + + public static final String RoundMultiplierRegex = "\\b\\s*((von\\s+)?ein(er|es|en|el|e)?\\s+)?({RoundMultiplierWithFraction}|(?(?:hundert|tausend))$)" + .replace("{RoundMultiplierWithFraction}", RoundMultiplierWithFraction); + + public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+(und\\s+)?)?(({AllIntRegex})(\\s*|\\s*-\\s*)((({AllOrdinalRegex})|({RoundNumberOrdinalRegex}))|halb(e[rs]?)?|hälfte)(\\s+{RoundNumberIntegerRegex})?|(eine\\s+(halbe|viertel)\\s+){RoundNumberIntegerRegex}|{FractionUnitsRegex}(\\s+{RoundNumberIntegerRegex})?)(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) .replace("{RoundNumberOrdinalRegex}", RoundNumberOrdinalRegex) - .replace("{FractionUnitsRegex}", FractionUnitsRegex); + .replace("{FractionUnitsRegex}", FractionUnitsRegex) + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); - public static final String FractionNounWithArticleRegex = "(?<=\\b)(({AllIntRegex}\\s+(und\\s+)?)?eine?(\\s+|\\s*-\\s*)({AllOrdinalRegex}|{RoundNumberOrdinalRegex}|{FractionUnitsRegex}|({AllIntRegex}ein)?(halb(e[rs]?)?|hälfte))|{AllIntRegex}ein(halb))(?=\\b)" + public static final String FractionNounWithArticleRegex = "(?<=\\b)((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(und\\s+)?)?eine?(\\s+|\\s*-\\s*)({AllOrdinalRegex}|{RoundNumberOrdinalRegex}|{FractionUnitsRegex}|({AllIntRegex}ein)?(halb(e[rs]?)?|hälfte))|{AllIntRegex}ein(halb)(\\s+{RoundNumberIntegerRegex})?)(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) .replace("{RoundNumberOrdinalRegex}", RoundNumberOrdinalRegex) - .replace("{FractionUnitsRegex}", FractionUnitsRegex); + .replace("{FractionUnitsRegex}", FractionUnitsRegex) + .replace("{RoundNumberIntegerRegexWithLocks}", RoundNumberIntegerRegexWithLocks) + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); public static final String FractionPrepositionRegex = "(?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?!\\.))(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) @@ -287,7 +302,7 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { public static final List WrittenIntegerSeparatorTexts = Arrays.asList("und"); - public static final List WrittenFractionSeparatorTexts = Arrays.asList("durch"); + public static final List WrittenFractionSeparatorTexts = Arrays.asList("durch", "und"); public static final String HalfADozenRegex = "ein\\s+halbes\\s+dutzend"; diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/PortugueseNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/PortugueseNumeric.java index 3d0329efd0..fccfab8a6d 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/PortugueseNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/PortugueseNumeric.java @@ -31,6 +31,8 @@ public class PortugueseNumeric { public static final String ZeroToNineIntegerRegex = "(quatro|cinco|sete|nove|zero|tr[êe]s|seis|oito|d(oi|ua)s|h?uma?)"; + public static final String TwoToNineIntegerRegex = "(quatro|cinco|sete|nove|tr[êe]s|seis|oito|d(oi|ua)s)"; + public static final String TenToNineteenIntegerRegex = "(dez[ea]sseis|dez[ea]ssete|dez[ea]nove|dezoito|(c|qua)torze|quinze|treze|d[ée]z|onze|doze)"; public static final String TensNumberIntegerRegex = "(cinquenta|quarenta|trinta|sessenta|setenta|oitenta|noventa|vinte)"; @@ -100,7 +102,7 @@ public static String NumbersWithPlaceHolder(String placeholder) { public static final String SimpleRoundOrdinalRegex = "(mil[eé]sim[oa]|milion[eé]sim[oa]|bilion[eé]sim[oa]|trilion[eé]sim[oa]|quatrilion[eé]sim[oa]|quintilion[eé]sim[oa])"; - public static final String OneToNineOrdinalRegex = "(primeir[oa]|segund[oa]|terceir[oa]|quart[oa]|quint[oa]|sext[oa]|s[eé]tim[oa]|oitav[oa]|non[oa])"; + public static final String OneToNineOrdinalRegex = "(primeir[oa]|segund[oa]|terceir[oa]|terç[oa]|quart[oa]|quint[oa]|sext[oa]|s[eé]tim[oa]|oitav[oa]|non[oa])"; public static final String TensOrdinalRegex = "(nonag[eé]sim[oa]|octog[eé]sim[oa]|setuag[eé]sim[oa]|septuag[eé]sim[oa]|sexag[eé]sim[oa]|quinquag[eé]sim[oa]|quadrag[eé]sim[oa]|trig[eé]sim[oa]|vig[eé]sim[oa]|d[eé]cim[oa])"; @@ -147,16 +149,27 @@ public static String NumbersWithPlaceHolder(String placeholder) { public static final String FractionNotationWithSpacesRegex = "(((?<=\\W|^)-\\s*)|(?<=\\b))\\d+\\s+\\d+[/]\\d+(?=(\\b[^/]|$))"; - public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+((e|com)\\s+)?)?({AllIntRegex})(\\s+((e|com)\\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SuffixRoundOrdinalRegex})s?)|mei[oa]?|ter[çc]o?)(?=\\b)" + public static final String FractionMultiplierRegex = "(?\\s+(e|com)\\s+(meio|(um|{TwoToNineIntegerRegex})\\s+(meio|terç[oa]|quart[oa]|quint[oa]|sext[oa]|s[eé]tim[oa]|oitav[oa]|non[oa]|d[eé]cim[oa])s?))" + .replace("{TwoToNineIntegerRegex}", TwoToNineIntegerRegex); + + public static final String RoundMultiplierWithFraction = "(?(?:(mil(h([ãa]o|[õo]es))|bilh([ãa]o|[õo]es)|trilh([ãa]o|[õo]es)|qua[td]rilh([ãa]o|[õo]es)|quintilh([ãa]o|[õo]es))))(?={FractionMultiplierRegex}?$)" + .replace("{FractionMultiplierRegex}", FractionMultiplierRegex); + + public static final String RoundMultiplierRegex = "\\b\\s*({RoundMultiplierWithFraction}|(?(mil))$)" + .replace("{RoundMultiplierWithFraction}", RoundMultiplierWithFraction); + + public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+((e|com)\\s+)?)?(({AllIntRegex})(\\s+((e|com)\\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SuffixRoundOrdinalRegex})s?)|mei[oa]?|ter[çc]o?)|(meio|um\\s+quarto\\s+de)\\s+{RoundNumberIntegerRegex})(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) .replace("{SpecialFractionInteger}", SpecialFractionInteger) - .replace("{SuffixRoundOrdinalRegex}", SuffixRoundOrdinalRegex); + .replace("{SuffixRoundOrdinalRegex}", SuffixRoundOrdinalRegex) + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); - public static final String FractionNounWithArticleRegex = "(?<=\\b)({AllIntRegex}\\s+(e\\s+)?)?(um|um[as])(\\s+)(({AllOrdinalRegex})|({SuffixRoundOrdinalRegex})|(e\\s+)?mei[oa]?)(?=\\b)" + public static final String FractionNounWithArticleRegex = "(?<=\\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(e\\s+)?)?((um|um[as])(\\s+)(({AllOrdinalRegex})|({SuffixRoundOrdinalRegex})|(e\\s+)?mei[oa]?)|mei[oa]?)(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) - .replace("{SuffixRoundOrdinalRegex}", SuffixRoundOrdinalRegex); + .replace("{SuffixRoundOrdinalRegex}", SuffixRoundOrdinalRegex) + .replace("{RoundNumberIntegerRegexWithLocks}", RoundNumberIntegerRegexWithLocks); public static final String FractionPrepositionRegex = "(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) @@ -214,6 +227,8 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { public static final List WrittenFractionSuffix = Arrays.asList("avo", "ava"); + public static final List OneHalfTokens = Arrays.asList("um", "meio"); + public static final Character PluralSuffix = 's'; public static final String HalfADozenRegex = "meia\\s+d[uú]zia"; @@ -307,6 +322,8 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { .put("segunda", 2L) .put("terceiro", 3L) .put("terceira", 3L) + .put("terço", 3L) + .put("terça", 3L) .put("quarto", 4L) .put("quarta", 4L) .put("quinto", 5L) diff --git a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/SpanishNumeric.java b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/SpanishNumeric.java index 94cd1afe0c..6732861f10 100644 --- a/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/SpanishNumeric.java +++ b/Java/libraries/recognizers-text-number/src/main/java/com/microsoft/recognizers/text/number/resources/SpanishNumeric.java @@ -29,10 +29,12 @@ public class SpanishNumeric { public static final String HundredsNumberIntegerRegex = "(cuatrocient[ao]s|trescient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s|doscient[ao]s|quinient[ao]s|(?\\s+(y|con)\\s+(medio|(un|{TwoToNineIntegerRegex})\\s+(medio|terci[oa]?|cuart[oa]|quint[oa]|sext[oa]|s[eé]ptim[oa]|octav[oa]|noven[oa]|d[eé]cim[oa])s?))" + .replace("{TwoToNineIntegerRegex}", TwoToNineIntegerRegex); + + public static final String RoundMultiplierWithFraction = "(?(?:(mil\\s+millones|mill[oó]n(es)?|bill[oó]n(es)?|trill[oó]n(es)?|cuatrill[oó]n(es)?|quintill[oó]n(es)?|sextill[oó]n(es)?|septill[oó]n(es)?)))(?={FractionMultiplierRegex}?$)" + .replace("{FractionMultiplierRegex}", FractionMultiplierRegex); + + public static final String RoundMultiplierRegex = "\\b\\s*({RoundMultiplierWithFraction}|(?(mil))$)" + .replace("{RoundMultiplierWithFraction}", RoundMultiplierWithFraction); + + public static final String FractionNounRegex = "(?<=\\b)({AllIntRegex}\\s+((y|con)\\s+)?)?(({AllIntRegex})(\\s+((y|con)\\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SufixRoundOrdinalRegex})s?)|medi[oa]s?|tercios?)|(medio|un\\s+cuarto\\s+de)\\s+{RoundNumberIntegerRegex})(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) .replace("{SpecialFractionInteger}", SpecialFractionInteger) - .replace("{SufixRoundOrdinalRegex}", SufixRoundOrdinalRegex); + .replace("{SufixRoundOrdinalRegex}", SufixRoundOrdinalRegex) + .replace("{RoundNumberIntegerRegex}", RoundNumberIntegerRegex); - public static final String FractionNounWithArticleRegex = "(?<=\\b)({AllIntRegex}\\s+(y\\s+)?)?(un|un[oa])(\\s+)(({AllOrdinalRegex})|({SufixRoundOrdinalRegex})|(y\\s+)?medi[oa]s?)(?=\\b)" + public static final String FractionNounWithArticleRegex = "(?<=\\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(y\\s+)?)?((un|un[oa])(\\s+)(({AllOrdinalRegex})|({SufixRoundOrdinalRegex}))|(un[ao]?\\s+)?medi[oa]s?)(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) .replace("{AllOrdinalRegex}", AllOrdinalRegex) - .replace("{SufixRoundOrdinalRegex}", SufixRoundOrdinalRegex); + .replace("{SufixRoundOrdinalRegex}", SufixRoundOrdinalRegex) + .replace("{RoundNumberIntegerRegexWithLocks}", RoundNumberIntegerRegexWithLocks); public static final String FractionPrepositionRegex = "(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)" .replace("{AllIntRegex}", AllIntRegex) @@ -317,6 +330,8 @@ public static String DoubleWithoutIntegralRegex(String placeholder) { public static final List WrittenFractionSeparatorTexts = Arrays.asList("con"); + public static final List OneHalfTokens = Arrays.asList("un", "medio"); + public static final String HalfADozenRegex = "media\\s+docena"; public static final String DigitalNumberRegex = "((?<=\\b)(mil(l[oó]n(es)?)?|bill[oó]n(es)?|trill[oó]n(es)?|docenas?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))" diff --git a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts index d23d6e4b5a..8cbc53db40 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/englishDateTime.ts @@ -265,7 +265,7 @@ export namespace EnglishDateTime { export const UnspecificDatePeriodRegex = `^(week|fortnight|month|year)$`; export const PrepositionSuffixRegex = `\\b(on|in|at|around|circa|from|to)$`; export const FlexibleDayRegex = `(?([A-Za-z]+\\s)?[A-Za-z\\d]+)`; - export const ForTheRegex = `\\b((((?<=for\\s+)the\\s+${FlexibleDayRegex})|((?<=on\\s+)(the\\s+)?${FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.(?!\\d)|!|\\?|$)))`; + export const ForTheRegex = `\\b((((?<=\\bfor\\s+)the\\s+${FlexibleDayRegex})|((?<=\\bon\\s+)(the\\s+)?${FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.(?!\\d)|!|\\?|$)))`; export const WeekDayAndDayOfMonthRegex = `\\b${WeekDayRegex}\\s+(the\\s+${FlexibleDayRegex})\\b`; export const WeekDayAndDayRegex = `\\b${WeekDayRegex}\\s+(?!(the))${DayRegex}(?!([-:]|(\\s+(${AmDescRegex}|${PmDescRegex}|${OclockRegex}))))\\b`; export const RestOfDateRegex = `\\b(rest|remaining)\\s+(of\\s+)?((the|my|this|current)\\s+)?(?week|fortnight|month|year|decade)\\b`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts index e727cae03a..96132c1236 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/frenchDateTime.ts @@ -13,8 +13,8 @@ import { BaseDateTime } from "./baseDateTime"; export namespace FrenchDateTime { export const LangMarker = `Fre`; export const CheckBothBeforeAfter = false; - export const TillRegex = `(?au|et|(jusqu')?[aà]|avant|--|-|—|——)`; - export const RangeConnectorRegex = `(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)`; + export const TillRegex = `(?\\b(au|et|(jusqu')?a|avant)\\b|(jusqu')?à|--|-|—|——)`; + export const RangeConnectorRegex = `(?\\b(de\\s+la|au|(jusqu')?a|et(\\s*la)?)\\b|(jusqu')?à|--|-|—|——)`; export const RelativeRegex = `(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))`; export const StrictRelativeRegex = `(?prochaine?|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))`; export const NextSuffixRegex = `(?prochain(es?)?|suivante)\\b`; @@ -50,7 +50,7 @@ export namespace FrenchDateTime { export const BetweenRegex = `\\b(entre\\s+)(${DayRegex})\\s*${RangeConnectorRegex}\\s*(${DayRegex})\\s+${MonthSuffixRegex}((\\s+|\\s*,\\s*)${YearRegex})?\\b`; export const YearWordRegex = `\\b(?l'ann[ée]e)\\b`; export const MonthWithYear = `\\b(${WrittenMonthRegex}(\\s*),?(\\s+de)?(\\s*)(${YearRegex}|${TwoDigitYearRegex}|(?cette)\\s*${YearWordRegex})|${YearWordRegex}\\s*(${PastSuffixRegex}|${NextSuffixRegex}))`; - export const OneWordPeriodRegex = `\\b((${RelativeRegex}\\s+)?${WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\\s+${StrictRelativeRegex}|${RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|(mois|l'année))\\b`; + export const OneWordPeriodRegex = `\\b((${RelativeRegex}\\s+)?${WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\\s+${StrictRelativeRegex}|${RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|mois|l'année|an)\\b`; export const MonthNumWithYear = `(${YearRegex}(\\s*)[/\\-\\.](\\s*)${MonthNumRegex})|(${MonthNumRegex}(\\s*)[/\\-](\\s*)${YearRegex})`; export const WeekOfMonthRegex = `(?(le\\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\\s+semaine(\\s+de)?\\s+${MonthSuffixRegex})`; export const WeekOfYearRegex = `(?(le\\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\\s+semaine(\\s+de)?\\s+(${YearRegex}|${RelativeRegex}\\s+ann[ée]e))`; @@ -225,7 +225,7 @@ export namespace FrenchDateTime { export const FromRegex2 = `((depuis|de)(\\s*las?)?)$`; export const FromToRegex = `\\b(du|depuis|des?).+(au|à|a)\\b.+`; export const SingleAmbiguousMonthRegex = `^(le\\s+)?(may|march)$`; - export const UnspecificDatePeriodRegex = `^\\b$`; + export const UnspecificDatePeriodRegex = `^(semaine|mois|an(n[eé]e)?)$`; export const PrepositionSuffixRegex = `\\b(du|de|[àa]|vers|dans)$`; export const FlexibleDayRegex = `(?([A-Za-z]+\\s)?[A-Za-z\\d]+)`; export const ForTheRegex = `\\b(((pour le ${FlexibleDayRegex})|(dans (le\\s+)?${FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.|!|\\?|$)))`; diff --git a/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts b/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts index 60893b79cd..3ef23e4cf5 100644 --- a/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts +++ b/JavaScript/packages/recognizers-date-time/src/resources/portugueseDateTime.ts @@ -124,7 +124,7 @@ export namespace PortugueseDateTime { export const MidafternoonRegex = `(?meio\\s+da\\s+tarde)`; export const MiddayRegex = `(?meio\\s*(-\\s*)?dia)`; export const MidTimeRegex = `(?(${MidnightRegex}|${MidmorningRegex}|${MidEarlyMorning}|${MidafternoonRegex}|${MiddayRegex}))`; - export const AtRegex = `\\b(((?<=\\b([aà]s?)\\s+)(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b)?|(?<=\\b(s(er)?[aã]o|v[aã]o\\s+ser|^[eé]h?)\\s+)(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b))(\\s+${OclockRegex})?|${MidTimeRegex})\\b`; + export const AtRegex = `\\b(((?<=\\b([aà]s?)\\s+)(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b)?|(?<=\\b(s(er)?[aã]o|v[aã]o\\s+ser|^[eé]h?)\\s+|^\\s*)(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b))(\\s+${OclockRegex})?|${MidTimeRegex})\\b`; export const ConnectNumRegex = `(${BaseDateTime.HourRegex}(?[0-5][0-9])\\s*${DescRegex})`; export const TimeRegex1 = `(\\b${TimePrefix}\\s+)?(${WrittenTimeRegex}|${HourNumRegex}|${BaseDateTime.HourRegex})\\s*(${DescRegex})`; export const TimeRegex2 = `(\\b${TimePrefix}\\s+)?(t)?${BaseDateTime.HourRegex}(\\s*)?:(\\s*)?${BaseDateTime.MinuteRegex}((\\s*)?:(\\s*)?${BaseDateTime.SecondRegex})?((\\s*${DescRegex})|\\b)`; @@ -251,6 +251,7 @@ export namespace PortugueseDateTime { export const DefaultLanguageFallback = `DMY`; export const DurationDateRestrictions = [ ]; export const AmbiguityFiltersDict: ReadonlyMap = new Map([["^\\d{4}$", "(\\d\\.\\d{4}|\\d{4}\\.\\d)"],["^(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)$", "([$%£&!?@#])(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)|(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)([$%£&@#])"]]); + export const AmbiguityTimeFiltersDict: ReadonlyMap = new Map([["horas?$", "\\b((por|duração\\s+de|durante)\\s+(\\S+\\s+){1,2}horas?|horas?\\s+(\\S+\\s+){0,2}dur(ação|ou|a(rá|va)?))\\b"]]); export const EarlyMorningTermList = [ "madrugada" ]; export const MorningTermList = [ "manha","manhã" ]; export const AfternoonTermList = [ "passado o meio dia","depois do meio dia" ]; diff --git a/JavaScript/packages/recognizers-number-with-unit/src/resources/baseNumbers.ts b/JavaScript/packages/recognizers-number-with-unit/src/resources/baseNumbers.ts index 3fd81917be..543c2ab786 100644 --- a/JavaScript/packages/recognizers-number-with-unit/src/resources/baseNumbers.ts +++ b/JavaScript/packages/recognizers-number-with-unit/src/resources/baseNumbers.ts @@ -15,7 +15,8 @@ export namespace BaseNumbers { export const IntegerRegexDefinition = (placeholder: string, thousandsmark: string) => { return `(((? { return `(((? { return `(((? { return `(((?${RoundNumberIntegerRegex})$`; + export const FractionMultiplierRegex = `(?\\s+and\\s+(a|one|${TwoToNineIntegerRegex})\\s+(half|quarter|third|fourth|fifth|sixth|seventh|eighth|nine?th|tenth)s?)`; + export const RoundMultiplierWithFraction = `(?<=(?(?:million|mln|billion|bln|trillion|tln)s?)(?=${FractionMultiplierRegex}?$)`; + export const RoundMultiplierRegex = `\\b\\s*((of\\s+)?a\\s+)?(${RoundMultiplierWithFraction}|(?(?:hundred|thousand|lakh|crore)s?)$)`; export const FractionNounRegex = `(?<=\\b)(${AllIntRegex}\\s+(and\\s+)?)?((${AllIntRegex})(\\s+|\\s*-\\s*)(((${AllOrdinalRegex})|(${RoundNumberOrdinalRegex}))s|halves|quarters)((\\s+of\\s+a)?\\s+${RoundNumberIntegerRegex})?|(half(\\s+a)?|quarter(\\s+of\\s+a)?)\\s+${RoundNumberIntegerRegex})(?=\\b)`; - export const FractionNounWithArticleRegex = `(?<=\\b)(((${AllIntRegex}\\s+(and\\s+)?)?(an?|one)(\\s+|\\s*-\\s*)(?!\\bfirst\\b|\\bsecond\\b)((${AllOrdinalRegex})|(${RoundNumberOrdinalRegex})|(half|quarter)(((\\s+of)?\\s+a)?\\s+${RoundNumberIntegerRegex})?))|(half))(?=\\b)`; + export const FractionNounWithArticleRegex = `(?<=\\b)((((${AllIntRegex}|${RoundNumberIntegerRegexWithLocks})\\s+(and\\s+)?)?(an?|one)(\\s+|\\s*-\\s*)(?!\\bfirst\\b|\\bsecond\\b)((${AllOrdinalRegex})|(${RoundNumberOrdinalRegex})|(half|quarter)(((\\s+of)?\\s+a)?\\s+${RoundNumberIntegerRegex})?))|(half))(?=\\b)`; export const FractionPrepositionRegex = `(?(${AllIntRegex})|((?in|out\\s+of))\\s+(?(${AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)`; export const FractionPrepositionWithinPercentModeRegex = `(?(${AllIntRegex})|((?(${AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)`; export const AllPointRegex = `((\\s+${ZeroToNineIntegerRegex})+|(\\s+${SeparaIntRegex}))`; @@ -107,9 +109,9 @@ export namespace EnglishNumeric { export const WrittenFractionSeparatorTexts = [ "and" ]; export const HalfADozenRegex = `half\\s+a\\s+dozen`; export const DigitalNumberRegex = `((?<=\\b)(hundred|thousand|[mb]illion|trillion|[mbt]ln|lakh|crore|dozen(s)?)(?=\\b))|((?<=(\\d|\\b))${BaseNumbers.MultiplierLookupRegex}(?=\\b))`; - export const CardinalNumberMap: ReadonlyMap = new Map([["a", 1],["zero", 0],["an", 1],["one", 1],["two", 2],["three", 3],["four", 4],["five", 5],["six", 6],["seven", 7],["eight", 8],["nine", 9],["ten", 10],["eleven", 11],["twelve", 12],["dozen", 12],["dozens", 12],["thirteen", 13],["fourteen", 14],["fifteen", 15],["sixteen", 16],["seventeen", 17],["eighteen", 18],["nineteen", 19],["twenty", 20],["thirty", 30],["forty", 40],["fifty", 50],["sixty", 60],["seventy", 70],["eighty", 80],["ninety", 90],["hundred", 100],["thousand", 1000],["million", 1000000],["mln", 1000000],["billion", 1000000000],["bln", 1000000000],["trillion", 1000000000000],["tln", 1000000000000],["lakh", 100000],["crore", 10000000]]); + export const CardinalNumberMap: ReadonlyMap = new Map([["a", 1],["zero", 0],["an", 1],["one", 1],["two", 2],["three", 3],["four", 4],["five", 5],["six", 6],["seven", 7],["eight", 8],["nine", 9],["ten", 10],["eleven", 11],["twelve", 12],["dozen", 12],["dozens", 12],["thirteen", 13],["fourteen", 14],["fifteen", 15],["sixteen", 16],["seventeen", 17],["eighteen", 18],["nineteen", 19],["twenty", 20],["thirty", 30],["forty", 40],["fifty", 50],["sixty", 60],["seventy", 70],["eighty", 80],["ninety", 90],["hundred", 100],["thousand", 1000],["million", 1000000],["mln", 1000000],["billion", 1000000000],["bln", 1000000000],["trillion", 1000000000000],["tln", 1000000000000],["lakh", 100000],["crore", 10000000],["hundreds", 100],["thousands", 1000],["millions", 1000000],["billions", 1000000000],["trillions", 1000000000000],["lakhs", 100000],["crores", 10000000]]); export const OrdinalNumberMap: ReadonlyMap = new Map([["first", 1],["second", 2],["secondary", 2],["half", 2],["third", 3],["fourth", 4],["quarter", 4],["fifth", 5],["sixth", 6],["seventh", 7],["eighth", 8],["ninth", 9],["nineth", 9],["tenth", 10],["eleventh", 11],["twelfth", 12],["thirteenth", 13],["fourteenth", 14],["fifteenth", 15],["sixteenth", 16],["seventeenth", 17],["eighteenth", 18],["nineteenth", 19],["twentieth", 20],["thirtieth", 30],["fortieth", 40],["fiftieth", 50],["sixtieth", 60],["seventieth", 70],["eightieth", 80],["ninetieth", 90],["hundredth", 100],["thousandth", 1000],["millionth", 1000000],["billionth", 1000000000],["trillionth", 1000000000000],["firsts", 1],["halves", 2],["thirds", 3],["fourths", 4],["quarters", 4],["fifths", 5],["sixths", 6],["sevenths", 7],["eighths", 8],["ninths", 9],["nineths", 9],["tenths", 10],["elevenths", 11],["twelfths", 12],["thirteenths", 13],["fourteenths", 14],["fifteenths", 15],["sixteenths", 16],["seventeenths", 17],["eighteenths", 18],["nineteenths", 19],["twentieths", 20],["thirtieths", 30],["fortieths", 40],["fiftieths", 50],["sixtieths", 60],["seventieths", 70],["eightieths", 80],["ninetieths", 90],["hundredths", 100],["thousandths", 1000],["millionths", 1000000],["billionths", 1000000000],["trillionths", 1000000000000]]); - export const RoundNumberMap: ReadonlyMap = new Map([["hundred", 100],["thousand", 1000],["million", 1000000],["mln", 1000000],["billion", 1000000000],["bln", 1000000000],["trillion", 1000000000000],["tln", 1000000000000],["lakh", 100000],["crore", 10000000],["hundredth", 100],["thousandth", 1000],["millionth", 1000000],["billionth", 1000000000],["trillionth", 1000000000000],["hundredths", 100],["thousandths", 1000],["millionths", 1000000],["billionths", 1000000000],["trillionths", 1000000000000],["dozen", 12],["dozens", 12],["k", 1000],["m", 1000000],["mm", 1000000],["mil", 1000000],["g", 1000000000],["b", 1000000000],["t", 1000000000000]]); + export const RoundNumberMap: ReadonlyMap = new Map([["hundred", 100],["thousand", 1000],["million", 1000000],["mln", 1000000],["billion", 1000000000],["bln", 1000000000],["trillion", 1000000000000],["tln", 1000000000000],["lakh", 100000],["crore", 10000000],["hundreds", 100],["thousands", 1000],["millions", 1000000],["billions", 1000000000],["trillions", 1000000000000],["lakhs", 100000],["crores", 10000000],["hundredth", 100],["thousandth", 1000],["millionth", 1000000],["billionth", 1000000000],["trillionth", 1000000000000],["hundredths", 100],["thousandths", 1000],["millionths", 1000000],["billionths", 1000000000],["trillionths", 1000000000000],["dozen", 12],["dozens", 12],["k", 1000],["m", 1000000],["mm", 1000000],["mil", 1000000],["g", 1000000000],["b", 1000000000],["t", 1000000000000]]); export const AmbiguityFiltersDict: ReadonlyMap = new Map([["\\bone\\b", "\\b(the|this|that|which)\\s+(one)\\b"]]); export const RelativeReferenceOffsetMap: ReadonlyMap = new Map([["last", ""],["next one", ""],["current", ""],["current one", ""],["previous one", ""],["the second to last", ""],["the one before the last one", ""],["the one before the last", ""],["next to last", ""],["penultimate", ""],["the last but one", ""],["antepenultimate", ""],["next", ""],["previous", ""]]); export const RelativeReferenceRelativeToMap: ReadonlyMap = new Map([["last", "end"],["next one", "current"],["previous one", "current"],["current", "current"],["current one", "current"],["the second to last", "end"],["the one before the last one", "end"],["the one before the last", "end"],["next to last", "end"],["penultimate", "end"],["the last but one", "end"],["antepenultimate", "end"],["next", "current"],["previous", "current"]]); diff --git a/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts b/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts index d1de3b5591..85dde096b8 100644 --- a/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts +++ b/JavaScript/packages/recognizers-number/src/resources/frenchNumeric.ts @@ -16,6 +16,7 @@ export namespace FrenchNumeric { export const MultiDecimalSeparatorCulture = true; export const RoundNumberIntegerRegex = `(cent|mille|millions?|milliards?|billions?)`; export const ZeroToNineIntegerRegex = `(une?|deux|trois|quatre|cinq|six|sept|huit|neuf|z[ée]ro)`; + export const TwoToNineIntegerRegex = `(deux|trois|quatre|cinq|six|sept|huit|neuf)`; export const TenToNineteenIntegerRegex = `((seize|quinze|quatorze|treize|douze|onze)|dix(\\Wneuf|\\Whuit|\\Wsept)?)`; export const TensNumberIntegerRegex = `(quatre\\Wvingt(s|\\Wdix)?|soixante(\\Wdix)?|vingt|trente|quarante|cinquante|septante|octante|huitante|nonante)`; export const DigitsNumberRegex = `\\d|\\d{1,3}(\\.\\d{3})`; @@ -51,8 +52,11 @@ export namespace FrenchNumeric { export const OrdinalFrenchRegex = `(?<=\\b)${AllOrdinalRegex}(?=\\b)`; export const FractionNotationWithSpacesRegex = `(((?<=\\W|^)-\\s*)|(?<=\\b))\\d+\\s+\\d+[/]\\d+(?=(\\b[^/]|$))`; export const FractionNotationRegex = `${BaseNumbers.FractionNotationRegex}`; - export const FractionNounRegex = `(?<=\\b)(${AllIntRegex}\\s+((et)\\s+)?)?(${AllIntRegex})(\\s+((et)\\s)?)(((${AllOrdinalRegex})s?|(${SuffixOrdinalRegex})s?)|demi[es]?|tiers?|quarts?)(?=\\b)`; - export const FractionNounWithArticleRegex = `(?<=\\b)(${AllIntRegex}\\s+(et\\s+)?)?(une?)(\\s+)((${AllOrdinalRegex})|(${SuffixOrdinalRegex})|(et\\s+)?demi[es]?)(?=\\b)`; + export const FractionMultiplierRegex = `(?\\s+et\\s+(demi[es]?|(une?|${TwoToNineIntegerRegex})\\s+(demie?|tier|quart|(cinqui|sixi|septi|hui[tr]i|neuvi|dixi)[eè]me)s?))`; + export const RoundMultiplierWithFraction = `(?(millions?|milliards?|billions?))(?=${FractionMultiplierRegex}?$)`; + export const RoundMultiplierRegex = `\\b\\s*(${RoundMultiplierWithFraction}|(?(cent|mille))$)`; + export const FractionNounRegex = `(?<=\\b)(${AllIntRegex}\\s+((et)\\s+)?)?(${AllIntRegex}(\\s+((et)\\s)?)((${AllOrdinalRegex}s?|${SuffixOrdinalRegex}s?)|(demi[es]?|tiers?|quarts?))|(un\\s+)?(demi|tier|quart)(\\s+(de\\s+)?|\\s*-\\s*)${RoundNumberIntegerRegex})(?=\\b)`; + export const FractionNounWithArticleRegex = `(?<=\\b)((${AllIntRegex}|${RoundNumberIntegerRegexWithLocks})\\s+(et\\s+)?)?((une?)(\\s+)((${AllOrdinalRegex})|(${SuffixOrdinalRegex})|(et\\s+)?demi[es]?)|demi[es]?)(?=\\b)`; export const FractionPrepositionRegex = `(?(${AllIntRegex})|((?(${AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)`; export const AllPointRegex = `((\\s+${ZeroToNineIntegerRegex})+|(\\s+${SeparaIntRegex}))`; export const AllFloatRegex = `(${AllIntRegex}(\\s+(virgule|point))${AllPointRegex})`; @@ -101,6 +105,7 @@ export namespace FrenchNumeric { export const WrittenGroupSeparatorTexts = [ "point","points" ]; export const WrittenIntegerSeparatorTexts = [ "et","-" ]; export const WrittenFractionSeparatorTexts = [ "et","sur" ]; + export const OneHalfTokens = [ "un","demi" ]; export const HalfADozenRegex = `(?<=\\b)demie?\\s+douzaine`; export const DigitalNumberRegex = `((?<=\\b)(cent|mille|millions?|milliards?|billions?|douzaines?)(?=\\b))|((?<=(\\d|\\b))${BaseNumbers.MultiplierLookupRegex}(?=\\b))`; export const AmbiguousFractionConnectorsRegex = `^[.]`; diff --git a/JavaScript/packages/recognizers-number/src/resources/portugueseNumeric.ts b/JavaScript/packages/recognizers-number/src/resources/portugueseNumeric.ts index f9b93bd358..d9f209a4e1 100644 --- a/JavaScript/packages/recognizers-number/src/resources/portugueseNumeric.ts +++ b/JavaScript/packages/recognizers-number/src/resources/portugueseNumeric.ts @@ -17,6 +17,7 @@ export namespace PortugueseNumeric { export const HundredsNumberIntegerRegex = `(quatrocent[ao]s|trezent[ao]s|seiscent[ao]s|setecent[ao]s|oitocent[ao]s|novecent[ao]s|duzent[ao]s|quinhent[ao]s|cem|(?\\s+(e|com)\\s+(meio|(um|${TwoToNineIntegerRegex})\\s+(meio|terç[oa]|quart[oa]|quint[oa]|sext[oa]|s[eé]tim[oa]|oitav[oa]|non[oa]|d[eé]cim[oa])s?))`; + export const RoundMultiplierWithFraction = `(?(?:(mil(h([ãa]o|[õo]es))|bilh([ãa]o|[õo]es)|trilh([ãa]o|[õo]es)|qua[td]rilh([ãa]o|[õo]es)|quintilh([ãa]o|[õo]es))))(?=${FractionMultiplierRegex}?$)`; + export const RoundMultiplierRegex = `\\b\\s*(${RoundMultiplierWithFraction}|(?(mil))$)`; + export const FractionNounRegex = `(?<=\\b)(${AllIntRegex}\\s+((e|com)\\s+)?)?((${AllIntRegex})(\\s+((e|com)\\s)?)(((${AllOrdinalRegex})s?|(${SpecialFractionInteger})|(${SuffixRoundOrdinalRegex})s?)|mei[oa]?|ter[çc]o?)|(meio|um\\s+quarto\\s+de)\\s+${RoundNumberIntegerRegex})(?=\\b)`; + export const FractionNounWithArticleRegex = `(?<=\\b)((${AllIntRegex}|${RoundNumberIntegerRegexWithLocks})\\s+(e\\s+)?)?((um|um[as])(\\s+)((${AllOrdinalRegex})|(${SuffixRoundOrdinalRegex})|(e\\s+)?mei[oa]?)|mei[oa]?)(?=\\b)`; export const FractionPrepositionRegex = `(?(${AllIntRegex})|((?(${AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)`; export const AllFloatRegex = `${AllIntRegex}(\\s+(vírgula|virgula|e|ponto))${AllPointRegex}`; export const DoubleWithMultiplierRegex = `(((? = new Map([["zero", 0],["hum", 1],["um", 1],["uma", 1],["dois", 2],["duas", 2],["meia", 2],["meio", 2],["tres", 3],["três", 3],["quatro", 4],["cinco", 5],["seis", 6],["sete", 7],["oito", 8],["nove", 9],["dez", 10],["dezena", 10],["déz", 10],["onze", 11],["doze", 12],["dúzia", 12],["duzia", 12],["dúzias", 12],["duzias", 12],["treze", 13],["catorze", 14],["quatorze", 14],["quinze", 15],["dezesseis", 16],["dezasseis", 16],["dezessete", 17],["dezassete", 17],["dezoito", 18],["dezenove", 19],["dezanove", 19],["vinte", 20],["trinta", 30],["quarenta", 40],["cinquenta", 50],["cincoenta", 50],["sessenta", 60],["setenta", 70],["oitenta", 80],["noventa", 90],["cem", 100],["cento", 100],["duzentos", 200],["duzentas", 200],["trezentos", 300],["trezentas", 300],["quatrocentos", 400],["quatrocentas", 400],["quinhentos", 500],["quinhentas", 500],["seiscentos", 600],["seiscentas", 600],["setecentos", 700],["setecentas", 700],["oitocentos", 800],["oitocentas", 800],["novecentos", 900],["novecentas", 900],["mil", 1000],["milhão", 1000000],["milhao", 1000000],["milhões", 1000000],["milhoes", 1000000],["bilhão", 1000000000],["bilhao", 1000000000],["bilhões", 1000000000],["bilhoes", 1000000000],["trilhão", 1000000000000],["trilhao", 1000000000000],["trilhões", 1000000000000],["trilhoes", 1000000000000]]); - export const OrdinalNumberMap: ReadonlyMap = new Map([["primeiro", 1],["primeira", 1],["segundo", 2],["segunda", 2],["terceiro", 3],["terceira", 3],["quarto", 4],["quarta", 4],["quinto", 5],["quinta", 5],["sexto", 6],["sexta", 6],["sétimo", 7],["setimo", 7],["sétima", 7],["setima", 7],["oitavo", 8],["oitava", 8],["nono", 9],["nona", 9],["décimo", 10],["decimo", 10],["décima", 10],["decima", 10],["undécimo", 11],["undecimo", 11],["undécima", 11],["undecima", 11],["duodécimo", 11],["duodecimo", 11],["duodécima", 11],["duodecima", 11],["vigésimo", 20],["vigesimo", 20],["vigésima", 20],["vigesima", 20],["trigésimo", 30],["trigesimo", 30],["trigésima", 30],["trigesima", 30],["quadragésimo", 40],["quadragesimo", 40],["quadragésima", 40],["quadragesima", 40],["quinquagésimo", 50],["quinquagesimo", 50],["quinquagésima", 50],["quinquagesima", 50],["sexagésimo", 60],["sexagesimo", 60],["sexagésima", 60],["sexagesima", 60],["septuagésimo", 70],["septuagesimo", 70],["septuagésima", 70],["septuagesima", 70],["setuagésimo", 70],["setuagesimo", 70],["setuagésima", 70],["setuagesima", 70],["octogésimo", 80],["octogesimo", 80],["octogésima", 80],["octogesima", 80],["nonagésimo", 90],["nonagesimo", 90],["nonagésima", 90],["nonagesima", 90],["centesimo", 100],["centésimo", 100],["centesima", 100],["centésima", 100],["ducentésimo", 200],["ducentesimo", 200],["ducentésima", 200],["ducentesima", 200],["tricentésimo", 300],["tricentesimo", 300],["tricentésima", 300],["tricentesima", 300],["trecentésimo", 300],["trecentesimo", 300],["trecentésima", 300],["trecentesima", 300],["quadringentésimo", 400],["quadringentesimo", 400],["quadringentésima", 400],["quadringentesima", 400],["quingentésimo", 500],["quingentesimo", 500],["quingentésima", 500],["quingentesima", 500],["sexcentésimo", 600],["sexcentesimo", 600],["sexcentésima", 600],["sexcentesima", 600],["seiscentésimo", 600],["seiscentesimo", 600],["seiscentésima", 600],["seiscentesima", 600],["septingentésimo", 700],["septingentesimo", 700],["septingentésima", 700],["septingentesima", 700],["setingentésimo", 700],["setingentesimo", 700],["setingentésima", 700],["setingentesima", 700],["octingentésimo", 800],["octingentesimo", 800],["octingentésima", 800],["octingentesima", 800],["noningentésimo", 900],["noningentesimo", 900],["noningentésima", 900],["noningentesima", 900],["nongentésimo", 900],["nongentesimo", 900],["nongentésima", 900],["nongentesima", 900],["milésimo", 1000],["milesimo", 1000],["milésima", 1000],["milesima", 1000],["milionésimo", 1000000],["milionesimo", 1000000],["milionésima", 1000000],["milionesima", 1000000],["bilionésimo", 1000000000],["bilionesimo", 1000000000],["bilionésima", 1000000000],["bilionesima", 1000000000]]); + export const OrdinalNumberMap: ReadonlyMap = new Map([["primeiro", 1],["primeira", 1],["segundo", 2],["segunda", 2],["terceiro", 3],["terceira", 3],["terço", 3],["terça", 3],["quarto", 4],["quarta", 4],["quinto", 5],["quinta", 5],["sexto", 6],["sexta", 6],["sétimo", 7],["setimo", 7],["sétima", 7],["setima", 7],["oitavo", 8],["oitava", 8],["nono", 9],["nona", 9],["décimo", 10],["decimo", 10],["décima", 10],["decima", 10],["undécimo", 11],["undecimo", 11],["undécima", 11],["undecima", 11],["duodécimo", 11],["duodecimo", 11],["duodécima", 11],["duodecima", 11],["vigésimo", 20],["vigesimo", 20],["vigésima", 20],["vigesima", 20],["trigésimo", 30],["trigesimo", 30],["trigésima", 30],["trigesima", 30],["quadragésimo", 40],["quadragesimo", 40],["quadragésima", 40],["quadragesima", 40],["quinquagésimo", 50],["quinquagesimo", 50],["quinquagésima", 50],["quinquagesima", 50],["sexagésimo", 60],["sexagesimo", 60],["sexagésima", 60],["sexagesima", 60],["septuagésimo", 70],["septuagesimo", 70],["septuagésima", 70],["septuagesima", 70],["setuagésimo", 70],["setuagesimo", 70],["setuagésima", 70],["setuagesima", 70],["octogésimo", 80],["octogesimo", 80],["octogésima", 80],["octogesima", 80],["nonagésimo", 90],["nonagesimo", 90],["nonagésima", 90],["nonagesima", 90],["centesimo", 100],["centésimo", 100],["centesima", 100],["centésima", 100],["ducentésimo", 200],["ducentesimo", 200],["ducentésima", 200],["ducentesima", 200],["tricentésimo", 300],["tricentesimo", 300],["tricentésima", 300],["tricentesima", 300],["trecentésimo", 300],["trecentesimo", 300],["trecentésima", 300],["trecentesima", 300],["quadringentésimo", 400],["quadringentesimo", 400],["quadringentésima", 400],["quadringentesima", 400],["quingentésimo", 500],["quingentesimo", 500],["quingentésima", 500],["quingentesima", 500],["sexcentésimo", 600],["sexcentesimo", 600],["sexcentésima", 600],["sexcentesima", 600],["seiscentésimo", 600],["seiscentesimo", 600],["seiscentésima", 600],["seiscentesima", 600],["septingentésimo", 700],["septingentesimo", 700],["septingentésima", 700],["septingentesima", 700],["setingentésimo", 700],["setingentesimo", 700],["setingentésima", 700],["setingentesima", 700],["octingentésimo", 800],["octingentesimo", 800],["octingentésima", 800],["octingentesima", 800],["noningentésimo", 900],["noningentesimo", 900],["noningentésima", 900],["noningentesima", 900],["nongentésimo", 900],["nongentesimo", 900],["nongentésima", 900],["nongentesima", 900],["milésimo", 1000],["milesimo", 1000],["milésima", 1000],["milesima", 1000],["milionésimo", 1000000],["milionesimo", 1000000],["milionésima", 1000000],["milionesima", 1000000],["bilionésimo", 1000000000],["bilionesimo", 1000000000],["bilionésima", 1000000000],["bilionesima", 1000000000]]); export const PrefixCardinalMap: ReadonlyMap = new Map([["hum", 1],["um", 1],["dois", 2],["tres", 3],["três", 3],["quatro", 4],["cinco", 5],["seis", 6],["sete", 7],["oito", 8],["nove", 9],["dez", 10],["onze", 11],["doze", 12],["treze", 13],["catorze", 14],["quatorze", 14],["quinze", 15],["dezesseis", 16],["dezasseis", 16],["dezessete", 17],["dezassete", 17],["dezoito", 18],["dezenove", 19],["dezanove", 19],["vinte", 20],["trinta", 30],["quarenta", 40],["cinquenta", 50],["cincoenta", 50],["sessenta", 60],["setenta", 70],["oitenta", 80],["noventa", 90],["cem", 100],["duzentos", 200],["trezentos", 300],["quatrocentos", 400],["quinhentos", 500],["seiscentos", 600],["setecentos", 700],["oitocentos", 800],["novecentos", 900]]); export const SuffixOrdinalMap: ReadonlyMap = new Map([["milesimo", 1000],["milionesimo", 1000000],["bilionesimo", 1000000000],["trilionesimo", 1000000000000]]); export const RoundNumberMap: ReadonlyMap = new Map([["mil", 1000],["milesimo", 1000],["milhão", 1000000],["milhao", 1000000],["milhões", 1000000],["milhoes", 1000000],["milionésimo", 1000000],["milionesimo", 1000000],["bilhão", 1000000000],["bilhao", 1000000000],["bilhões", 1000000000],["bilhoes", 1000000000],["bilionésimo", 1000000000],["bilionesimo", 1000000000],["trilhão", 1000000000000],["trilhao", 1000000000000],["trilhões", 1000000000000],["trilhoes", 1000000000000],["trilionésimo", 1000000000000],["trilionesimo", 1000000000000],["dezena", 10],["dezenas", 10],["dúzia", 12],["duzia", 12],["dúzias", 12],["duzias", 12],["k", 1000],["m", 1000000],["g", 1000000000],["b", 1000000000],["t", 1000000000000]]); diff --git a/JavaScript/packages/recognizers-number/src/resources/spanishNumeric.ts b/JavaScript/packages/recognizers-number/src/resources/spanishNumeric.ts index df9b2555d8..28eb5ea786 100644 --- a/JavaScript/packages/recognizers-number/src/resources/spanishNumeric.ts +++ b/JavaScript/packages/recognizers-number/src/resources/spanishNumeric.ts @@ -16,8 +16,9 @@ export namespace SpanishNumeric { export const MultiDecimalSeparatorCulture = true; export const NonStandardSeparatorVariants = [ "es-mx","es-do","es-sv","es-gt","es-hn","es-ni","es-pa","es-pr" ]; export const HundredsNumberIntegerRegex = `(cuatrocient[ao]s|trescient[ao]s|seiscient[ao]s|setecient[ao]s|ochocient[ao]s|novecient[ao]s|doscient[ao]s|quinient[ao]s|(?\\s+(y|con)\\s+(medio|(un|${TwoToNineIntegerRegex})\\s+(medio|terci[oa]?|cuart[oa]|quint[oa]|sext[oa]|s[eé]ptim[oa]|octav[oa]|noven[oa]|d[eé]cim[oa])s?))`; + export const RoundMultiplierWithFraction = `(?(?:(mil\\s+millones|mill[oó]n(es)?|bill[oó]n(es)?|trill[oó]n(es)?|cuatrill[oó]n(es)?|quintill[oó]n(es)?|sextill[oó]n(es)?|septill[oó]n(es)?)))(?=${FractionMultiplierRegex}?$)`; + export const RoundMultiplierRegex = `\\b\\s*(${RoundMultiplierWithFraction}|(?(mil))$)`; + export const FractionNounRegex = `(?<=\\b)(${AllIntRegex}\\s+((y|con)\\s+)?)?((${AllIntRegex})(\\s+((y|con)\\s)?)(((${AllOrdinalRegex})s?|(${SpecialFractionInteger})|(${SufixRoundOrdinalRegex})s?)|medi[oa]s?|tercios?)|(medio|un\\s+cuarto\\s+de)\\s+${RoundNumberIntegerRegex})(?=\\b)`; + export const FractionNounWithArticleRegex = `(?<=\\b)((${AllIntRegex}|${RoundNumberIntegerRegexWithLocks})\\s+(y\\s+)?)?((un|un[oa])(\\s+)((${AllOrdinalRegex})|(${SufixRoundOrdinalRegex}))|(un[ao]?\\s+)?medi[oa]s?)(?=\\b)`; export const FractionPrepositionRegex = `(?(${AllIntRegex})|((?(${AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)`; export const AllPointRegex = `((\\s+${ZeroToNineIntegerRegex})+|(\\s+${AllIntRegex}))`; export const AllFloatRegex = `${AllIntRegex}(\\s+(coma|con))${AllPointRegex}`; @@ -104,6 +108,7 @@ export namespace SpanishNumeric { export const WrittenGroupSeparatorTexts = [ "punto" ]; export const WrittenIntegerSeparatorTexts = [ "y" ]; export const WrittenFractionSeparatorTexts = [ "con" ]; + export const OneHalfTokens = [ "un","medio" ]; export const HalfADozenRegex = `media\\s+docena`; export const DigitalNumberRegex = `((?<=\\b)(mil(l[oó]n(es)?)?|bill[oó]n(es)?|trill[oó]n(es)?|docenas?)(?=\\b))|((?<=(\\d|\\b))${BaseNumbers.MultiplierLookupRegex}(?=\\b))`; export const CardinalNumberMap: ReadonlyMap = new Map([["cero", 0],["un", 1],["una", 1],["uno", 1],["dos", 2],["tres", 3],["cuatro", 4],["cinco", 5],["seis", 6],["siete", 7],["ocho", 8],["nueve", 9],["diez", 10],["once", 11],["doce", 12],["docena", 12],["docenas", 12],["trece", 13],["catorce", 14],["quince", 15],["dieciseis", 16],["dieciséis", 16],["diecisiete", 17],["dieciocho", 18],["diecinueve", 19],["veinte", 20],["ventiuna", 21],["ventiuno", 21],["veintiun", 21],["veintiún", 21],["veintiuno", 21],["veintiuna", 21],["veintidos", 22],["veintidós", 22],["veintitres", 23],["veintitrés", 23],["veinticuatro", 24],["veinticinco", 25],["veintiseis", 26],["veintiséis", 26],["veintisiete", 27],["veintiocho", 28],["veintinueve", 29],["treinta", 30],["cuarenta", 40],["cincuenta", 50],["sesenta", 60],["setenta", 70],["ochenta", 80],["noventa", 90],["cien", 100],["ciento", 100],["doscientas", 200],["doscientos", 200],["trescientas", 300],["trescientos", 300],["cuatrocientas", 400],["cuatrocientos", 400],["quinientas", 500],["quinientos", 500],["seiscientas", 600],["seiscientos", 600],["setecientas", 700],["setecientos", 700],["ochocientas", 800],["ochocientos", 800],["novecientas", 900],["novecientos", 900],["mil", 1000],["millon", 1000000],["millón", 1000000],["millones", 1000000],["billon", 1000000000000],["billón", 1000000000000],["billones", 1000000000000],["trillon", 1000000000000000000],["trillón", 1000000000000000000],["trillones", 1000000000000000000]]); diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py index 5ba631e111..3e23aa5e53 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/english_date_time.py @@ -268,7 +268,7 @@ class EnglishDateTime: UnspecificDatePeriodRegex = f'^(week|fortnight|month|year)$' PrepositionSuffixRegex = f'\\b(on|in|at|around|circa|from|to)$' FlexibleDayRegex = f'(?([A-Za-z]+\\s)?[A-Za-z\\d]+)' - ForTheRegex = f'\\b((((?<=for\\s+)the\\s+{FlexibleDayRegex})|((?<=on\\s+)(the\\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.(?!\\d)|!|\\?|$)))' + ForTheRegex = f'\\b((((?<=\\bfor\\s+)the\\s+{FlexibleDayRegex})|((?<=\\bon\\s+)(the\\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.(?!\\d)|!|\\?|$)))' WeekDayAndDayOfMonthRegex = f'\\b{WeekDayRegex}\\s+(the\\s+{FlexibleDayRegex})\\b' WeekDayAndDayRegex = f'\\b{WeekDayRegex}\\s+(?!(the)){DayRegex}(?!([-:]|(\\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\\b' RestOfDateRegex = f'\\b(rest|remaining)\\s+(of\\s+)?((the|my|this|current)\\s+)?(?week|fortnight|month|year|decade)\\b' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py index 283a895962..d3cb3a985d 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/french_date_time.py @@ -16,8 +16,8 @@ class FrenchDateTime: LangMarker = 'Fre' CheckBothBeforeAfter = False - TillRegex = f'(?au|et|(jusqu\')?[aà]|avant|--|-|—|——)' - RangeConnectorRegex = f'(?de la|au|[aà]|et(\\s*la)?|--|-|—|——)' + TillRegex = f'(?\\b(au|et|(jusqu\')?a|avant)\\b|(jusqu\')?à|--|-|—|——)' + RangeConnectorRegex = f'(?\\b(de\\s+la|au|(jusqu\')?a|et(\\s*la)?)\\b|(jusqu\')?à|--|-|—|——)' RelativeRegex = f'(?prochaine?|de|du|ce(tte)?|l[ae]|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))' StrictRelativeRegex = f'(?prochaine?|derni[eè]re|hier|pr[eé]c[eé]dente|au\\s+cours+(de|du\\s*))' NextSuffixRegex = f'(?prochain(es?)?|suivante)\\b' @@ -53,7 +53,7 @@ class FrenchDateTime: BetweenRegex = f'\\b(entre\\s+)({DayRegex})\\s*{RangeConnectorRegex}\\s*({DayRegex})\\s+{MonthSuffixRegex}((\\s+|\\s*,\\s*){YearRegex})?\\b' YearWordRegex = f'\\b(?l\'ann[ée]e)\\b' MonthWithYear = f'\\b({WrittenMonthRegex}(\\s*),?(\\s+de)?(\\s*)({YearRegex}|{TwoDigitYearRegex}|(?cette)\\s*{YearWordRegex})|{YearWordRegex}\\s*({PastSuffixRegex}|{NextSuffixRegex}))' - OneWordPeriodRegex = f'\\b(({RelativeRegex}\\s+)?{WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l\'année)\\s+{StrictRelativeRegex}|{RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l\'année)|weekend|week-end|(mois|l\'année))\\b' + OneWordPeriodRegex = f'\\b(({RelativeRegex}\\s+)?{WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l\'année)\\s+{StrictRelativeRegex}|{RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l\'année)|weekend|week-end|mois|l\'année|an)\\b' MonthNumWithYear = f'({YearRegex}(\\s*)[/\\-\\.](\\s*){MonthNumRegex})|({MonthNumRegex}(\\s*)[/\\-](\\s*){YearRegex})' WeekOfMonthRegex = f'(?(le\\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\\s+semaine(\\s+de)?\\s+{MonthSuffixRegex})' WeekOfYearRegex = f'(?(le\\s+)?(?premier|1er|duexi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqi[èe]me|5)\\s+semaine(\\s+de)?\\s+({YearRegex}|{RelativeRegex}\\s+ann[ée]e))' @@ -228,7 +228,7 @@ class FrenchDateTime: FromRegex2 = f'((depuis|de)(\\s*las?)?)$' FromToRegex = f'\\b(du|depuis|des?).+(au|à|a)\\b.+' SingleAmbiguousMonthRegex = f'^(le\\s+)?(may|march)$' - UnspecificDatePeriodRegex = f'^\\b$' + UnspecificDatePeriodRegex = f'^(semaine|mois|an(n[eé]e)?)$' PrepositionSuffixRegex = f'\\b(du|de|[àa]|vers|dans)$' FlexibleDayRegex = f'(?([A-Za-z]+\\s)?[A-Za-z\\d]+)' ForTheRegex = f'\\b(((pour le {FlexibleDayRegex})|(dans (le\\s+)?{FlexibleDayRegex}(?<=(st|nd|rd|th))))(?\\s*(,|\\.|!|\\?|$)))' diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py index cb48e97ab2..dc4be56132 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/resources/portuguese_date_time.py @@ -127,7 +127,7 @@ class PortugueseDateTime: MidafternoonRegex = f'(?meio\\s+da\\s+tarde)' MiddayRegex = f'(?meio\\s*(-\\s*)?dia)' MidTimeRegex = f'(?({MidnightRegex}|{MidmorningRegex}|{MidEarlyMorning}|{MidafternoonRegex}|{MiddayRegex}))' - AtRegex = f'\\b(((?<=\\b([aà]s?)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b)?|(?<=\\b(s(er)?[aã]o|v[aã]o\\s+ser|^[eé]h?)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b))(\\s+{OclockRegex})?|{MidTimeRegex})\\b' + AtRegex = f'\\b(((?<=\\b([aà]s?)\\s+)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b)?|(?<=\\b(s(er)?[aã]o|v[aã]o\\s+ser|^[eé]h?)\\s+|^\\s*)({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})(\\s+horas?|\\s*h\\b))(\\s+{OclockRegex})?|{MidTimeRegex})\\b' ConnectNumRegex = f'({BaseDateTime.HourRegex}(?[0-5][0-9])\\s*{DescRegex})' TimeRegex1 = f'(\\b{TimePrefix}\\s+)?({WrittenTimeRegex}|{HourNumRegex}|{BaseDateTime.HourRegex})\\s*({DescRegex})' TimeRegex2 = f'(\\b{TimePrefix}\\s+)?(t)?{BaseDateTime.HourRegex}(\\s*)?:(\\s*)?{BaseDateTime.MinuteRegex}((\\s*)?:(\\s*)?{BaseDateTime.SecondRegex})?((\\s*{DescRegex})|\\b)' @@ -527,6 +527,7 @@ class PortugueseDateTime: DurationDateRestrictions = [] AmbiguityFiltersDict = dict([("^\\d{4}$", "(\\d\\.\\d{4}|\\d{4}\\.\\d)"), ("^(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)$", "([$%£&!?@#])(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)|(abr|ago|dez|fev|jan|ju[ln]|mar|maio?|nov|out|sep?t)([$%£&@#])")]) + AmbiguityTimeFiltersDict = dict([("horas?$", "\\b((por|duração\\s+de|durante)\\s+(\\S+\\s+){1,2}horas?|horas?\\s+(\\S+\\s+){0,2}dur(ação|ou|a(rá|va)?))\\b")]) EarlyMorningTermList = [r'madrugada'] MorningTermList = [r'manha', r'manhã'] AfternoonTermList = [r'passado o meio dia', r'depois do meio dia'] diff --git a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/base_numbers.py b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/base_numbers.py index 6d551947a3..7d7d25186f 100644 --- a/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/base_numbers.py +++ b/Python/libraries/recognizers-number-with-unit/recognizers_number_with_unit/resources/base_numbers.py @@ -22,7 +22,8 @@ def IntegerRegexDefinition(placeholder, thousandsmark): def DoubleRegexDefinition(placeholder, thousandsmark, decimalmark): return f'(((?{RoundNumberIntegerRegex})$' + FractionMultiplierRegex = f'(?\\s+and\\s+(a|one|{TwoToNineIntegerRegex})\\s+(half|quarter|third|fourth|fifth|sixth|seventh|eighth|nine?th|tenth)s?)' + RoundMultiplierWithFraction = f'(?<=(?(?:million|mln|billion|bln|trillion|tln)s?)(?={FractionMultiplierRegex}?$)' + RoundMultiplierRegex = f'\\b\\s*((of\\s+)?a\\s+)?({RoundMultiplierWithFraction}|(?(?:hundred|thousand|lakh|crore)s?)$)' FractionNounRegex = f'(?<=\\b)({AllIntRegex}\\s+(and\\s+)?)?(({AllIntRegex})(\\s+|\\s*-\\s*)((({AllOrdinalRegex})|({RoundNumberOrdinalRegex}))s|halves|quarters)((\\s+of\\s+a)?\\s+{RoundNumberIntegerRegex})?|(half(\\s+a)?|quarter(\\s+of\\s+a)?)\\s+{RoundNumberIntegerRegex})(?=\\b)' - FractionNounWithArticleRegex = f'(?<=\\b)((({AllIntRegex}\\s+(and\\s+)?)?(an?|one)(\\s+|\\s*-\\s*)(?!\\bfirst\\b|\\bsecond\\b)(({AllOrdinalRegex})|({RoundNumberOrdinalRegex})|(half|quarter)(((\\s+of)?\\s+a)?\\s+{RoundNumberIntegerRegex})?))|(half))(?=\\b)' + FractionNounWithArticleRegex = f'(?<=\\b)(((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(and\\s+)?)?(an?|one)(\\s+|\\s*-\\s*)(?!\\bfirst\\b|\\bsecond\\b)(({AllOrdinalRegex})|({RoundNumberOrdinalRegex})|(half|quarter)(((\\s+of)?\\s+a)?\\s+{RoundNumberIntegerRegex})?))|(half))(?=\\b)' FractionPrepositionRegex = f'(?({AllIntRegex})|((?in|out\\s+of))\\s+(?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)' FractionPrepositionWithinPercentModeRegex = f'(?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?![\\.,]))(?=\\b)' AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{SeparaIntRegex}))' @@ -157,7 +159,14 @@ def DoubleWithoutIntegralRegex(placeholder): ("trillion", 1000000000000), ("tln", 1000000000000), ("lakh", 100000), - ("crore", 10000000)]) + ("crore", 10000000), + ("hundreds", 100), + ("thousands", 1000), + ("millions", 1000000), + ("billions", 1000000000), + ("trillions", 1000000000000), + ("lakhs", 100000), + ("crores", 10000000)]) OrdinalNumberMap = dict([("first", 1), ("second", 2), ("secondary", 2), @@ -238,6 +247,13 @@ def DoubleWithoutIntegralRegex(placeholder): ("tln", 1000000000000), ("lakh", 100000), ("crore", 10000000), + ("hundreds", 100), + ("thousands", 1000), + ("millions", 1000000), + ("billions", 1000000000), + ("trillions", 1000000000000), + ("lakhs", 100000), + ("crores", 10000000), ("hundredth", 100), ("thousandth", 1000), ("millionth", 1000000), diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py index 0c2b692a62..8e0f8fd5ab 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/french_numeric.py @@ -19,6 +19,7 @@ class FrenchNumeric: MultiDecimalSeparatorCulture = True RoundNumberIntegerRegex = f'(cent|mille|millions?|milliards?|billions?)' ZeroToNineIntegerRegex = f'(une?|deux|trois|quatre|cinq|six|sept|huit|neuf|z[ée]ro)' + TwoToNineIntegerRegex = f'(deux|trois|quatre|cinq|six|sept|huit|neuf)' TenToNineteenIntegerRegex = f'((seize|quinze|quatorze|treize|douze|onze)|dix(\\Wneuf|\\Whuit|\\Wsept)?)' TensNumberIntegerRegex = f'(quatre\\Wvingt(s|\\Wdix)?|soixante(\\Wdix)?|vingt|trente|quarante|cinquante|septante|octante|huitante|nonante)' DigitsNumberRegex = f'\\d|\\d{{1,3}}(\\.\\d{{3}})' @@ -56,8 +57,11 @@ def NumbersWithPlaceHolder(placeholder): OrdinalFrenchRegex = f'(?<=\\b){AllOrdinalRegex}(?=\\b)' FractionNotationWithSpacesRegex = f'(((?<=\\W|^)-\\s*)|(?<=\\b))\\d+\\s+\\d+[/]\\d+(?=(\\b[^/]|$))' FractionNotationRegex = f'{BaseNumbers.FractionNotationRegex}' - FractionNounRegex = f'(?<=\\b)({AllIntRegex}\\s+((et)\\s+)?)?({AllIntRegex})(\\s+((et)\\s)?)((({AllOrdinalRegex})s?|({SuffixOrdinalRegex})s?)|demi[es]?|tiers?|quarts?)(?=\\b)' - FractionNounWithArticleRegex = f'(?<=\\b)({AllIntRegex}\\s+(et\\s+)?)?(une?)(\\s+)(({AllOrdinalRegex})|({SuffixOrdinalRegex})|(et\\s+)?demi[es]?)(?=\\b)' + FractionMultiplierRegex = f'(?\\s+et\\s+(demi[es]?|(une?|{TwoToNineIntegerRegex})\\s+(demie?|tier|quart|(cinqui|sixi|septi|hui[tr]i|neuvi|dixi)[eè]me)s?))' + RoundMultiplierWithFraction = f'(?(millions?|milliards?|billions?))(?={FractionMultiplierRegex}?$)' + RoundMultiplierRegex = f'\\b\\s*({RoundMultiplierWithFraction}|(?(cent|mille))$)' + FractionNounRegex = f'(?<=\\b)({AllIntRegex}\\s+((et)\\s+)?)?({AllIntRegex}(\\s+((et)\\s)?)(({AllOrdinalRegex}s?|{SuffixOrdinalRegex}s?)|(demi[es]?|tiers?|quarts?))|(un\\s+)?(demi|tier|quart)(\\s+(de\\s+)?|\\s*-\\s*){RoundNumberIntegerRegex})(?=\\b)' + FractionNounWithArticleRegex = f'(?<=\\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(et\\s+)?)?((une?)(\\s+)(({AllOrdinalRegex})|({SuffixOrdinalRegex})|(et\\s+)?demi[es]?)|demi[es]?)(?=\\b)' FractionPrepositionRegex = f'(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{SeparaIntRegex}))' AllFloatRegex = f'({AllIntRegex}(\\s+(virgule|point)){AllPointRegex})' @@ -110,6 +114,7 @@ def DoubleWithoutIntegralRegex(placeholder): WrittenGroupSeparatorTexts = [r'point', r'points'] WrittenIntegerSeparatorTexts = [r'et', r'-'] WrittenFractionSeparatorTexts = [r'et', r'sur'] + OneHalfTokens = [r'un', r'demi'] HalfADozenRegex = f'(?<=\\b)demie?\\s+douzaine' DigitalNumberRegex = f'((?<=\\b)(cent|mille|millions?|milliards?|billions?|douzaines?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' AmbiguousFractionConnectorsRegex = f'^[.]' diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/german_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/german_numeric.py index e9a4b8e456..996f5c996e 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/german_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/german_numeric.py @@ -18,14 +18,15 @@ class GermanNumeric: CompoundNumberLanguage = True MultiDecimalSeparatorCulture = False ZeroToNineIntegerRegex = f'(drei|sieben|acht|vier|fuenf|fünf|null|neun|eins|(ein(?!($|\\.|,|!|\\?)))|eine[rn]?|zwei|zwo|sechs)' - RoundNumberIntegerRegex = f'((ein)?hundert|tausend|(\\s*(million(en)?|mio|milliarden?|mrd|billion(en)?)\\s*))' + TwoToNineIntegerRegex = f'(drei|sieben|acht|vier|fuenf|fünf|neun|zwei|zwo|sechs)' + RoundNumberIntegerRegex = f'((ein)?hundert|tausend|((million(en)?|mio|milliarden?|mrd|billion(en)?)))' AnIntRegex = f'(eine?)(?=\\s)' TenToNineteenIntegerRegex = f'(siebzehn|dreizehn|vierzehn|achtzehn|neunzehn|fünfzehn|fuenfzehn|sechzehn|elf|zwoelf|zwölf|zehn)' TensNumberIntegerRegex = f'(siebzig|zwanzig|dreißig|achtzig|neunzig|vierzig|fuenfzig|fünfzig|sechzig|hundert|tausend)' NegativeNumberTermsRegex = f'^[.]' NegativeNumberSignRegex = f'^({NegativeNumberTermsRegex}\\s+).*' SeparaIntRegex = f'((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|{ZeroToNineIntegerRegex})(\\s*{RoundNumberIntegerRegex})*))|(({AnIntRegex}(\\s*{RoundNumberIntegerRegex})+))' - AllIntRegex = f'(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\\s*{RoundNumberIntegerRegex})))*{SeparaIntRegex})' + AllIntRegex = f'(((({TenToNineteenIntegerRegex}|({ZeroToNineIntegerRegex}und{TensNumberIntegerRegex})|{TensNumberIntegerRegex}|({ZeroToNineIntegerRegex}|{AnIntRegex}))?(\\s*{RoundNumberIntegerRegex}\\s*)))*{SeparaIntRegex})' PlaceHolderPureNumber = f'\\b' PlaceHolderDefault = f'\\D|\\b' @@ -51,8 +52,11 @@ def NumbersWithPlaceHolder(placeholder): FractionUnitsRegex = f'((?anderthalb|einundhalb)|(?dreiviertel))' FractionHalfRegex = f'(einhalb(es)?)$' OneHalfTokens = [r'ein', r'halb', r'halbes'] - FractionNounRegex = f'(?<=\\b)(({AllIntRegex})(\\s*|\\s*-\\s*)((({AllOrdinalRegex})|({RoundNumberOrdinalRegex}))|halb(e[rs]?)?|hälfte)|{FractionUnitsRegex})(?=\\b)' - FractionNounWithArticleRegex = f'(?<=\\b)(({AllIntRegex}\\s+(und\\s+)?)?eine?(\\s+|\\s*-\\s*)({AllOrdinalRegex}|{RoundNumberOrdinalRegex}|{FractionUnitsRegex}|({AllIntRegex}ein)?(halb(e[rs]?)?|hälfte))|{AllIntRegex}ein(halb))(?=\\b)' + FractionMultiplierRegex = f'(?(\\s+und\\s+)?(anderthalb|einundhalb|dreiviertel)|(\\s+und\\s+)?(eine?|{TwoToNineIntegerRegex})\\s*(halbe?|(dritt|viert|fünft|fuenft|sechst|siebt|acht|neunt|zehnt)(er|es|en|el|e)?))' + RoundMultiplierWithFraction = f'(?<=(?(million(en)?|mio|milliarden?|mrd|billion(en)?))(?={FractionMultiplierRegex}?$)' + RoundMultiplierRegex = f'\\b\\s*((von\\s+)?ein(er|es|en|el|e)?\\s+)?({RoundMultiplierWithFraction}|(?(?:hundert|tausend))$)' + FractionNounRegex = f'(?<=\\b)({AllIntRegex}\\s+(und\\s+)?)?(({AllIntRegex})(\\s*|\\s*-\\s*)((({AllOrdinalRegex})|({RoundNumberOrdinalRegex}))|halb(e[rs]?)?|hälfte)(\\s+{RoundNumberIntegerRegex})?|(eine\\s+(halbe|viertel)\\s+){RoundNumberIntegerRegex}|{FractionUnitsRegex}(\\s+{RoundNumberIntegerRegex})?)(?=\\b)' + FractionNounWithArticleRegex = f'(?<=\\b)((({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(und\\s+)?)?eine?(\\s+|\\s*-\\s*)({AllOrdinalRegex}|{RoundNumberOrdinalRegex}|{FractionUnitsRegex}|({AllIntRegex}ein)?(halb(e[rs]?)?|hälfte))|{AllIntRegex}ein(halb)(\\s+{RoundNumberIntegerRegex})?)(?=\\b)' FractionPrepositionRegex = f'(?({AllIntRegex})|((?({AllIntRegex})|(\\d+)(?!\\.))(?=\\b)' AllPointRegex = f'((\\s*{ZeroToNineIntegerRegex})+|(\\s*{SeparaIntRegex}))' AllFloatRegex = f'({AllIntRegex}(\\s*komma\\s*){AllPointRegex})' @@ -104,7 +108,7 @@ def DoubleWithoutIntegralRegex(placeholder): WrittenDecimalSeparatorTexts = [r'komma'] WrittenGroupSeparatorTexts = [r'punkt'] WrittenIntegerSeparatorTexts = [r'und'] - WrittenFractionSeparatorTexts = [r'durch'] + WrittenFractionSeparatorTexts = [r'durch', r'und'] HalfADozenRegex = f'ein\\s+halbes\\s+dutzend' DigitalNumberRegex = f'((?<=\\b)(hundert|tausend|million(en)?|mio|milliarde(n)?|mrd|billion(en)?|dutzend(e)?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' CardinalNumberMap = dict([("ein", 1), diff --git a/Python/libraries/recognizers-number/recognizers_number/resources/portuguese_numeric.py b/Python/libraries/recognizers-number/recognizers_number/resources/portuguese_numeric.py index 3421ebd44e..a9f8cc11a8 100644 --- a/Python/libraries/recognizers-number/recognizers_number/resources/portuguese_numeric.py +++ b/Python/libraries/recognizers-number/recognizers_number/resources/portuguese_numeric.py @@ -20,6 +20,7 @@ class PortugueseNumeric: HundredsNumberIntegerRegex = f'(quatrocent[ao]s|trezent[ao]s|seiscent[ao]s|setecent[ao]s|oitocent[ao]s|novecent[ao]s|duzent[ao]s|quinhent[ao]s|cem|(?\\s+(e|com)\\s+(meio|(um|{TwoToNineIntegerRegex})\\s+(meio|terç[oa]|quart[oa]|quint[oa]|sext[oa]|s[eé]tim[oa]|oitav[oa]|non[oa]|d[eé]cim[oa])s?))' + RoundMultiplierWithFraction = f'(?(?:(mil(h([ãa]o|[õo]es))|bilh([ãa]o|[õo]es)|trilh([ãa]o|[õo]es)|qua[td]rilh([ãa]o|[õo]es)|quintilh([ãa]o|[õo]es))))(?={FractionMultiplierRegex}?$)' + RoundMultiplierRegex = f'\\b\\s*({RoundMultiplierWithFraction}|(?(mil))$)' + FractionNounRegex = f'(?<=\\b)({AllIntRegex}\\s+((e|com)\\s+)?)?(({AllIntRegex})(\\s+((e|com)\\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SuffixRoundOrdinalRegex})s?)|mei[oa]?|ter[çc]o?)|(meio|um\\s+quarto\\s+de)\\s+{RoundNumberIntegerRegex})(?=\\b)' + FractionNounWithArticleRegex = f'(?<=\\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(e\\s+)?)?((um|um[as])(\\s+)(({AllOrdinalRegex})|({SuffixRoundOrdinalRegex})|(e\\s+)?mei[oa]?)|mei[oa]?)(?=\\b)' FractionPrepositionRegex = f'(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' AllFloatRegex = f'{AllIntRegex}(\\s+(vírgula|virgula|e|ponto)){AllPointRegex}' DoubleWithMultiplierRegex = f'(((?\\s+(y|con)\\s+(medio|(un|{TwoToNineIntegerRegex})\\s+(medio|terci[oa]?|cuart[oa]|quint[oa]|sext[oa]|s[eé]ptim[oa]|octav[oa]|noven[oa]|d[eé]cim[oa])s?))' + RoundMultiplierWithFraction = f'(?(?:(mil\\s+millones|mill[oó]n(es)?|bill[oó]n(es)?|trill[oó]n(es)?|cuatrill[oó]n(es)?|quintill[oó]n(es)?|sextill[oó]n(es)?|septill[oó]n(es)?)))(?={FractionMultiplierRegex}?$)' + RoundMultiplierRegex = f'\\b\\s*({RoundMultiplierWithFraction}|(?(mil))$)' + FractionNounRegex = f'(?<=\\b)({AllIntRegex}\\s+((y|con)\\s+)?)?(({AllIntRegex})(\\s+((y|con)\\s)?)((({AllOrdinalRegex})s?|({SpecialFractionInteger})|({SufixRoundOrdinalRegex})s?)|medi[oa]s?|tercios?)|(medio|un\\s+cuarto\\s+de)\\s+{RoundNumberIntegerRegex})(?=\\b)' + FractionNounWithArticleRegex = f'(?<=\\b)(({AllIntRegex}|{RoundNumberIntegerRegexWithLocks})\\s+(y\\s+)?)?((un|un[oa])(\\s+)(({AllOrdinalRegex})|({SufixRoundOrdinalRegex}))|(un[ao]?\\s+)?medi[oa]s?)(?=\\b)' FractionPrepositionRegex = f'(?({AllIntRegex})|((?({AllIntRegex})|((\\d+)(?!\\.)))(?=\\b)' AllPointRegex = f'((\\s+{ZeroToNineIntegerRegex})+|(\\s+{AllIntRegex}))' AllFloatRegex = f'{AllIntRegex}(\\s+(coma|con)){AllPointRegex}' @@ -113,6 +117,7 @@ def DoubleWithoutIntegralRegex(placeholder): WrittenGroupSeparatorTexts = [r'punto'] WrittenIntegerSeparatorTexts = [r'y'] WrittenFractionSeparatorTexts = [r'con'] + OneHalfTokens = [r'un', r'medio'] HalfADozenRegex = f'media\\s+docena' DigitalNumberRegex = f'((?<=\\b)(mil(l[oó]n(es)?)?|bill[oó]n(es)?|trill[oó]n(es)?|docenas?)(?=\\b))|((?<=(\\d|\\b)){BaseNumbers.MultiplierLookupRegex}(?=\\b))' CardinalNumberMap = dict([("cero", 0), diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index d4f25573b6..0cc613bd9b 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -22708,5 +22708,105 @@ } } ] + }, + { + "Input": "It's an everyday routine.", + "Context": { + "ReferenceDateTime": "2018-11-30T12:00:00" + }, + "Results": [ + { + "Text": "everyday", + "Start": 8, + "End": 15, + "TypeName": "datetimeV2.set", + "Resolution": { + "values": [ + { + "timex": "P1D", + "type": "set", + "value": "not resolved" + } + ] + } + } + ] + }, + { + "Input": "We meet in half an hour", + "Context": { + "ReferenceDateTime": "2022-01-07T18:55:00" + }, + "NotSupported": "javascript, python", + "Results": [ + { + "Text": "in half an hour", + "Start": 8, + "End": 22, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2022-01-07T19:25:00", + "type": "datetime", + "value": "2022-01-07 19:25:00" + } + ] + } + } + ] + }, + { + "Input": "What about half an hour from now?", + "Context": { + "ReferenceDateTime": "2022-01-07T18:56:00" + }, + "NotSupported": "javascript, python", + "Results": [ + { + "Text": "half an hour from now", + "Start": 11, + "End": 31, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2022-01-07T19:26:00", + "type": "datetime", + "value": "2022-01-07 19:26:00" + } + ] + } + } + ] + }, + { + "Input": "I'll go back on Mon 13th", + "Context": { + "ReferenceDateTime": "2022-05-01T00:00:00" + }, + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "mon 13th", + "Start": 16, + "End": 23, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "XXXX-WXX-1", + "type": "date", + "value": "2021-12-13" + }, + { + "timex": "XXXX-WXX-1", + "type": "date", + "value": "2022-06-13" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/English/DateTimeModelCalendarMode.json b/Specs/DateTime/English/DateTimeModelCalendarMode.json index d1453c0c4d..9656927500 100644 --- a/Specs/DateTime/English/DateTimeModelCalendarMode.json +++ b/Specs/DateTime/English/DateTimeModelCalendarMode.json @@ -1249,5 +1249,76 @@ } } ] + }, + { + "Input": "It's an everyday routine.", + "Context": { + "ReferenceDateTime": "2018-11-30T12:00:00" + }, + "Results": [ + { + "Text": "everyday", + "Start": 8, + "End": 15, + "TypeName": "datetimeV2.set", + "Resolution": { + "values": [ + { + "timex": "P1D", + "type": "set", + "value": "not resolved" + } + ] + } + } + ] + }, + { + "Input": "We meet in half an hour", + "Context": { + "ReferenceDateTime": "2022-01-07T18:55:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "in half an hour", + "Start": 8, + "End": 22, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2022-01-07T19:25:00", + "type": "datetime", + "value": "2022-01-07 19:25:00" + } + ] + } + } + ] + }, + { + "Input": "What about half an hour from now?", + "Context": { + "ReferenceDateTime": "2022-01-07T18:56:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "half an hour from now", + "Start": 11, + "End": 31, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2022-01-07T19:26:00", + "type": "datetime", + "value": "2022-01-07 19:26:00" + } + ] + } + } + ] } -] \ No newline at end of file +] diff --git a/Specs/DateTime/English/DateTimeModelComplexCalendar.json b/Specs/DateTime/English/DateTimeModelComplexCalendar.json index 5f1d8700d9..418c4b86ac 100644 --- a/Specs/DateTime/English/DateTimeModelComplexCalendar.json +++ b/Specs/DateTime/English/DateTimeModelComplexCalendar.json @@ -13499,5 +13499,76 @@ } } ] + }, + { + "Input": "It's an everyday routine.", + "Context": { + "ReferenceDateTime": "2018-11-30T12:00:00" + }, + "Results": [ + { + "Text": "everyday", + "Start": 8, + "End": 15, + "TypeName": "datetimeV2.set", + "Resolution": { + "values": [ + { + "timex": "P1D", + "type": "set", + "value": "not resolved" + } + ] + } + } + ] + }, + { + "Input": "We meet in half an hour", + "Context": { + "ReferenceDateTime": "2022-01-07T18:55:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "in half an hour", + "Start": 8, + "End": 22, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2022-01-07T19:25:00", + "type": "datetime", + "value": "2022-01-07 19:25:00" + } + ] + } + } + ] + }, + { + "Input": "What about half an hour from now?", + "Context": { + "ReferenceDateTime": "2022-01-07T18:56:00" + }, + "NotSupportedByDesign": "javascript, python", + "Results": [ + { + "Text": "half an hour from now", + "Start": 11, + "End": 31, + "TypeName": "datetimeV2.datetime", + "Resolution": { + "values": [ + { + "timex": "2022-01-07T19:26:00", + "type": "datetime", + "value": "2022-01-07 19:26:00" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/English/SetParser.json b/Specs/DateTime/English/SetParser.json index 01568d762d..2b3eaadc17 100644 --- a/Specs/DateTime/English/SetParser.json +++ b/Specs/DateTime/English/SetParser.json @@ -711,5 +711,28 @@ "Length": 9 } ] + }, + { + "Input": "It's an everyday routine.", + "Context": { + "ReferenceDateTime": "2018-11-30T12:00:00" + }, + "Results": [ + { + "Text": "everyday", + "Type": "set", + "Value": { + "Timex": "P1D", + "FutureResolution": { + "set": "Set: P1D" + }, + "PastResolution": { + "set": "Set: P1D" + } + }, + "Start": 8, + "Length": 8 + } + ] } ] \ No newline at end of file