Skip to content

Commit

Permalink
Merge pull request #53 from microsoft/master
Browse files Browse the repository at this point in the history
Sync Fork
  • Loading branch information
samhickey25 authored Dec 19, 2022
2 parents 86f14b8 + c2d9970 commit 4451a53
Show file tree
Hide file tree
Showing 36 changed files with 2,230 additions and 374 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,16 @@ private DateTimeResolutionResult ParseMergedDuration(string text, DateObject ref
var durationExtractor = this.config.DurationExtractor;

// DurationExtractor without parameter will not extract merged duration
var ers = durationExtractor.Extract(text, referenceTime);

// TrimStart() was added to address a bug with the French duration expression "depuis ans",
// for which the base case of the recursive call (i.e., if(ers.Count <= 1))
// would never be reached, in which case the stack would overflow.
// The statement if(minStart){...} is meant to find the isolated unit, as explained in
// the comment below. However, if there is whitespace before the extracted entity
// (as in " ans"), minStart will be greater than 1, so the Followed Unit regex
// keeps matching "ans" and adding it to ers; ers therefore always has
// more than one item in it, and the recursion never ends.
var ers = durationExtractor.Extract(text.TrimStart(), referenceTime);

// If the duration extractions do not start at 0, check if the input starts with an isolated unit.
// This happens for example with patterns like "next week and 3 days" where "next" is not part of the extraction.
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions Patterns/Dutch/Dutch-Numbers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ NumbersWithDozenSuffix: !simpleRegex
AllIntRegexWithLocks: !nestedRegex
def: ((?<=\b){AllIntRegex}(?=\b))
references: [ AllIntRegex ]
GrossRegex: !simpleRegex
def: (een\s+)?gros
AllIntRegexWithDozenSuffixLocks: !nestedRegex
def: (?<=\b)(((een\s+)?half\s+dozijn)|({AllIntRegex}\s+dozijn(en)?)|{GrossRegex})(?=\b)
references: [ AllIntRegex, GrossRegex ]
Expand Down Expand Up @@ -128,7 +130,7 @@ DoubleExponentialNotationRegex: !simpleRegex
DoubleCaretExponentialNotationRegex: !simpleRegex
def: (((?<!\d+\s*)-\s*)|((?<=\b)(?<!\d+,)))(\d+(,\d+)?)\^([+-]*[1-9]\d*)(?=\b)
DoubleDecimalPointRegex: !paramsRegex
def: (((?<!\d+\s*)-\s*)|((?<=\b)(?<!\d+,)))\d+,\d+(?!(,\d+))(?={placeholder})
def: (?<=\b)((\d{1,3})(\.\d{3})*(\,\d+)?)(?={placeholder})
params: [ placeholder ]
DoubleWithoutIntegralRegex: !paramsRegex
def: (?<=\s|^)(?<!(\d+)),\d+(?!(,\d+))(?={placeholder})
Expand All @@ -139,6 +141,8 @@ DoubleWithRoundNumber: !nestedRegex
DoubleAllFloatRegex: !nestedRegex
def: ((?<=\b){AllFloatRegex}(?=\b))
references: [ AllFloatRegex ]
ConnectorRegex: !simpleRegex
def: (?<spacer>en)
#Percentage Regex
NumberWithSuffixPercentage: !nestedRegex
def: (?<!%)({BaseNumbers.NumberReplaceToken})(\s*)(%(?!{BaseNumbers.NumberReplaceToken})|(procent|percentage|percent)\b)
Expand Down Expand Up @@ -238,8 +242,6 @@ WrittenIntegerSeparatorTexts: [en, ën]
WrittenFractionSeparatorTexts: [uit, van de, op de, en]
HalfADozenRegex: !simpleRegex
def: (een\s+)?half\s+dozijn
GrossRegex: !simpleRegex
def: (een\s+)?gros
DigitalNumberRegex: !nestedRegex
def: ((?<=\b)(honderd|duizend|miljoen|miljard|biljoen|dozijn?)(?=\b))|((?<=(\d|\b)){BaseNumbers.MultiplierLookupRegex}(?=\b))
references: [ BaseNumbers.MultiplierLookupRegex ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .extractors import *
from .parsers import *
from .chinese import *
from .dutch import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from .extractors import *
from .parsers import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from typing import Dict, List, Pattern

from recognizers_text.culture import Culture
from recognizers_text.extractor import Extractor
from recognizers_text.utilities import RegExpUtility, DefinitionLoader
from recognizers_number.culture import CultureInfo
from recognizers_number.number.models import NumberMode
from recognizers_number.number.dutch.extractors import DutchNumberExtractor
from recognizers_number_with_unit.number_with_unit.constants import Constants
from recognizers_number_with_unit.number_with_unit.extractors import NumberWithUnitExtractorConfiguration
from recognizers_number_with_unit.resources.dutch_numeric_with_unit import DutchNumericWithUnit
from recognizers_number_with_unit.resources.base_units import BaseUnits


# pylint: disable=abstract-method
class DutchNumberWithUnitExtractorConfiguration(NumberWithUnitExtractorConfiguration):
    """Extractor settings shared by the Dutch number-with-unit configurations.

    The values are read from ``DutchNumericWithUnit`` and ``BaseUnits`` once,
    in ``__init__``, and exposed through read-only properties.
    """

    def __init__(self, culture_info: CultureInfo):
        # A missing culture falls back to Dutch before the base class is initialised.
        if culture_info is None:
            culture_info = CultureInfo(Culture.Dutch)
        super().__init__(culture_info)

        self._unit_num_extractor = DutchNumberExtractor(NumberMode.Unit)
        self._build_prefix = DutchNumericWithUnit.BuildPrefix
        self._build_suffix = DutchNumericWithUnit.BuildSuffix

        connector_source = DutchNumericWithUnit.CompoundUnitConnectorRegex
        self._compound_unit_connector_regex = RegExpUtility.get_safe_reg_exp(connector_source)

        non_unit_source = BaseUnits.PmNonUnitRegex
        self._pm_non_unit_regex = RegExpUtility.get_safe_reg_exp(non_unit_source)

    # --- ambiguity filters, loaded from the resource on every access ---

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Filters produced by ``DefinitionLoader`` from ``AmbiguityFiltersDict``."""
        raw_filters = DutchNumericWithUnit.AmbiguityFiltersDict
        return DefinitionLoader.load_ambiguity_filters(raw_filters)

    @property
    def dimension_ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """Filters produced by ``DefinitionLoader`` from ``DimensionAmbiguityFiltersDict``."""
        raw_filters = DutchNumericWithUnit.DimensionAmbiguityFiltersDict
        return DefinitionLoader.load_ambiguity_filters(raw_filters)

    # --- values cached by __init__ ---

    @property
    def unit_num_extractor(self) -> Extractor:
        """The ``DutchNumberExtractor`` created with ``NumberMode.Unit``."""
        return self._unit_num_extractor

    @property
    def build_prefix(self) -> str:
        """``DutchNumericWithUnit.BuildPrefix``."""
        return self._build_prefix

    @property
    def build_suffix(self) -> str:
        """``DutchNumericWithUnit.BuildSuffix``."""
        return self._build_suffix

    @property
    def compound_unit_connector_regex(self) -> Pattern:
        """Pattern built from ``DutchNumericWithUnit.CompoundUnitConnectorRegex``."""
        return self._compound_unit_connector_regex

    @property
    def non_unit_regex(self) -> Pattern:
        """Pattern built from ``BaseUnits.PmNonUnitRegex``."""
        return self._pm_non_unit_regex

    # --- fixed values ---

    @property
    def connector_token(self) -> str:
        """Always the empty string."""
        return ''

    @property
    def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
        """No pattern at this level; always ``None``."""
        return None

    def expand_half_suffix(self, source, result, numbers):
        """Intentionally does nothing and returns ``None``."""


# pylint: enable=abstract-method

class DutchAgeExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Dutch extractor configuration whose extract type is ``Constants.SYS_UNIT_AGE``."""

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)
        # Only suffixes come from the resource; prefixes and ambiguous units start empty.
        self._suffix_list = DutchNumericWithUnit.AgeSuffixList
        self._prefix_list = {}
        self._ambiguous_unit_list = []

    @property
    def extract_type(self) -> str:
        """The age unit type constant."""
        return Constants.SYS_UNIT_AGE

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.AgeSuffixList``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """The dictionary created empty in ``__init__``."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """The list created empty in ``__init__``."""
        return self._ambiguous_unit_list


class DutchCurrencyExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Dutch extractor configuration whose extract type is ``Constants.SYS_UNIT_CURRENCY``."""

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)
        # All three tables are taken directly from the Dutch resource class.
        resource = DutchNumericWithUnit
        self._suffix_list = resource.CurrencySuffixList
        self._prefix_list = resource.CurrencyPrefixList
        self._ambiguous_unit_list = resource.AmbiguousCurrencyUnitList

    @property
    def extract_type(self) -> str:
        """The currency unit type constant."""
        return Constants.SYS_UNIT_CURRENCY

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.CurrencySuffixList``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.CurrencyPrefixList``."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """``DutchNumericWithUnit.AmbiguousCurrencyUnitList``."""
        return self._ambiguous_unit_list


class DutchDimensionExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Dutch extractor configuration whose extract type is ``Constants.SYS_UNIT_DIMENSION``.

    The suffix table is the union of the information, area, length, angle,
    speed, volume and weight tables, merged in that order.
    """

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)

        # Merge in a fixed order: on a duplicate key the later table wins.
        merged_suffixes = {}
        for table in (
                DutchNumericWithUnit.InformationSuffixList,
                DutchNumericWithUnit.AreaSuffixList,
                DutchNumericWithUnit.LengthSuffixList,
                DutchNumericWithUnit.AngleSuffixList,
                DutchNumericWithUnit.SpeedSuffixList,
                DutchNumericWithUnit.VolumeSuffixList,
                DutchNumericWithUnit.WeightSuffixList):
            merged_suffixes.update(table)
        self._suffix_list = merged_suffixes

        self._prefix_list = {}

        self._ambiguous_unit_list = (
            DutchNumericWithUnit.AmbiguousDimensionUnitList
            + DutchNumericWithUnit.AmbiguousAngleUnitList
            + DutchNumericWithUnit.AmbiguousLengthUnitList
            + DutchNumericWithUnit.AmbiguousVolumeUnitList
            + DutchNumericWithUnit.AmbiguousWeightUnitList
        )

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """``DutchNumericWithUnit.AmbiguityFiltersDict`` returned as-is."""
        # NOTE(review): the parent class passes this same resource through
        # DefinitionLoader.load_ambiguity_filters; here it is returned unprocessed.
        # Confirm that this difference is intended.
        return DutchNumericWithUnit.AmbiguityFiltersDict

    @property
    def extract_type(self) -> str:
        """The dimension unit type constant."""
        return Constants.SYS_UNIT_DIMENSION

    @property
    def suffix_list(self) -> Dict[str, str]:
        """The merged suffix table built in ``__init__``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """The dictionary created empty in ``__init__``."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """The concatenated ambiguous-unit lists built in ``__init__``."""
        return self._ambiguous_unit_list


class DutchTemperatureExtractorConfiguration(DutchNumberWithUnitExtractorConfiguration):
    """Dutch extractor configuration whose extract type is ``Constants.SYS_UNIT_TEMPERATURE``.

    Unlike its parent, it supplies a pattern for
    ``ambiguous_unit_number_multiplier_regex`` instead of ``None``.
    """

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)
        self._suffix_list = DutchNumericWithUnit.TemperatureSuffixList
        self._prefix_list = {}
        self._ambiguous_unit_list = DutchNumericWithUnit.AmbiguousTemperatureUnitList

        multiplier_source = BaseUnits.AmbiguousUnitNumberMultiplierRegex
        self._ambiguous_unit_number_multiplier_regex = RegExpUtility.get_safe_reg_exp(multiplier_source)

    @property
    def ambiguity_filters_dict(self) -> Dict[Pattern, Pattern]:
        """``DutchNumericWithUnit.AmbiguityFiltersDict`` returned as-is."""
        # NOTE(review): the parent class passes this same resource through
        # DefinitionLoader.load_ambiguity_filters; here it is returned unprocessed.
        # Confirm that this difference is intended.
        return DutchNumericWithUnit.AmbiguityFiltersDict

    @property
    def extract_type(self) -> str:
        """The temperature unit type constant."""
        return Constants.SYS_UNIT_TEMPERATURE

    @property
    def suffix_list(self) -> Dict[str, str]:
        """``DutchNumericWithUnit.TemperatureSuffixList``."""
        return self._suffix_list

    @property
    def prefix_list(self) -> Dict[str, str]:
        """The dictionary created empty in ``__init__``."""
        return self._prefix_list

    @property
    def ambiguous_unit_list(self) -> List[str]:
        """``DutchNumericWithUnit.AmbiguousTemperatureUnitList``."""
        return self._ambiguous_unit_list

    @property
    def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
        """Pattern built from ``BaseUnits.AmbiguousUnitNumberMultiplierRegex``."""
        return self._ambiguous_unit_number_multiplier_regex
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from recognizers_text import Culture
from recognizers_text.extractor import Extractor
from recognizers_text.parser import Parser
from recognizers_number.culture import CultureInfo
from recognizers_number.number.dutch.extractors import DutchNumberExtractor, NumberMode
from recognizers_number.number.parser_factory import AgnosticNumberParserFactory, ParserType
from recognizers_number.number.dutch.parsers import DutchNumberParserConfiguration
from recognizers_number_with_unit.number_with_unit.parsers import NumberWithUnitParserConfiguration
from recognizers_number_with_unit.resources.dutch_numeric_with_unit import DutchNumericWithUnit


class DutchNumberWithUnitParserConfiguration(NumberWithUnitParserConfiguration):
    """Parser settings shared by the Dutch number-with-unit parser configurations.

    Holds the Dutch number extractor and number parser built in ``__init__``.
    """

    def __init__(self, culture_info: CultureInfo):
        # A missing culture falls back to Dutch before the base class is initialised.
        if culture_info is None:
            culture_info = CultureInfo(Culture.Dutch)
        super().__init__(culture_info)

        self._internal_number_extractor = DutchNumberExtractor(NumberMode.DEFAULT)

        # The number parser is configured with the same culture as this object.
        number_config = DutchNumberParserConfiguration(culture_info)
        self._internal_number_parser = AgnosticNumberParserFactory.get_parser(
            ParserType.NUMBER, number_config)

    @property
    def internal_number_parser(self) -> Parser:
        """The parser obtained from ``AgnosticNumberParserFactory`` for ``ParserType.NUMBER``."""
        return self._internal_number_parser

    @property
    def internal_number_extractor(self) -> Extractor:
        """The ``DutchNumberExtractor`` created with ``NumberMode.DEFAULT``."""
        return self._internal_number_extractor

    @property
    def connector_token(self) -> str:
        """Always the empty string."""
        return ''


class DutchAgeParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Dutch parser configuration that registers the age suffix table."""

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)
        age_units = DutchNumericWithUnit.AgeSuffixList
        self.add_dict_to_unit_map(age_units)


class DutchCurrencyParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Dutch parser configuration that registers the currency tables.

    Also sets the currency-name-to-ISO-code map and the fractional unit code list.
    """

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)

        # Suffixes are registered before prefixes, as in the original order.
        for unit_table in (DutchNumericWithUnit.CurrencySuffixList,
                           DutchNumericWithUnit.CurrencyPrefixList):
            self.add_dict_to_unit_map(unit_table)

        self.currency_name_to_iso_code_map = DutchNumericWithUnit.CurrencyNameToIsoCodeMap
        self.currency_fraction_code_list = DutchNumericWithUnit.FractionalUnitNameToCodeMap


class DutchDimensionParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Dutch parser configuration that registers every dimension suffix table."""

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)

        # Registration order is kept exactly: information, area, length,
        # speed, angle, volume, weight.
        dimension_tables = (
            DutchNumericWithUnit.InformationSuffixList,
            DutchNumericWithUnit.AreaSuffixList,
            DutchNumericWithUnit.LengthSuffixList,
            DutchNumericWithUnit.SpeedSuffixList,
            DutchNumericWithUnit.AngleSuffixList,
            DutchNumericWithUnit.VolumeSuffixList,
            DutchNumericWithUnit.WeightSuffixList,
        )
        for unit_table in dimension_tables:
            self.add_dict_to_unit_map(unit_table)


class DutchTemperatureParserConfiguration(DutchNumberWithUnitParserConfiguration):
    """Dutch parser configuration that registers the temperature suffix table."""

    def __init__(self, culture_info: CultureInfo = None):
        super().__init__(culture_info)
        temperature_units = DutchNumericWithUnit.TemperatureSuffixList
        self.add_dict_to_unit_map(temperature_units)
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@
ChineseTemperatureParserConfiguration,
ChineseDimensionParserConfiguration,
ChineseAgeParserConfiguration)
from .dutch.extractors import (DutchCurrencyExtractorConfiguration,
DutchTemperatureExtractorConfiguration,
DutchDimensionExtractorConfiguration,
DutchAgeExtractorConfiguration)
from .dutch.parsers import (DutchCurrencyParserConfiguration,
DutchTemperatureParserConfiguration,
DutchDimensionParserConfiguration,
DutchAgeParserConfiguration)
from .spanish.extractors import (SpanishCurrencyExtractorConfiguration,
SpanishTemperatureExtractorConfiguration,
SpanishDimensionExtractorConfiguration,
Expand Down Expand Up @@ -128,6 +136,30 @@ def initialize_configuration(self):
]))
# endregion

# region Dutch
self.register_model('CurrencyModel', Culture.Dutch, lambda options: CurrencyModel(
[ExtractorParserModel(BaseMergedUnitExtractor(DutchCurrencyExtractorConfiguration(
)), BaseMergedUnitParser(DutchCurrencyParserConfiguration()))]
))
self.register_model('TemperatureModel', Culture.Dutch, lambda options: TemperatureModel([
ExtractorParserModel(
NumberWithUnitExtractor(
DutchTemperatureExtractorConfiguration()),
NumberWithUnitParser(DutchTemperatureParserConfiguration()))
]))
self.register_model('DimensionModel', Culture.Dutch, lambda options: DimensionModel([
ExtractorParserModel(
NumberWithUnitExtractor(
DutchDimensionExtractorConfiguration()),
NumberWithUnitParser(DutchDimensionParserConfiguration()))
]))
self.register_model('AgeModel', Culture.Dutch, lambda options: AgeModel([
ExtractorParserModel(
NumberWithUnitExtractor(DutchAgeExtractorConfiguration()),
NumberWithUnitParser(DutchAgeParserConfiguration()))
]))
# endregion

# region French
self.register_model('CurrencyModel', Culture.French, lambda options: CurrencyModel(
[ExtractorParserModel(BaseMergedUnitExtractor(FrenchCurrencyExtractorConfiguration(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
from .french_numeric_with_unit import FrenchNumericWithUnit
from .italian_numeric_with_unit import ItalianNumericWithUnit
from .german_numeric_with_unit import GermanNumericWithUnit
from .dutch_numeric_with_unit import DutchNumericWithUnit
from .portuguese_numeric_with_unit import PortugueseNumericWithUnit
from .spanish_numeric_with_unit import SpanishNumericWithUnit
Loading

0 comments on commit 4451a53

Please sign in to comment.