Skip to content

Commit

Permalink
Merge pull request #68 from purecloudlabs/NLU-3505
Browse files (browse the repository at this point in the history)
[NLU-3505] Japanese AmountOfMoney support
  • Loading branch information
samhickey25 authored Apr 19, 2023
2 parents c59a18e + 15aa559 commit a5d3a23
Show file tree
Hide file tree
Showing 17 changed files with 200 additions and 102 deletions.
14 changes: 7 additions & 7 deletions Patterns/Japanese/Japanese-NumbersWithUnit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ CurrencySuffixList: !dictionary
Chetrum: チェルタム
#Bolivian boliviano
Bolivian boliviano: ボリビアーノ
#Bosnia and Herzegovina convertible mark
#Bosnia and Herzegovina convertible mark
Bosnia and Herzegovina convertible mark: 兌換マルク
#Botswana pula
Botswana pula: ボツワナ・プラ|ボツワナプラ|プラ
Thebe: テベ
#Brazilian real
Brazilian real: ブラジル・レアル|ブラジルレアル|レアル
#Bulgarian lev
#Bulgarian lev
Bulgarian lev: ブルガリア・レフ|ブルガリアレフ|レフ
Stotinka: ストティンカ
#Cambodian riel
Expand Down Expand Up @@ -190,7 +190,7 @@ CurrencySuffixList: !dictionary
Manat: マナト
#Shilling
Somali shilling: ソマリア・シリング
Somaliland shilling: ソマリランド・シリング
Somaliland shilling: ソマリランド・シリング
Tanzanian shilling: タンザニア・シリング
Ugandan shilling: ウガンダ・シリング
#Leu
Expand Down Expand Up @@ -499,10 +499,10 @@ CurrencyNameToIsoCodeMap: !dictionary
Kiribati dollar: _KID
Guernsey pound: _GGP
Faroese króna: _FOK
Cook Islands dollar: _CKD
British Virgin Islands dollar: _BD
Ascension pound: _AP
Alderney pound: _ALP
Cook Islands dollar: _CKD
British Virgin Islands dollar: _BD
Ascension pound: _AP
Alderney pound: _ALP
Abkhazian apsar: _AA
FractionalUnitNameToCodeMap: !dictionary
types: [ string, string ]
Expand Down
2 changes: 1 addition & 1 deletion Python/libraries/datatypes-timex-expression/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def read(fname):


NAME = 'datatypes_timex_expression_genesys'
VERSION = '1.0.45'
VERSION = '1.0.46'
REQUIRES = []

setup(
Expand Down
2 changes: 1 addition & 1 deletion Python/libraries/recognizers-choice/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def read(fname):


NAME = 'recognizers-text-choice-genesys'
VERSION = '1.0.45'
VERSION = '1.0.46'
REQUIRES = ['recognizers-text-genesys', 'regex', 'grapheme']

setup(
Expand Down
2 changes: 1 addition & 1 deletion Python/libraries/recognizers-date-time/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def read(fname):


NAME = 'recognizers-text-date-time-genesys'
VERSION = '1.0.45'
VERSION = '1.0.46'
REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys',
'recognizers-text-number-with-unit-genesys', 'regex', 'datedelta']

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from recognizers_text.extractor import Extractor
from recognizers_text.utilities import RegExpUtility
from recognizers_number.culture import CultureInfo
from recognizers_number.number.models import NumberMode
from recognizers_number.number.japanese.extractors import JapaneseNumberExtractor
from recognizers_number.number.japanese.extractors import JapaneseNumberExtractor, JapaneseNumberExtractorMode
from recognizers_number_with_unit.number_with_unit.constants import Constants
from recognizers_number_with_unit.number_with_unit.extractors import NumberWithUnitExtractorConfiguration
from recognizers_number_with_unit.resources.japanese_numeric_with_unit import JapaneseNumericWithUnit
Expand Down Expand Up @@ -43,6 +42,10 @@ def compound_unit_connector_regex(self) -> Pattern:
def non_unit_regex(self) -> Pattern:
return self._pm_non_unit_regex

@property
def half_unit_regex(self) -> Pattern:
return self._half_unit_regex

@property
def ambiguous_unit_number_multiplier_regex(self) -> Pattern:
return None
Expand All @@ -54,14 +57,15 @@ def __init__(self, culture_info: CultureInfo):
if culture_info is None:
culture_info = CultureInfo(Culture.Japanese)
super().__init__(culture_info)
self._unit_num_extractor = JapaneseNumberExtractor(NumberMode.Unit)
self._unit_num_extractor = JapaneseNumberExtractor(JapaneseNumberExtractorMode.EXTRACT_ALL)
self._build_prefix = JapaneseNumericWithUnit.BuildPrefix
self._build_suffix = JapaneseNumericWithUnit.BuildSuffix
self._connector_token = JapaneseNumericWithUnit.ConnectorToken
self._compound_unit_connector_regex = RegExpUtility.get_safe_reg_exp(
JapaneseNumericWithUnit.CompoundUnitConnectorRegex)
self._pm_non_unit_regex = RegExpUtility.get_safe_reg_exp(
BaseUnits.PmNonUnitRegex)
self._half_unit_regex = RegExpUtility.get_safe_reg_exp(JapaneseNumericWithUnit.HalfUnitRegex)


# pylint: enable=abstract-method
Expand All @@ -87,8 +91,5 @@ def __init__(self, culture_info: CultureInfo = None):
super().__init__(culture_info)
self._suffix_list = JapaneseNumericWithUnit.CurrencySuffixList
self._prefix_list = JapaneseNumericWithUnit.CurrencyPrefixList
# NOTE: JapaneseNumericWithUnit has no attribute AmbiguousCurrencyUnitList
# Changing it to empty list
# self._ambiguous_unit_list = JapaneseNumericWithUnit.AmbiguousCurrencyUnitList
self._ambiguous_unit_list = []
self._ambiguous_unit_list = JapaneseNumericWithUnit.CurrencyAmbiguousValues

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from recognizers_text.extractor import Extractor
from recognizers_text.parser import Parser
from recognizers_number.culture import CultureInfo
from recognizers_number.number.japanese.extractors import JapaneseNumberExtractor, NumberMode
from recognizers_number.number.japanese.extractors import JapaneseNumberExtractor, JapaneseNumberExtractorMode
from recognizers_number.number.parser_factory import AgnosticNumberParserFactory, ParserType
from recognizers_number.number.japanese.parsers import JapaneseNumberParserConfiguration
from recognizers_number_with_unit.number_with_unit.parsers import NumberWithUnitParserConfiguration
Expand All @@ -30,7 +30,7 @@ def __init__(self, culture_info: CultureInfo):
culture_info = CultureInfo(Culture.Japanese)
super().__init__(culture_info)
self._internal_number_extractor = JapaneseNumberExtractor(
NumberMode.DEFAULT)
JapaneseNumberExtractorMode.EXTRACT_ALL)
self._internal_number_parser = AgnosticNumberParserFactory.get_parser(
ParserType.NUMBER, JapaneseNumberParserConfiguration(culture_info))
self._connector_token = JapaneseNumericWithUnit.ConnectorToken
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def parse(self, query: str) -> List[ModelResult]:
parse_results.append(j)
else:
parse_results.append(r)

model_result = None
for parse_result in parse_results:
model_result = ModelResult()
model_result.start = parse_result.start
Expand All @@ -57,6 +57,8 @@ def parse(self, query: str) -> List[ModelResult]:

if b_add:
extraction_results.append(model_result)
if model_result:
break
except Exception:
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,19 @@ def initialize_configuration(self):
]))
# endregion

# region Japanese
self.register_model('CurrencyModel', Culture.Japanese, lambda options: CurrencyModel([
ExtractorParserModel(
BaseMergedUnitExtractor(
JapaneseCurrencyExtractorConfiguration()),
BaseMergedUnitParser(JapaneseCurrencyParserConfiguration())),
ExtractorParserModel(
NumberWithUnitExtractor(
EnglishCurrencyExtractorConfiguration()),
NumberWithUnitParser(EnglishCurrencyParserConfiguration()))
]))
# endregion

def get_age_model(self, culture: str = None, fallback_to_default_culture: bool = True) -> Model:
return self.get_model('AgeModel', culture, fallback_to_default_culture)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class JapaneseNumericWithUnit:
("Djiboutian franc", "ジブチ・フラン"),
("CFP franc", "CFPフラン"),
("Guinean franc", "ギニア・フラン"),
("Swiss franc", "スイス・フラン"),
("Swiss franc", "スイス・フラン|スイスフラン"),
("Rwandan franc", "ルワンダ・フラン"),
("Belgian franc", "ベルギー・フラン"),
("Rappen", "Rappen"),
Expand Down Expand Up @@ -203,7 +203,7 @@ class JapaneseNumericWithUnit:
("Pound", "ポンド"),
("Pence", "ペンス"),
("Shilling", "シリング"),
("United States dollar", "ドル|USドル"),
("United States dollar", "米ドル|USドル|ドル"),
("East Caribbean dollar", "東カリブ・ドル"),
("Australian dollar", "オーストラリア・ドル|オーストラリアドル"),
("Bahamian dollar", "バハマ・ドル"),
Expand All @@ -220,15 +220,16 @@ class JapaneseNumericWithUnit:
("Guyanese dollar", "ガイアナ・ドル|ガイアナ・ドル"),
("Hong Kong dollar", "香港ドル"),
("Macau Pataca", "マカオ・パタカ|マカオ・パタカ"),
("New Taiwan dollar", "ニュー台湾ドル|ニュー台湾ドル"),
("New Taiwan dollar", "ニュー台湾ドル|ニュー台湾ドル|台湾ドル"),
("Jamaican dollar", "ジャマイカ・ドル|ジャマイカドル"),
("Kiribati dollar", "キリバス・ドル"),
("Liberian dollar", "リベリア・ドル|リベリアドル"),
("Namibian dollar", "ナミビア・ドル|ナミビアドル"),
("Surinamese dollar", "スリナム・ドル|スリナムドル"),
("Trinidad and Tobago dollar", "トリニダード・トバゴ・ドル|トリニダードトバゴ・ドル"),
("Tuvaluan dollar", "ツバル・ドル|ツバルドル"),
("Chinese yuan", "人民元"),
("Dollar", "どる|$"),
("Chinese yuan", "人民元|元"),
("Fen", "分"),
("Jiao", "角"),
("Finnish markka", "フィンランド・マルカ"),
Expand Down Expand Up @@ -508,13 +509,80 @@ class JapaneseNumericWithUnit:
("Solomon Islands dollar", "si$|si $"),
("New Taiwan dollar", "nt$|nt $"),
("Samoan tālā", "ws$"),
("Chinese yuan", "¥"),
("Chinese yuan", "¥|人民元"),
("Japanese yen", "¥|\\"),
("Turkish lira", "₺"),
("Euro", "€"),
("Pound", "£"),
("Costa Rican colón", "₡")])
CurrencyAmbiguousValues = [r'円', r'銭', r'分', r'レク', r'プル', r'ブル', r'\\']
CurrencyAmbiguousValues = [r'円', r'銭', r'分', r'レク', r'プル', r'ブル', r'\\', r'元']
DimensionSuffixList = dict([("Meter", "米|公尺|m|メートル"),
("Kilometer", "千米|公里|km|キロメートル"),
("Decimeter", "分米|公寸|dm|デシメートル"),
("Millimeter", "ミリ"),
("Centimeter", "釐米|厘米|公分|cm|センチ"),
("Micrometer", "毫米|公釐|mm"),
("Microns", "微米"),
("Picometer", "皮米|ピクトメーター|pm"),
("Nanometer", "纳米"),
("Mile", "英里|マイル"),
("Inch", "英寸|インチ"),
("Foot", "呎|英尺|フィート"),
("Yard", "码"),
("Knot", "海里"),
("Light year", "光年"),
("Meter per second", "米每秒|米/秒|m/s|秒速メートル|毎秒メートル"),
("Kilometer per hour", "公里每小时|千米每小时|公里/小时|千米/小时|km/h|時速キロメートル"),
("Kilometer per minute", "公里每分钟|千米每分钟|公里/分钟|千米/分钟|km/min|分速キロメートル"),
("Kilometer per second", "公里每秒|千米每秒|公里/秒|千米/秒|km/s|秒速キロメートル|毎秒キロメートル"),
("Mile per hour", "英里每小时|英里/小时|時速マイル"),
("Foot per second", "英尺每小时|英尺/小时"),
("Foot per minute", "英尺每分钟|英尺/分钟"),
("Yard per minute", "码每分|码/分"),
("Yard per second", "码每秒|码/秒"),
("Square centimetre", "平方厘米"),
("Square decimeter", "平方分米"),
("Square meter", "平方米|平方メートル"),
("Square kilometer", "平方公里|平方キロメートル"),
("Acre", "英亩|公亩|エーカー"),
("Hectare", "公顷"),
("Mu", "亩|市亩|ムー"),
("Liter", "公升|升|l"),
("Milliliter", "毫升|ml|ミリリットル"),
("Cubic meter", "立方米"),
("Cubic decimeter", "立方分米"),
("Cubic millimeter", "立方毫米"),
("Cubic foot", "立方英尺|立方フィート"),
("Gallon", "加仑|ガロン"),
("Pint", "品脱"),
("Dou", "市斗|斗"),
("Dan", "市石|石"),
("Kilogram", "千克|公斤|kg|キログラム"),
("Gram", "克|g"),
("Milligram", "毫克|mg"),
("Microgram", "微克|μg"),
("Ton", "公吨|吨|t|トン"),
("Metric ton", "メートルトン"),
("Pound", "磅|ポンド"),
("Ounce", "盎司|オンス"),
("Jin", "市斤|斤"),
("Liang", "两"),
("Barrel", "桶"),
("Pot", "罐"),
("Bit", "比特|位|b|bit|ビット"),
("Kilobit", "千比特|千位|kb|Kb"),
("Megabit", "兆比特|兆位|mb|Mb|メガバイト"),
("Gigabit", "十亿比特|千兆比特|十亿位|千兆位|gb|Gb"),
("Terabit", "万亿比特|兆兆比特|万亿位|兆兆位|tb|Tb"),
("Petabit", "千兆兆比特|千万亿比特|千兆兆位|千万亿位|pb|Pb"),
("Byte", "字节|byte|Byte"),
("Kilobyte", "千字节|kB|KB"),
("Megabyte", "兆字节|mB|MB"),
("Gigabyte", "十亿字节|千兆字节|gB|GB"),
("Terabyte", "万亿字节|兆兆字节|tB|TB"),
("Petabyte", "千兆兆字节|千万亿字节|pB|PB")])
DimensionPrefixList = dict([("split_unit", "時速|分速|秒速|毎秒")])
DimensionAmbiguousValues = [r'丈', r'位', r'克', r'分', r'升', r'寸', r'尺', r'斗', r'斤', r'桶', r'毫', r'石', r'码', r'磅', r'米', r'罐', r'里', r'm', r'km', r'dm', r'cm', r'mm', r'l', r'ml', r'kg', r'mg', r'g', r't', r'b', r'byte', r'kb', r'mb', r'gb', r'tb', r'pb', r'時速', r'トン']
AmbiguityFiltersDict = dict([("五角", "五角大楼"),
("普尔", "标准普尔")])
TemperatureSuffixList = dict([("F", "華氏|華氏温度|華氏温度の|°f"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,18 @@
"class DutchNumericWithUnit:"
],
"footer": [ "# pylint: enable=line-too-long" ]
},
{
"input": [ "Japanese", "Japanese-NumbersWithUnit" ],
"output": "japanese_numeric_with_unit",
"header": [
"from .base_numbers import BaseNumbers",
"# pylint: disable=line-too-long",
"",
"",
"class JapaneseNumericWithUnit:"
],
"footer": [ "# pylint: enable=line-too-long" ]
}
]
}
2 changes: 1 addition & 1 deletion Python/libraries/recognizers-number-with-unit/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def read(fname):


NAME = "recognizers-text-number-with-unit-genesys"
VERSION = "1.0.45"
VERSION = "1.0.46"
REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex']

setup(
Expand Down
2 changes: 1 addition & 1 deletion Python/libraries/recognizers-number/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def read(fname):


NAME = "recognizers-text-number-genesys"
VERSION = "1.0.45"
VERSION = "1.0.46"
REQUIRES = ['recognizers-text-genesys', 'regex']

setup(
Expand Down
2 changes: 1 addition & 1 deletion Python/libraries/recognizers-sequence/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def read(fname):


NAME = "recognizers-text-sequence-genesys"
VERSION = "1.0.45"
VERSION = "1.0.46"
REQUIRES = ['recognizers-text-genesys', 'recognizers-text-number-genesys', 'regex']

setup(
Expand Down
14 changes: 7 additions & 7 deletions Python/libraries/recognizers-suite/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ def read(fname):


NAME = 'recognizers-text-suite-genesys'
VERSION = '1.0.45'
VERSION = '1.0.46'
REQUIRES = [
'recognizers-text-genesys==1.0.45',
'recognizers-text-number-genesys==1.0.45',
'recognizers-text-number-with-unit-genesys==1.0.45',
'recognizers-text-date-time-genesys==1.0.45',
'recognizers-text-sequence-genesys==1.0.45',
'recognizers-text-choice-genesys==1.0.45'
'recognizers-text-genesys==1.0.46',
'recognizers-text-number-genesys==1.0.46',
'recognizers-text-number-with-unit-genesys==1.0.46',
'recognizers-text-date-time-genesys==1.0.46',
'recognizers-text-sequence-genesys==1.0.46',
'recognizers-text-choice-genesys==1.0.46'
]

setup(
Expand Down
2 changes: 1 addition & 1 deletion Python/libraries/recognizers-text/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from setuptools import setup, find_packages

NAME = "recognizers-text-genesys"
VERSION = "1.0.45"
VERSION = "1.0.46"
REQUIRES = ['emoji==1.1.0', 'multipledispatch']

setup(
Expand Down
2 changes: 2 additions & 0 deletions Python/libraries/resource-generator/lib/code_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def __init__(self, name, value_type, entries):

for value in entries:
value = value.replace('\'', '\\\'')
if value == '\\':
value = value.replace('\\', '\\\\')
self.entries.append(f'r{value_quote}{value}{value_quote}')

def write(self):
Expand Down
Loading

0 comments on commit a5d3a23

Please sign in to comment.