From 9e686e7cec42cb729f2590b8e45c1a49d6e24cc1 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 18 Nov 2024 11:41:36 -0600 Subject: [PATCH] Ensure all languages load --- CHANGELOG.md | 5 + README.md | 2 + tests/test_decimal_format.py | 9 + tests/test_load_all.py | 9 + unicode_rbnf/VERSION | 2 +- unicode_rbnf/decimal_format.py | 52 +++ unicode_rbnf/engine.py | 19 +- unicode_rbnf/rbnf/en_001.xml | 14 - unicode_rbnf/rbnf/es_419.xml | 50 --- unicode_rbnf/rbnf/nb.xml | 13 - unicode_rbnf/rbnf/root.xml | 724 --------------------------------- 11 files changed, 95 insertions(+), 804 deletions(-) create mode 100644 tests/test_decimal_format.py create mode 100644 tests/test_load_all.py create mode 100644 unicode_rbnf/decimal_format.py delete mode 100644 unicode_rbnf/rbnf/en_001.xml delete mode 100644 unicode_rbnf/rbnf/es_419.xml delete mode 100644 unicode_rbnf/rbnf/nb.xml delete mode 100644 unicode_rbnf/rbnf/root.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 06367c6..6f78866 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 2.1.0 + +- Ensure all supported languages can load +- Start on decimal pattern format implementation (not complete) + ## 2.0.0 - Change `format_number` to return `FormatResult` instead of a `str` diff --git a/README.md b/README.md index 7f6e65c..43d6fdb 100644 --- a/README.md +++ b/README.md @@ -76,3 +76,5 @@ Some features that will need to be added eventually: * Proper fraction rules (`0.x`) * Preceding reminder substitution (`>>>` or `→→→`) * Number format strings (`==`) +* Decimal format patterns (`#,##0.00`) +* Plural replacements (`$(ordinal,one{st}...)`) diff --git a/tests/test_decimal_format.py b/tests/test_decimal_format.py new file mode 100644 index 0000000..cdaf6c0 --- /dev/null +++ b/tests/test_decimal_format.py @@ -0,0 +1,9 @@ +from unicode_rbnf.decimal_format import format_decimal + + +def test_format_decimal() -> None: + assert format_decimal(12345.6789, "#,##0.00") == "12,345.68" + assert 
"""Handle decimal formatting.

See: https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1DecimalFormat.html
"""
from decimal import Decimal, InvalidOperation
from typing import Union


def _group_digits(digits: str, primary: int, secondary: int) -> str:
    """Insert "," separators into a string of integer digits.

    The right-most group has ``primary`` digits; every group to the left of
    it has ``secondary`` digits (the left-most group may be shorter).
    """
    if primary <= 0 or len(digits) <= primary:
        return digits

    groups = [digits[-primary:]]
    rest = digits[:-primary]
    while len(rest) > secondary:
        groups.append(rest[-secondary:])
        rest = rest[:-secondary]
    if rest:
        groups.append(rest)

    return ",".join(reversed(groups))


def format_decimal(value: Union[int, float, str, Decimal], pattern: str) -> str:
    """Format a number according to a simplified ICU DecimalFormat pattern.

    Supported pattern features:

    * ``0`` in the integer part: minimum number of integer digits (zero padded)
    * ``,`` in the integer part: grouping; the digits after the last ``,`` give
      the primary group size and the digits between the last two ``,`` give
      the secondary group size (e.g. ``#,##,##0``)
    * ``0`` in the fraction part: minimum number of fraction digits
    * length of the fraction part: maximum number of fraction digits

    Args:
        value: Number to format. Strings are parsed with ``Decimal``.
        pattern: Pattern such as ``#,##0.00``.

    Returns:
        The formatted number, using "," for grouping and "." as the decimal
        separator.

    Raises:
        ValueError: If the pattern has more than one ".", if a string value
            is not a valid number, or if the value is not finite.

    >>> format_decimal(12345.6789, "#,##0.00")
    '12,345.68'
    >>> format_decimal("1.5", "#,##0.#")
    '1.5'
    """
    # Split the pattern into integer and fractional parts
    integer_pattern, _, fraction_pattern = pattern.partition(".")
    if "." in fraction_pattern:
        raise ValueError(f"Pattern has more than one decimal point: {pattern}")

    min_integer_digits = integer_pattern.count("0")
    min_fraction_digits = fraction_pattern.count("0")
    max_fraction_digits = len(fraction_pattern)

    # Group sizes come from the pattern itself instead of assuming thousands
    group_sizes = [len(group) for group in integer_pattern.split(",")[1:]]
    primary_size = group_sizes[-1] if group_sizes else 0
    secondary_size = (group_sizes[-2] if len(group_sizes) > 1 else 0) or primary_size

    # str does not support the "f" format code, and int would be formatted
    # through float (losing precision for large values), so use Decimal.
    number: Union[float, Decimal]
    if isinstance(value, (str, int)):
        try:
            number = Decimal(value)
        except InvalidOperation as err:
            raise ValueError(f"Not a valid number: {value!r}") from err
    else:
        number = value

    # Round the number to the maximum fractional digits
    rounded = f"{number:.{max_fraction_digits}f}"

    # Keep the sign out of the way so that padding/grouping only see digits
    sign = ""
    if rounded.startswith("-"):
        sign, rounded = "-", rounded[1:]

    integer_digits, _, fraction_digits = rounded.partition(".")
    if not integer_digits.isdigit():
        # "nan", "inf", etc.
        raise ValueError(f"Cannot format non-finite value: {value!r}")

    # Apply integer padding, then grouping (on the padded digit string so the
    # padding zeros are not lost)
    integer_digits = integer_digits.zfill(min_integer_digits)
    if "," in integer_pattern:
        integer_digits = _group_digits(integer_digits, primary_size, secondary_size)

    # Drop optional trailing zeros, but keep the required ones
    fraction_digits = fraction_digits.rstrip("0").ljust(min_fraction_digits, "0")

    # Combine integer and fractional parts; optional fraction digits ("#") are
    # kept whenever they are non-zero.
    formatted_number = sign + integer_digits
    if fraction_digits:
        formatted_number = f"{formatted_number}.{fraction_digits}"

    return formatted_number
parse(value_str: str, text: str, radix: int = 10) -> "Optional[RbnfRule]": assert isinstance(part, TextRulePart) part.text += c + elif c in ("#", "0", ",", "."): + # decimal format pattern (e.g., #,##0.00) + assert isinstance(part, SubRulePart) + assert state in ( + ParseState.SUB_REMAINDER, + ParseState.SUB_QUOTIENT, + ), state + if part.format_pattern is None: + part.format_pattern = "" + + part.format_pattern += c else: raise ValueError(f"Got {c} in {state}") @@ -459,7 +474,7 @@ def load_xml(self, root: et.Element) -> None: raise ValueError("Missing identity/language element") language = lang_elem.attrib["type"] - if language != self.language: + if (language != self.language) and (not self.language.startswith(language)): raise ValueError(f"Expected language {self.language}, got {language}") for group_elem in root.findall("rbnf//ruleset"): @@ -473,7 +488,7 @@ def load_xml(self, root: et.Element) -> None: continue value_str = rule_elem.attrib["value"] - radix = int(rule_elem.attrib.get("radix", 10)) + radix = int(re.sub(r"[^0-9]+", "", rule_elem.attrib.get("radix", "10"))) self.add_rule( value_str, diff --git a/unicode_rbnf/rbnf/en_001.xml b/unicode_rbnf/rbnf/en_001.xml deleted file mode 100644 index 6a8b977..0000000 --- a/unicode_rbnf/rbnf/en_001.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - diff --git a/unicode_rbnf/rbnf/es_419.xml b/unicode_rbnf/rbnf/es_419.xml deleted file mode 100644 index f0d135d..0000000 --- a/unicode_rbnf/rbnf/es_419.xml +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - - - - - - - - º; - ᵉʳ; - º; - ᵉʳ; - º; - →→; - →→; - - - −→→; - =#,##0==%%dord-mascabbrev=.; - - - −→→; - =#,##0=º.; - - - −→→; - =#,##0=ª.; - - - −→→; - =#,##0=ᵒˢ.; - - - −→→; - =#,##0=ᵃˢ.; - - - =%digits-ordinal-masculine=; - - - - diff --git a/unicode_rbnf/rbnf/nb.xml b/unicode_rbnf/rbnf/nb.xml deleted file mode 100644 index b8b02f9..0000000 --- a/unicode_rbnf/rbnf/nb.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - diff --git a/unicode_rbnf/rbnf/root.xml 
b/unicode_rbnf/rbnf/root.xml deleted file mode 100644 index d7865c8..0000000 --- a/unicode_rbnf/rbnf/root.xml +++ /dev/null @@ -1,724 +0,0 @@ - - - - - - - - - - - - −→→; - =#,##0.00=; - 0; - ա; - բ; - գ; - դ; - ե; - զ; - է; - ը; - թ; - ժ[→→]; - ի[→→]; - լ[→→]; - խ[→→]; - ծ[→→]; - կ[→→]; - հ[→→]; - ձ[→→]; - ղ[→→]; - ճ[→→]; - մ[→→]; - յ[→→]; - ն[→→]; - շ[→→]; - ո[→→]; - չ[→→]; - պ[→→]; - ջ[→→]; - ռ[→→]; - ս[→→]; - վ[→→]; - տ[→→]; - ր[→→]; - ց[→→]; - ւ[→→]; - փ[→→]; - ք[→→]; - =#,##0=; - - - −→→; - =#,##0.00=; - 0; - Ա; - Բ; - Գ; - Դ; - Ե; - Զ; - Է; - Ը; - Թ; - Ժ[→→]; - Ի[→→]; - Լ[→→]; - Խ[→→]; - Ծ[→→]; - Կ[→→]; - Հ[→→]; - Ձ[→→]; - Ղ[→→]; - Ճ[→→]; - Մ[→→]; - Յ[→→]; - Ն[→→]; - Շ[→→]; - Ո[→→]; - Չ[→→]; - Պ[→→]; - Ջ[→→]; - Ռ[→→]; - Ս[→→]; - Վ[→→]; - Տ[→→]; - Ր[→→]; - Ց[→→]; - Ւ[→→]; - Փ[→→]; - Ք[→→]; - =#,##0=; - - - а; - в; - г; - д; - є; - ѕ; - з; - и; - ѳ; - і; - - - ҃; - ҃=%%cyrillic-lower-1-10=; - а҃і; - в҃і; - г҃і; - д҃і; - є҃і; - ѕ҃і; - з҃і; - и҃і; - ѳ҃і; - ҃к; - к→→; - ҃л; - л→→; - ҃м; - м→→; - ҃н; - н→→; - ҃ѯ; - ѯ→→; - ҃ѻ; - ѻ→→; - ҃п; - п→→; - ҃ч; - ч→→; - - - ҃; - =%cyrillic-lower=; - - - ҃; - ҃҂а; - ҃҂в; - ҃҂г; - ҃҂д; - ҃҂є; - ҃҂ѕ; - ҃҂з; - ҃҂и; - ҃҂ѳ; - ҃҂і; - ҂а҃҂і; - ҂в҃҂і; - ҂г҃҂і; - ҂д҃҂і; - ҂є҃҂і; - ҂ѕ҃҂і; - ҂з҃҂і; - ҂и҃҂і; - ҂ѳ҃҂і; - ҂к→→; - ҂л→→; - ҂м→→; - ҂н→→; - ҂ѯ→→; - ҂ѻ→→; - ҂п→→; - ҂ч→→; - ҂р→→; - ҂с→→; - ҂т→→; - ҂у→→; - ҂ф→→; - ҂х→→; - ҂ѱ→→; - ҂ѿ→→; - ҂ц→→; - - - −→→; - ←←.→→→; - 0҃; - =%%cyrillic-lower-1-10=҃; - а҃і; - в҃і; - г҃і; - д҃і; - є҃і; - ѕ҃і; - з҃і; - и҃і; - ѳ҃і; - к→%%cyrillic-lower-final→; - л→%%cyrillic-lower-final→; - м→%%cyrillic-lower-final→; - н→%%cyrillic-lower-final→; - ѯ→%%cyrillic-lower-final→; - ѻ→%%cyrillic-lower-final→; - п→%%cyrillic-lower-final→; - ч→%%cyrillic-lower-final→; - р→%%cyrillic-lower-final→; - с→%%cyrillic-lower-final→; - т→%%cyrillic-lower-final→; - у→%%cyrillic-lower-final→; - ф→%%cyrillic-lower-final→; - х→%%cyrillic-lower-final→; - ѱ→%%cyrillic-lower-final→; - ѿ҃; - ѿ→→; - ц→%%cyrillic-lower-final→; - 
҂←%%cyrillic-lower-1-10←→%%cyrillic-lower-post→; - ҂←←[ →→]; - ←%%cyrillic-lower-thousands←[ →→]; - ҂҂←←[ →→]; - ҂҂҂←←[ →→]; - ҂҂҂҂←←[ →→]; - ҂҂҂҂҂←←[ →→]; - =#,##0=; - - - =%ethiopic=; - ←←፼[→→]; - ←←፼→%%ethiopic-p1→; - ←←፼→%%ethiopic-p2→; - ←←፼→%%ethiopic-p3→; - - - ፼; - ፼=%%ethiopic-p=; - ←%ethiopic←፼[→%ethiopic→]; - - - ፼፼; - ፼፼=%%ethiopic-p=; - ←%ethiopic←፼→%%ethiopic-p1→; - - - ፼፼፼; - ፼፼፼=%%ethiopic-p=; - ←%ethiopic←፼→%%ethiopic-p2→; - - - −→→; - ←←፡→→; - ባዶ; - ፩; - ፪; - ፫; - ፬; - ፭; - ፮; - ፯; - ፰; - ፱; - ፲[→→]; - ፳[→→]; - ፴[→→]; - ፵[→→]; - ፶[→→]; - ፷[→→]; - ፸[→→]; - ፹[→→]; - ፺[→→]; - ፻[→→]; - ←←፻[→→]; - ፼[→→]; - ←←፼[→→]; - ፼→%%ethiopic-p1→; - ←←፼→%%ethiopic-p1→; - ፼→%%ethiopic-p2→; - ←←፼→%%ethiopic-p2→; - ፼→%%ethiopic-p3→; - ←←፼→%%ethiopic-p3→; - =#,##0=; - - - −→→; - =#,##0.00=; - =#,##0=; - ა; - ბ; - გ; - დ; - ე; - ვ; - ზ; - ჱ; - თ; - ი[→→]; - კ[→→]; - ლ[→→]; - მ[→→]; - ნ[→→]; - ჲ[→→]; - ო[→→]; - პ[→→]; - ჟ[→→]; - რ[→→]; - ს[→→]; - ტ[→→]; - უ[→→]; - ჳ[→→]; - ფ[→→]; - ქ[→→]; - ღ[→→]; - ყ[→→]; - შ[→→]; - ჩ[→→]; - ც[→→]; - ძ[→→]; - წ[→→]; - ჭ[→→]; - ხ[→→]; - ჴ[→→]; - ჵ[→→]; - ჯ[→→]; - =#,##0=; - - - −→→; - ←←.→→→; - =%%greek-numeral-minuscules=´; - - - 𐆊; - α; - β; - γ; - δ; - ε; - ϝ; - ζ; - η; - θ; - ι[→→]; - κ[→→]; - λ[→→]; - μ[→→]; - ν[→→]; - ξ[→→]; - ο[→→]; - π[→→]; - ϟ[→→]; - ρ[→→]; - σ[→→]; - τ[→→]; - υ[→→]; - φ[→→]; - χ[→→]; - ψ[→→]; - ω[→→]; - ϡ[→→]; - ͵←←[→→]; - ←←μ[ →→]; - ←←μμ[ →→]; - ←←μμμ[ →→]; - ←←μμμμ[ →→]; - =#,##0=; - - - −→→; - ←←.→→→; - =%%greek-numeral-majuscules=´; - - - 𐆊; - Α; - Β; - Γ; - Δ; - Ε; - Ϝ; - Ζ; - Η; - Θ; - Ι[→→]; - Κ[→→]; - Λ[→→]; - Μ[→→]; - Ν[→→]; - Ξ[→→]; - Ο[→→]; - Π[→→]; - Ϟ[→→]; - Ρ[→→]; - Σ[→→]; - Τ[→→]; - Υ[→→]; - Φ[→→]; - Χ[→→]; - Ψ[→→]; - Ω[→→]; - Ϡ[→→]; - ͵←←[→→]; - ←←Μ[ →→]; - ←←ΜΜ[ →→]; - ←←ΜΜΜ[ →→]; - ←←ΜΜΜΜ[ →→]; - =#,##0=; - - - =%hebrew=; - =%hebrew=[׳]; - =%hebrew=[׳]; - =%hebrew=׳; - - - −→→; - =#,##0.00=; - =%hebrew-item=׳; - י״→%hebrew-item→; - ט״ו; - ט״ז; - י״→%hebrew-item→; - כ׳; - כ״→%hebrew-item→; - 
ל׳; - ל״→%hebrew-item→; - מ׳; - מ״→%hebrew-item→; - נ׳; - נ״→%hebrew-item→; - ס׳; - ס״→%hebrew-item→; - ע׳; - ע״→%hebrew-item→; - פ׳; - פ״→%hebrew-item→; - צ׳; - צ״→%hebrew-item→; - ק→%%hebrew-0-99→; - ר→%%hebrew-0-99→; - רח״צ; - ר→%%hebrew-0-99→; - ש→%%hebrew-0-99→; - ד״ש; - ש→%%hebrew-0-99→; - שד״מ; - ש→%%hebrew-0-99→; - ת→%%hebrew-0-99→; - ת״ק; - תק→%%hebrew-0-99→; - ת״ר; - תר→%%hebrew-0-99→; - תרח״צ; - תר→%%hebrew-0-99→; - ת״ש; - תש→%%hebrew-0-99→; - תשד״מ; - תש→%%hebrew-0-99→; - ת״ת; - תת→%%hebrew-0-99→; - תת״ק; - תתק→%%hebrew-0-99→; - אלף; - ←%%hebrew-thousands←[→→]; - אלפיים; - ←%%hebrew-thousands←[→→]; - ←← אלפים; - ←%%hebrew-thousands←[→→]; - אלף אלפים; - =#,##0=; - - - ׳; - ״=%hebrew-item=; - י״→%hebrew-item→; - ט״ו; - ט״ז; - י״→%hebrew-item→; - ״כ; - כ״→%hebrew-item→; - ״ל; - ל״→%hebrew-item→; - ״מ; - מ״→%hebrew-item→; - ״נ; - נ״→%hebrew-item→; - ״ס; - ס״→%hebrew-item→; - ״ע; - ע״→%hebrew-item→; - ״ף; - פ״→%hebrew-item→; - ״צ; - צ״→%hebrew-item→; - - - −→→; - =#,##0.00=; - ״; - א; - ב; - ג; - ד; - ה; - ו; - ז; - ח; - ט; - י[→→]; - טו; - טז; - י→→; - כ[→→]; - ל[→→]; - מ[→→]; - נ[→→]; - ס[→→]; - ע[→→]; - ף; - פ[→→]; - צ[→→]; - ק[→→]; - ר[→→]; - רחצ; - ר→→; - ש[→→]; - דש; - ש→→; - שדמ; - ש→→; - ת[→→]; - תק[→→]; - תר[→→]; - תרחצ; - תר→→; - תש[→→]; - תשדמ; - תש→→; - תת[→→]; - תתק[→→]; - תתר[→→]; - תתש[→→]; - תתת[→→]; - תתתק[→→]; - תתתר[→→]; - תתתש[→→]; - תתתת[→→]; - תתתתק[→→]; - תתתתר[→→]; - תתתתש[→→]; - תתתתת[→→]; - =#,##0=; - - - −→→; - =#,##0.00=; - ״; - א; - ב; - ג; - ד; - ה; - ו; - ז; - ח; - ט; - י[→→]; - טו; - טז; - י→→; - כ[→→]; - ל[→→]; - מ[→→]; - נ[→→]; - ס[→→]; - ע[→→]; - פ[→→]; - צ[→→]; - =%%hebrew-item-hundreds=; - - - −→→; - =#,##0.00=; - n; - i; - ii; - iii; - iv; - v; - vi; - vii; - viii; - ix; - x[→→]; - xx[→→]; - xxx[→→]; - xl[→→]; - l[→→]; - lx[→→]; - lxx[→→]; - lxxx[→→]; - xc[→→]; - c[→→]; - cc[→→]; - ccc[→→]; - cd[→→]; - d[→→]; - dc[→→]; - dcc[→→]; - dccc[→→]; - cm[→→]; - m[→→]; - mm[→→]; - mmm[→→]; - mmmm[→→]; - =#,##0=; - - - −→→; - 
=#,##0.00=; - N; - I; - II; - III; - IV; - V; - VI; - VII; - VIII; - IX; - X[→→]; - XX[→→]; - XXX[→→]; - XL[→→]; - L[→→]; - LX[→→]; - LXX[→→]; - LXXX[→→]; - XC[→→]; - C[→→]; - CC[→→]; - CCC[→→]; - CD[→→]; - D[→→]; - DC[→→]; - DCC[→→]; - DCCC[→→]; - CM[→→]; - M[→→]; - MM[→→]; - MMM[→→]; - Mↁ[→→]; - ↁ[→→]; - ↁM[→→]; - ↁMM[→→]; - ↁMMM[→→]; - Mↂ[→→]; - ↂ[→→]; - ↂↂ[→→]; - ↂↂↂ[→→]; - ↂↇ[→→]; - ↇ[→→]; - ↇↂ[→→]; - ↇↂↂ[→→]; - ↇↂↂↂ[→→]; - ↂↈ[→→]; - ↈ[→→]; - ↈↈ[→→]; - ↈↈↈ[→→]; - =#,##0=; - - - −→→; - =#,##0.00=; - ௦; - ௧; - ௨; - ௩; - ௪; - ௫; - ௬; - ௭; - ௮; - ௯; - ௰[→→]; - ←←௰[→→]; - ௱[→→]; - ←←௱[→→]; - ௲[→→]; - ←←௲[→→]; - ←←௱௲[→%%tamil-thousands→]; - =#,##,##0=; - - - =%tamil=; - ←←௲[→→]; - - - =#,##0=; - - - - - −→→; - =#,##0=.; - - - - - −→→; - =0.0=; - =0=; - - - −→→; - =#,##0.#=; - - - −→→; - =#,##0.#=; - - - −→→; - =#,##0.#=.; - - - -