Skip to content

Commit

Permalink
Merge pull request #4 from rhasspy/synesthesiam-20241118-fix-loading
Browse files Browse the repository at this point in the history
Ensure all languages load
  • Loading branch information
synesthesiam authored Nov 18, 2024
2 parents f51a7f6 + 9e686e7 commit 6a84331
Show file tree
Hide file tree
Showing 11 changed files with 95 additions and 804 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## 2.1.0

- Ensure all supported languages can load
- Start on decimal pattern format implementation (not complete)

## 2.0.0

- Change `format_number` to return `FormatResult` instead of a `str`
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,5 @@ Some features that will need to be added eventually:
* Proper fraction rules (`0.x`)
* Preceding remainder substitution (`>>>` or `→→→`)
* Number format strings (`==`)
* Decimal format patterns (`#,##0.00`)
* Plural replacements (`$(ordinal,one{st}...)`)
9 changes: 9 additions & 0 deletions tests/test_decimal_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from unicode_rbnf.decimal_format import format_decimal


def test_format_decimal() -> None:
    """Check grouping, zero padding, rounding and optional fraction digits."""
    # (value, pattern, expected text)
    cases = (
        (12345.6789, "#,##0.00", "12,345.68"),
        (5, "0000.00", "0005.00"),
        (12345.6, "#,##0.0#", "12,345.6"),
        (0.1, "#,##0.00", "0.10"),
        (12345, "#,##0", "12,345"),
    )
    for number, pattern, expected in cases:
        assert format_decimal(number, pattern) == expected
9 changes: 9 additions & 0 deletions tests/test_load_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from unicode_rbnf import RbnfEngine

import pytest


@pytest.mark.parametrize("language", RbnfEngine.get_supported_languages())
def test_load_language(language: str):
    """Each supported language must load and produce non-empty text for zero."""
    result = RbnfEngine.for_language(language).format_number(0)
    assert result.text
2 changes: 1 addition & 1 deletion unicode_rbnf/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.0
2.1.0
52 changes: 52 additions & 0 deletions unicode_rbnf/decimal_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Handle decimal formatting.
See: https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1DecimalFormat.html
"""
from decimal import Decimal
from typing import Union


def format_decimal(value: Union[int, float, str, Decimal], pattern: str) -> str:
    """Format a number according to a simplified ICU DecimalFormat pattern.

    Only a subset of the pattern syntax is understood:

    * ``0`` is a mandatory digit, ``#`` an optional digit.
    * ``,`` anywhere in the integer part turns on grouping. Groups are
      always three digits wide, whatever spacing the pattern uses.
    * ``.`` separates the integer part from the fraction part.

    Prefixes, suffixes, exponents, percent signs and negative sub-patterns
    are not supported.

    Args:
        value: Number to format. Strings are parsed with ``Decimal``.
        pattern: Pattern such as ``#,##0.00``.

    Returns:
        The formatted number, e.g. ``"12,345.68"``.

    Raises:
        ValueError: If the pattern has more than one decimal point, a string
            value is not a valid number, or the value is NaN or infinite.
    """
    integer_part, _, fractional_part = pattern.partition(".")
    if "." in fractional_part:
        raise ValueError(f"Pattern has more than one decimal point: {pattern}")

    grouping = "," in integer_part
    min_integer_digits = integer_part.count("0")
    min_fraction_digits = fractional_part.count("0")
    max_fraction_digits = len(fractional_part)

    if isinstance(value, str):
        # The "f" format code is not defined for str, so parse it first.
        try:
            value = Decimal(value)
        except ArithmeticError as err:
            raise ValueError(f"Not a valid number: {value}") from err

    # Round to the largest number of fraction digits the pattern allows
    rounded_value = f"{value:.{max_fraction_digits}f}"

    # Keep the sign apart so that it never counts as a digit while padding
    # or grouping.
    sign = "-" if rounded_value.startswith("-") else ""
    integer_value, _, fractional_value = rounded_value[len(sign) :].partition(".")
    if not integer_value.isdigit():
        # "inf" / "nan" / "Infinity" / "NaN"
        raise ValueError(f"Cannot format non-finite value: {value}")

    # Optional fraction digits are dropped when zero, mandatory ones restored
    fractional_value = fractional_value.rstrip("0").ljust(min_fraction_digits, "0")

    # Pad with leading zeros up to the number of mandatory integer digits
    integer_value = integer_value.zfill(min_integer_digits)

    if grouping:
        # Group the digit string itself (not int(...)) so zero padding survives
        head = len(integer_value) % 3 or 3
        groups = [integer_value[:head]]
        groups.extend(
            integer_value[start : start + 3]
            for start in range(head, len(integer_value), 3)
        )
        integer_value = ",".join(groups)

    formatted_number = f"{sign}{integer_value}"
    if fractional_value:
        # Also covers patterns such as "#.##" that have no mandatory
        # fraction digit.
        formatted_number = f"{formatted_number}.{fractional_value}"

    return formatted_number
19 changes: 17 additions & 2 deletions unicode_rbnf/engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
from abc import ABC
from bisect import bisect_left
from dataclasses import dataclass, field
Expand Down Expand Up @@ -110,6 +111,9 @@ class SubRulePart(RbnfRulePart):
ruleset_name: Optional[str] = None
"""Ruleset name to use during substitution (None for current ruleset name)."""

format_pattern: Optional[str] = None
"""DecimalFormat pattern (e.g., #,##0.00)."""


@dataclass
class ReplaceRulePart(RbnfRulePart):
Expand Down Expand Up @@ -292,6 +296,17 @@ def parse(value_str: str, text: str, radix: int = 10) -> "Optional[RbnfRule]":

assert isinstance(part, TextRulePart)
part.text += c
elif c in ("#", "0", ",", "."):
# decimal format pattern (e.g., #,##0.00)
assert isinstance(part, SubRulePart)
assert state in (
ParseState.SUB_REMAINDER,
ParseState.SUB_QUOTIENT,
), state
if part.format_pattern is None:
part.format_pattern = ""

part.format_pattern += c
else:
raise ValueError(f"Got {c} in {state}")

Expand Down Expand Up @@ -459,7 +474,7 @@ def load_xml(self, root: et.Element) -> None:
raise ValueError("Missing identity/language element")

language = lang_elem.attrib["type"]
if language != self.language:
if (language != self.language) and (not self.language.startswith(language)):
raise ValueError(f"Expected language {self.language}, got {language}")

for group_elem in root.findall("rbnf//ruleset"):
Expand All @@ -473,7 +488,7 @@ def load_xml(self, root: et.Element) -> None:
continue

value_str = rule_elem.attrib["value"]
radix = int(rule_elem.attrib.get("radix", 10))
radix = int(re.sub(r"[^0-9]+", "", rule_elem.attrib.get("radix", "10")))

self.add_rule(
value_str,
Expand Down
14 changes: 0 additions & 14 deletions unicode_rbnf/rbnf/en_001.xml

This file was deleted.

50 changes: 0 additions & 50 deletions unicode_rbnf/rbnf/es_419.xml

This file was deleted.

13 changes: 0 additions & 13 deletions unicode_rbnf/rbnf/nb.xml

This file was deleted.

Loading

0 comments on commit 6a84331

Please sign in to comment.