Skip to content

Commit

Permalink
Employ optional nfc2nfd Unicode transform for all descriptive analysers
Browse files (browse the repository at this point in the history)
  • Loading branch information
snomos committed Nov 20, 2023
1 parent 6d27a3d commit 09ded13
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 0 deletions.
4 changes: 4 additions & 0 deletions am-shared/src_alt_orth-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ analyser-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/$(DEFAULT_ORTH)-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -94,6 +95,7 @@ analyser-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$$*\" \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"orthography/$(DEFAULT_ORTH)-to-$(1).compose.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
Expand All @@ -119,6 +121,7 @@ analyser-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/raw-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -135,6 +138,7 @@ analyser-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$$*\" \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"orthography/raw-to-$(1).compose.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
Expand Down
2 changes: 2 additions & 0 deletions am-shared/src_alt_ws-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ analyser-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/$(DEFAULT_WS)-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -97,6 +98,7 @@ analyser-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$$*\" \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
.o. @\"filters/remove-word-boundary.$$*\" \
Expand Down
8 changes: 8 additions & 0 deletions am-shared/src_dictionary-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ analyser-dict-gt-desc.tmp.%: analyser-raw-gt-desc.% \
filters/remove-mwe-tags.% \
orthography/inituppercase.compose.% \
orthography/spellrelax.compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"filters/remove-area-tags.$*\" \
Expand All @@ -67,6 +68,7 @@ analyser-dict-gt-desc.tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$*\" \
.o. @\"filters/remove-mwe-tags.$*\" \
.o. @\"$<\" \
$(GLT_NFCNFD_COMPOSE) \
$(GLT_DOWNCASE_COMPOSE) \
.o. @\"filters/remove-hyphenation-marks.$*\" \
.o. @\"filters/remove-infl_deriv-borders.$*\" \
Expand Down Expand Up @@ -172,6 +174,7 @@ analyser-dict-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/$$(DEFAULT_WS)-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -188,6 +191,7 @@ analyser-dict-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$$*\" \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
.o. @\"filters/remove-word-boundary.$$*\" \
Expand Down Expand Up @@ -270,6 +274,7 @@ analyser-dict-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/raw-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -288,6 +293,7 @@ analyser-dict-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$$*\" \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
.o. @\"filters/remove-word-boundary.$$*\" \
Expand All @@ -313,6 +319,7 @@ analyser-dict-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/$(DEFAULT_ORTH)-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -331,6 +338,7 @@ analyser-dict-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-strings.$$*\" \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
.o. @\"filters/remove-word-boundary.$$*\" \
Expand Down
8 changes: 8 additions & 0 deletions am-shared/src_disamb-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ analyser-disamb-gt-desc.tmp1.%: analyser-raw-gt-desc.% \
filters/block-mwe-compounds.% \
orthography/inituppercase.compose.% \
orthography/spellrelax.compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"filters/remove-variant-tags.$*\" \
Expand All @@ -70,6 +71,7 @@ analyser-disamb-gt-desc.tmp1.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-mwe-tags.$*\" \
.o. @\"filters/block-mwe-compounds.$*\" \
.o. @\"$<\" \
$(GLT_NFCNFD_COMPOSE) \
$(GLT_DOWNCASE_COMPOSE) \
.o. @\"filters/remove-hyphenation-marks.$*\" \
.o. @\"filters/remove-infl_deriv-borders.$*\" \
Expand Down Expand Up @@ -104,6 +106,7 @@ analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
filters/remove-variant-tags.% \
filters/remove-mwe-tags.% \
filters/block-mwe-compounds.% \
$(GLT_NFCNFD_FILTER) \
orthography/spellrelax.$(1).compose.% \
orthography/$(DEFAULT_WS)-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
Expand All @@ -113,6 +116,7 @@ analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"filters/block-mwe-compounds.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
.o. @\"filters/remove-word-boundary.$$*\" \
Expand Down Expand Up @@ -151,6 +155,7 @@ analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
orthography/spellrelax.$(1).compose.% \
orthography/inituppercase.compose.% \
orthography/$(DEFAULT_ORTH)-to-$(1).compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
@\"filters/remove-orthography-tags.$$*\" \
Expand All @@ -159,6 +164,7 @@ analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"filters/block-mwe-compounds.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
$(GLT_DOWNCASE_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
Expand Down Expand Up @@ -186,6 +192,7 @@ analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
orthography/spellrelax.$(1).compose.% \
orthography/inituppercase.compose.% \
orthography/raw-to-$(1).compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
@\"filters/remove-orthography-tags.$$*\" \
Expand All @@ -194,6 +201,7 @@ analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-mwe-tags.$$*\" \
.o. @\"filters/block-mwe-compounds.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
$(GLT_DOWNCASE_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
Expand Down
8 changes: 8 additions & 0 deletions am-shared/src_gramcheck-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ analyser-gramcheck-gt-desc.tmp.%: analyser-raw-gt-desc.% \
filters/block-mwe-compounds.% \
orthography/inituppercase.compose.% \
orthography/spellrelax.compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"filters/remove-orig_lang-tags.$*\" \
Expand All @@ -40,6 +41,7 @@ analyser-gramcheck-gt-desc.tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-tags.$*\" \
.o. @\"filters/block-mwe-compounds.$*\" \
.o. @\"$<\" \
$(GLT_NFCNFD_COMPOSE) \
$(GLT_DOWNCASE_COMPOSE) \
.o. @\"filters/remove-hyphenation-marks.$*\" \
.o. @\"filters/remove-infl_deriv-borders.$*\" \
Expand Down Expand Up @@ -127,6 +129,7 @@ analyser-gramcheck-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
filters/remove-Use_PMatch-tags.% \
filters/block-mwe-compounds.% \
orthography/spellrelax.$(1).compose.% \
$(GLT_NFCNFD_FILTER) \
orthography/$(DEFAULT_WS)-to-$(1).compose.%
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
@\"filters/remove-orig_lang-tags.$$*\" \
Expand All @@ -139,6 +142,7 @@ analyser-gramcheck-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-tags.$$*\" \
.o. @\"filters/block-mwe-compounds.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
.o. @\"filters/remove-word-boundary.$$*\" \
Expand Down Expand Up @@ -234,6 +238,7 @@ analyser-gramcheck-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
orthography/spellrelax.$(1).compose.% \
orthography/inituppercase.compose.% \
orthography/$(DEFAULT_ORTH)-to-$(1).compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
@\"filters/remove-orig_lang-tags.$$*\" \
Expand All @@ -246,6 +251,7 @@ analyser-gramcheck-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-tags.$$*\" \
.o. @\"filters/block-mwe-compounds.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
$(GLT_DOWNCASE_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
Expand Down Expand Up @@ -277,6 +283,7 @@ analyser-gramcheck-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
orthography/spellrelax.$(1).compose.% \
orthography/inituppercase.compose.% \
orthography/raw-to-$(1).compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
@\"filters/remove-orig_lang-tags.$$*\" \
Expand All @@ -289,6 +296,7 @@ analyser-gramcheck-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
.o. @\"filters/remove-Use_PMatch-tags.$$*\" \
.o. @\"filters/block-mwe-compounds.$$*\" \
.o. @\"$$<\" \
$(GLT_NFCNFD_COMPOSE_DEF) \
$(GLT_DOWNCASE_COMPOSE_DEF) \
.o. @\"filters/remove-hyphenation-marks.$$*\" \
.o. @\"filters/remove-infl_deriv-borders.$$*\" \
Expand Down
2 changes: 2 additions & 0 deletions am-shared/src_tts-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,11 @@ analyser-tts-gt-input.tmp.%: analyser-tts-gt-input.midtmp.% \
filters/remove-word-boundary.% \
orthography/inituppercase.compose.% \
orthography/spellrelax.compose.% \
$(GLT_NFCNFD_FILTER) \
$(GLT_DOWNCASE_FILTER)
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"$<\" \
$(GLT_NFCNFD_COMPOSE) \
$(GLT_DOWNCASE_COMPOSE) \
.o. @\"filters/remove-hyphenation-marks.$*\" \
.o. @\"filters/remove-infl_deriv-borders.$*\" \
Expand Down

0 comments on commit 09ded13

Please sign in to comment.