From 648ae61d6276eea83e53f779f9b5dc6ec67ec112 Mon Sep 17 00:00:00 2001
From: Flammie A Pirinen
Date: Mon, 16 Dec 2024 23:42:28 +0100
Subject: [PATCH] compile emojis

Build a separate emoji analyser next to the URL analyser and feed it to
the tokenisers:

* src-fst: add analyser-emojis-gt-desc.{hfst,xfst,foma} to
  GT_FSTs_NO_INSTALL and build it from morphology/.generated/emojis.*
  with the same load/invert/save recipe as the URL analyser.
* src-morphology: add .generated/emojis.* to GIELLA_LOCAL_SRC_TARGETS.
* tools-tokenisers: make tokeniser-%.pmhfst depend on
  analyser-emojis-gt-desc.hfst.
* tools-tokenisers-filters: add make-emojis-CG-tags.regex to the filter
  sources. The variable is appended once per enabled tokeniser flavour,
  so it is de-duplicated with sort in GT_TOKENISERS_FILTER_ALL_SRCS,
  the same way as the URL filter source.
* configure.ac: check with pkg-config that giella-shared-mul is at
  least version 0.0.3; a failed check only prints a warning.

---
 am-shared/src-fst-dir-include.am              | 18 ++++++++++++---
 am-shared/src-morphology-dir-include.am       |  6 ++---
 am-shared/tools-tokenisers-dir-include.am     |  3 ++-
 .../tools-tokenisers-filters-dir-include.am   |  5 +++++
 configure.ac                                  | 22 +++++++++++++++++++
 5 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/am-shared/src-fst-dir-include.am b/am-shared/src-fst-dir-include.am
index 3623a296..33f76d91 100644
--- a/am-shared/src-fst-dir-include.am
+++ b/am-shared/src-fst-dir-include.am
@@ -47,13 +47,16 @@ GT_FSTs_NO_INSTALL=$(subst hfstol,hfst,$(filter %.hfstol, \
 # Add other targets that should be built but not installed,
 # such as the URL analyser, to the GT_FSTs_NO_INSTALL variable:
 if CAN_HFST
-GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.hfst
+GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.hfst\
+		analyser-emojis-gt-desc.hfst
 endif # CAN_HFST
 if CAN_XFST
-GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.xfst
+GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.xfst\
+		analyser-emojis-gt-desc.xfst
 endif # CAN_XFST
 if CAN_FOMA
-GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.foma
+GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.foma\
+		analyser-emojis-gt-desc.foma
 endif # CAN_FOMA
 
 ##### BEGIN Hfst target list #####
@@ -968,6 +971,15 @@ analyser-url-gt-desc.%: morphology/.generated/url.%
 		 save stack $@\n\
 		 quit\n" | $(XFST_TOOL)
 
+# also emojis should be kept separate from main morphology for efficiency
+# (large alphabet slows everything down)
+analyser-emojis-gt-desc.%: morphology/.generated/emojis.%
+	$(AM_V_XFST_TOOL)$(PRINTF) "\
+		 load stack $<\n\
+		 $(INVERT_HFST)\
+		 save stack $@\n\
+		 quit\n" | $(XFST_TOOL)
+
 
 CLEANFILES=$(GT_FSTs_NO_INSTALL) $(GT_RAW)
 
diff --git a/am-shared/src-morphology-dir-include.am b/am-shared/src-morphology-dir-include.am
index d0379b79..c604fbd7 100644
--- a/am-shared/src-morphology-dir-include.am
+++ b/am-shared/src-morphology-dir-include.am
@@ -80,7 +80,7 @@ GIELLA_LOCAL_SRC_TARGETS+=\
 GIELLA_LOCAL_SRC_TARGETS+=$(patsubst %.xfscript,%.hfst,$(GT_LOCAL_XFSCRIPT_SRCS))
 # We need to explicitly add url.?fst here, as the generated targets only cover
 # url.tmp.?fst
-GIELLA_LOCAL_SRC_TARGETS+=.generated/url.hfst
+GIELLA_LOCAL_SRC_TARGETS+=.generated/url.hfst .generated/emojis.hfst
 endif # CAN_HFST
 
 if CAN_XFST
@@ -89,7 +89,7 @@ GIELLA_LOCAL_SRC_TARGETS+=\
 GIELLA_LOCAL_SRC_TARGETS+=$(patsubst %.xfscript,%.xfst,$(GT_LOCAL_XFSCRIPT_SRCS))
 # We need to explicitly add url.?fst here, as the generated targets only cover
 # url.tmp.?fst
-GIELLA_LOCAL_SRC_TARGETS+=.generated/url.xfst
+GIELLA_LOCAL_SRC_TARGETS+=.generated/url.xfst .generated/emojis.xfst
 endif # CAN_XFST
 
 if CAN_FOMA
@@ -98,7 +98,7 @@ GIELLA_LOCAL_SRC_TARGETS+=\
 GIELLA_LOCAL_SRC_TARGETS+=$(patsubst %.xfscript,%.foma,$(GT_LOCAL_XFSCRIPT_SRCS))
 # We need to explicitly add url.?fst here, as the generated targets only cover
 # url.tmp.?fst
-GIELLA_LOCAL_SRC_TARGETS+=.generated/url.foma
+GIELLA_LOCAL_SRC_TARGETS+=.generated/url.foma .generated/emojis.foma
 endif # CAN_FOMA
 
 if WANT_L2
diff --git a/am-shared/tools-tokenisers-dir-include.am b/am-shared/tools-tokenisers-dir-include.am
index 208495ce..bdad827c 100644
--- a/am-shared/tools-tokenisers-dir-include.am
+++ b/am-shared/tools-tokenisers-dir-include.am
@@ -164,7 +164,8 @@ analyser_relabelled-%.hfst: analyser-%.hfst \
 # This is the real tokeniser build target for all non-TTS tokenisers:
 tokeniser-%.pmhfst: tokeniser-%.pmscript \
 					 analyser_relabelled-%.hfst \
-					 analyser-url-gt-desc.hfst
+					 analyser-url-gt-desc.hfst \
+					 analyser-emojis-gt-desc.hfst
 	$(AM_V_PM2FST)$(HFST_PMATCH2FST) < $< > $@.tmp
 	$(AM_V_at)mv -f $@.tmp $@
 
diff --git a/am-shared/tools-tokenisers-filters-dir-include.am b/am-shared/tools-tokenisers-filters-dir-include.am
index 8665acb9..c0f461c9 100644
--- a/am-shared/tools-tokenisers-filters-dir-include.am
+++ b/am-shared/tools-tokenisers-filters-dir-include.am
@@ -33,11 +33,13 @@ GT_DISAMB_TOKENISERS_FILTER_SRCS=
 GT_GRAMCHECK_TOKENISERS_FILTER_SRCS=
 GT_TTS_TOKENISERS_FILTER_SRCS=
 GT_TOKENISER_URLFILTER_SRC=
+GT_TOKENISER_EMOFILTER_SRC=
 
 if WANT_TOKENISERS
 GT_DISAMB_TOKENISERS_FILTER_SRCS+=make-disamb-CG-tags.regex \
 								  disamb-tokeniser-flags.regex
 GT_TOKENISER_URLFILTER_SRC+=make-url-CG-tags.regex
+GT_TOKENISER_EMOFILTER_SRC+=make-emojis-CG-tags.regex
 endif # WANT_TOKENISERS
 
 
@@ -45,12 +47,14 @@ if WANT_GRAMCHECK
 GT_GRAMCHECK_TOKENISERS_FILTER_SRCS+=make-gramcheck-CG-tags.regex \
 									 gramcheck-tokeniser-flags.regex
 GT_TOKENISER_URLFILTER_SRC+=make-url-CG-tags.regex
+GT_TOKENISER_EMOFILTER_SRC+=make-emojis-CG-tags.regex
 endif # WANT_GRAMCHECK
 
 if WANT_TTS
 GT_TTS_TOKENISERS_FILTER_SRCS+=make-tts-CG-tags.regex \
 							   tts-tokeniser-flags.regex
 GT_TOKENISER_URLFILTER_SRC+=make-url-CG-tags.regex
+GT_TOKENISER_EMOFILTER_SRC+=make-emojis-CG-tags.regex
 endif # WANT_TTS
 
 if HAVE_ALT_ORTHS
@@ -83,6 +87,7 @@ GT_TOKENISERS_FILTER_ALL_SRCS=\
 	$(GT_GRAMCHECK_TOKENISERS_FILTER_SRCS) \
 	$(GT_TTS_TOKENISERS_FILTER_SRCS) \
 	$(sort $(GT_TOKENISER_URLFILTER_SRC) ) \
+	$(sort $(GT_TOKENISER_EMOFILTER_SRC) ) \
 	$(GT_LOCAL_TOKENISERS_FILTER_SRCS) \
 	$(ALT_ORTH_TOKENISER_FILTER_SRCS) \
 	$(GT_LOCAL_COPY_FILTER_SRCS) \
diff --git a/configure.ac b/configure.ac
index 9dac7ac4..6ec1298d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,28 @@ AS_IF([test x$UCONV = xfalse],
               ])])
 AC_PATH_PROG([PRECOMMIT], [pre-commit], [false])
 
+# check for shared version (not fatal yet)
+#
+# c/p from giella-macros.m4:
+# Get the relative path from pwd to where src dir is:
+MYSRCDIR=$srcdir
+# Get the absolute path to the present dir:
+BUILD_DIR_PATH=$(pwd)
+# Combine to get the full path to the srcdir:
+THIS_TOP_SRC_DIR=$BUILD_DIR_PATH/$MYSRCDIR
+_gt_shared_mul_min_version=0.0.3
+AC_MSG_CHECKING([whether shared-mul is at least $_gt_shared_mul_min_version])
+AS_IF([test -d "$THIS_TOP_SRC_DIR"/../shared-mul],
+      [AS_IF([pkg-config --atleast-version=$_gt_shared_mul_min_version --with-path="$THIS_TOP_SRC_DIR"/../shared-mul giella-shared-mul],
+             [AC_MSG_RESULT([yes])],
+             [AC_MSG_RESULT([no])
+              AC_MSG_WARN([shared-mul is too old for this giella-core, please update])])],
+      [AS_IF([pkg-config --atleast-version=$_gt_shared_mul_min_version giella-shared-mul],
+             [AC_MSG_RESULT([yes])],
+             [AC_MSG_RESULT([no])
+              AC_MSG_WARN([shared-mul is too old for this giella-core, please update])])])
+
+
 AC_CONFIG_FILES([Makefile \
                  $PACKAGE.pc \