Skip to content

Commit

Permalink
compile emojis
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Dec 16, 2024
1 parent 3831d2b commit 648ae61
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 7 deletions.
18 changes: 15 additions & 3 deletions am-shared/src-fst-dir-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,16 @@ GT_FSTs_NO_INSTALL=$(subst hfstol,hfst,$(filter %.hfstol, \
# Add other targets that should be built but not installed,
# such as the URL and emoji analysers, to the GT_FSTs_NO_INSTALL variable:
if CAN_HFST
GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.hfst
GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.hfst\
analyser-emojis-gt-desc.hfst
endif # CAN_HFST
if CAN_XFST
GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.xfst
GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.xfst\
analyser-emojis-gt-desc.xfst
endif # CAN_XFST
if CAN_FOMA
GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.foma
GT_FSTs_NO_INSTALL+=analyser-url-gt-desc.foma\
analyser-emojis-gt-desc.foma
endif # CAN_FOMA

##### BEGIN Hfst target list #####
Expand Down Expand Up @@ -968,6 +971,15 @@ analyser-url-gt-desc.%: morphology/.generated/url.%
save stack $@\n\
quit\n" | $(XFST_TOOL)

# Emojis are also kept separate from the main morphology for efficiency
# (a large alphabet slows everything down).
#
# Pattern rule: builds analyser-emojis-gt-desc.<ext> from
# morphology/.generated/emojis.<ext> by piping a short script to
# $(XFST_TOOL): load the prerequisite onto the stack, apply $(INVERT_HFST),
# save the stack as the target, and quit.
# NOTE(review): $(INVERT_HFST) is defined outside this view; presumably it
# expands to an inversion command only for the tools that need it - confirm.
analyser-emojis-gt-desc.%: morphology/.generated/emojis.%
$(AM_V_XFST_TOOL)$(PRINTF) "\
load stack $<\n\
$(INVERT_HFST)\
save stack $@\n\
quit\n" | $(XFST_TOOL)


CLEANFILES=$(GT_FSTs_NO_INSTALL) $(GT_RAW)

Expand Down
6 changes: 3 additions & 3 deletions am-shared/src-morphology-dir-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ GIELLA_LOCAL_SRC_TARGETS+=\
GIELLA_LOCAL_SRC_TARGETS+=$(patsubst %.xfscript,%.hfst,$(GT_LOCAL_XFSCRIPT_SRCS))
# We need to explicitly add url.?fst (and likewise emojis.?fst) here, as the
# generated targets only cover url.tmp.?fst
GIELLA_LOCAL_SRC_TARGETS+=.generated/url.hfst
GIELLA_LOCAL_SRC_TARGETS+=.generated/url.hfst .generated/emojis.hfst
endif # CAN_HFST

if CAN_XFST
Expand All @@ -89,7 +89,7 @@ GIELLA_LOCAL_SRC_TARGETS+=\
GIELLA_LOCAL_SRC_TARGETS+=$(patsubst %.xfscript,%.xfst,$(GT_LOCAL_XFSCRIPT_SRCS))
# We need to explicitly add url.?fst (and likewise emojis.?fst) here, as the
# generated targets only cover url.tmp.?fst
GIELLA_LOCAL_SRC_TARGETS+=.generated/url.xfst
GIELLA_LOCAL_SRC_TARGETS+=.generated/url.xfst .generated/emojis.xfst
endif # CAN_XFST

if CAN_FOMA
Expand All @@ -98,7 +98,7 @@ GIELLA_LOCAL_SRC_TARGETS+=\
GIELLA_LOCAL_SRC_TARGETS+=$(patsubst %.xfscript,%.foma,$(GT_LOCAL_XFSCRIPT_SRCS))
# We need to explicitly add url.?fst (and likewise emojis.?fst) here, as the
# generated targets only cover url.tmp.?fst
GIELLA_LOCAL_SRC_TARGETS+=.generated/url.foma
GIELLA_LOCAL_SRC_TARGETS+=.generated/url.foma .generated/emojis.foma
endif # CAN_FOMA

if WANT_L2
Expand Down
3 changes: 2 additions & 1 deletion am-shared/tools-tokenisers-dir-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ analyser_relabelled-%.hfst: analyser-%.hfst \
# This is the real tokeniser build target for all non-TTS tokenisers:
tokeniser-%.pmhfst: tokeniser-%.pmscript \
analyser_relabelled-%.hfst \
analyser-url-gt-desc.hfst
analyser-url-gt-desc.hfst \
analyser-emojis-gt-desc.hfst
$(AM_V_PM2FST)$(HFST_PMATCH2FST) < $< > [email protected]
$(AM_V_at)mv -f [email protected] $@

Expand Down
5 changes: 5 additions & 0 deletions am-shared/tools-tokenisers-filters-dir-include.am
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,28 @@ GT_DISAMB_TOKENISERS_FILTER_SRCS=
GT_GRAMCHECK_TOKENISERS_FILTER_SRCS=
GT_TTS_TOKENISERS_FILTER_SRCS=
GT_TOKENISER_URLFILTER_SRC=
GT_TOKENISER_EMOFILTER_SRC=

# Each enabled tokeniser flavour below appends its own tag/flag filter sources
# to its dedicated variable, and also appends the shared URL and emoji filter
# sources. Because the shared entries are appended once per enabled flavour,
# GT_TOKENISER_URLFILTER_SRC and GT_TOKENISER_EMOFILTER_SRC may contain the
# same file name several times when more than one flavour is enabled.

# Disambiguation tokeniser filters:
if WANT_TOKENISERS
GT_DISAMB_TOKENISERS_FILTER_SRCS+=make-disamb-CG-tags.regex \
disamb-tokeniser-flags.regex
GT_TOKENISER_URLFILTER_SRC+=make-url-CG-tags.regex
GT_TOKENISER_EMOFILTER_SRC+=make-emojis-CG-tags.regex

endif # WANT_TOKENISERS

# Grammar checker tokeniser filters:
if WANT_GRAMCHECK
GT_GRAMCHECK_TOKENISERS_FILTER_SRCS+=make-gramcheck-CG-tags.regex \
gramcheck-tokeniser-flags.regex
GT_TOKENISER_URLFILTER_SRC+=make-url-CG-tags.regex
GT_TOKENISER_EMOFILTER_SRC+=make-emojis-CG-tags.regex
endif # WANT_GRAMCHECK

# TTS tokeniser filters:
if WANT_TTS
GT_TTS_TOKENISERS_FILTER_SRCS+=make-tts-CG-tags.regex \
tts-tokeniser-flags.regex
GT_TOKENISER_URLFILTER_SRC+=make-url-CG-tags.regex
GT_TOKENISER_EMOFILTER_SRC+=make-emojis-CG-tags.regex
endif # WANT_TTS

if HAVE_ALT_ORTHS
Expand Down Expand Up @@ -83,6 +87,7 @@ GT_TOKENISERS_FILTER_ALL_SRCS=\
$(GT_GRAMCHECK_TOKENISERS_FILTER_SRCS) \
$(GT_TTS_TOKENISERS_FILTER_SRCS) \
$(sort $(GT_TOKENISER_URLFILTER_SRC) ) \
$(sort $(GT_TOKENISER_EMOFILTER_SRC) ) \
$(GT_LOCAL_TOKENISERS_FILTER_SRCS) \
$(ALT_ORTH_TOKENISER_FILTER_SRCS) \
$(GT_LOCAL_COPY_FILTER_SRCS) \
Expand Down
22 changes: 22 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,28 @@ AS_IF([test x$UCONV = xfalse],
])])
AC_PATH_PROG([PRECOMMIT], [pre-commit], [false])

# Check that the shared-mul resources are recent enough (not fatal yet).
#
# A sibling checkout at <srcdir>/../shared-mul is preferred when it exists;
# otherwise the default pkg-config search path is consulted.
#
# Adapted from giella-macros.m4:
# Path to the source dir as given by configure (relative to pwd, or absolute
# when configure was invoked via an absolute path):
MYSRCDIR=$srcdir
# Get the absolute path to the present dir:
BUILD_DIR_PATH=$(pwd)
# Resolve the full path to the srcdir. Let the shell resolve it instead of
# concatenating $BUILD_DIR_PATH/$MYSRCDIR, which yields a non-existent path
# whenever $srcdir is already absolute:
THIS_TOP_SRC_DIR=$(cd "$MYSRCDIR" && pwd)
_gt_shared_mul_min_version=0.0.3
AC_MSG_CHECKING([whether shared-mul is at least $_gt_shared_mul_min_version])
AS_IF([test -d "$THIS_TOP_SRC_DIR"/../shared-mul],
      [AS_IF([pkg-config --atleast-version=$_gt_shared_mul_min_version --with-path="$THIS_TOP_SRC_DIR"/../shared-mul giella-shared-mul],
             [AC_MSG_RESULT([yes])],
             [AC_MSG_RESULT([no])
              AC_MSG_WARN([shared-mul is too old for this giella-core, please update])])],
      [AS_IF([pkg-config --atleast-version=$_gt_shared_mul_min_version giella-shared-mul],
             [AC_MSG_RESULT([yes])],
             [AC_MSG_RESULT([no])
              AC_MSG_WARN([shared-mul is too old for this giella-core, please update])])])



AC_CONFIG_FILES([Makefile \
$PACKAGE.pc \
Expand Down

0 comments on commit 648ae61

Please sign in to comment.