Mobile speller weights: build corpus-derived intermediates in .generated/

What this patch does:
  * The sorted corpus, word/type counts, uniq list and tropical-weight
    text files become .generated/... targets instead of files in the
    build directory itself; the awk invocation and the unit-weight
    reweighting step read them from .generated/ accordingly.
  * The corpus is sorted with LC_ALL=C.utf8 so the result does not depend
    on the caller's locale.
  * The $(GENDIR) prerequisite moves from the surfs.hfst rule to the
    first rule of the chain (the sort step).

Review notes:
  * $(GENDIR) is listed as an order-only prerequisite (after "|") on the
    sort rule: it has to exist before sort writes into .generated/, but
    it must not make the sorted corpus look out of date.  This assumes
    $(GENDIR) names the .generated directory or a stamp inside it; its
    definition is not part of this patch.
  * Because of that change the post-image blob hash on the "index" line
    is the one from the original commit, not of the file this patch now
    produces.  Plain "git apply" / "patch" do not check it.
  * Indentation inside the hunks was reconstructed; if context lines fail
    to match, apply with "git apply --ignore-whitespace" or "patch -l".

diff --git a/am-shared/tools-spellcheckers-fstbased-mobile_weights-dir-include.am b/am-shared/tools-spellcheckers-fstbased-mobile_weights-dir-include.am
index 79fd3100..f1ded5b5 100644
--- a/am-shared/tools-spellcheckers-fstbased-mobile_weights-dir-include.am
+++ b/am-shared/tools-spellcheckers-fstbased-mobile_weights-dir-include.am
@@ -38,19 +38,19 @@ mob_corpus_size_limit_command=$(shell \
 	$(AM_V_GEN)$(HFST_TXT2FST) $(HFST_FLAGS) -f openfst-tropical $< -o $@
 
 # sort the clean corpus:
-%.sort.txt: weights/%.clean.txt
-	$(AM_V_GEN)sort < $< > $@
+.generated/%.sort.txt: weights/%.clean.txt | $(GENDIR)
+	$(AM_V_GEN)LC_ALL=C.utf8 sort < $< > $@
 
 # token count:
-%.wordcount.txt: %.sort.txt
+.generated/%.wordcount.txt: .generated/%.sort.txt
 	$(AM_V_GEN)wc -l < $< > $@
 
 # Unique the sorted, clean corpus:
-mob_%.uniq.txt: %.sort.txt
+.generated/mob_%.uniq.txt: .generated/%.sort.txt
 	$(AM_V_GEN)uniq -c < $< | sort -nr $(mob_corpus_size_limit_command) > $@
 
 # type count:
-mob_%.typecount.txt: mob_%.uniq.txt
+.generated/mob_%.typecount.txt: .generated/mob_%.uniq.txt
 	$(AM_V_GEN)wc -l < $< > $@
 
 # calculate unit weight, smoothed using ALPHA:
@@ -66,16 +66,16 @@ mob_%.typecount.txt: mob_%.uniq.txt
 # | $(BC) -l > $@
 #
 # add tropical weights to the corpus:
-mob_%.tropical.txt: mob_%.uniq.txt %.wordcount.txt mob_%.typecount.txt
+.generated/mob_%.tropical.txt: .generated/mob_%.uniq.txt .generated/%.wordcount.txt .generated/mob_%.typecount.txt
 	$(AM_V_GEN)cat $< |\
-		$(GAWK) -v CS="$$(cat $*.wordcount.txt)" \
-			-v DS="$$(cat mob_$*.typecount.txt)" \
+		$(GAWK) -v CS="$$(cat .generated/$*.wordcount.txt)" \
+			-v DS="$$(cat .generated/mob_$*.typecount.txt)" \
 			-v ALPHA=$(ALPHA) \
 			-f $(GTCORE)/scripts/uniq_count2tropical_weight.awk \
 			> $@
 
 # build an fst of surface forms with tropical weights for each word form:
-.generated/mob_%.surfs.hfst: mob_%.tropical.txt $(GENDIR)
+.generated/mob_%.surfs.hfst: .generated/mob_%.tropical.txt
 	$(AM_V_STR2FST)cat $< |\
 		$(HFST_STRINGS2FST) -j $(HFST_FLAGS) -f openfst-tropical -o $@
 
@@ -89,10 +89,10 @@ mob_%.tropical.txt: mob_%.uniq.txt %.wordcount.txt mob_%.typecount.txt
 
 # Add the unit weight to each unit in compounds, both dynamic and lexical:
 .generated/mob_unitweighted.hfst: $(UW_SPELLER_SRC) \
-			 $(MOB_UNITWEIGHT) \
+			 .generated/$(MOB_UNITWEIGHT) \
 			 $(srcdir)/weights/word-boundary.txt
 	$(AM_V_REWEIGHT)$(HFST_REWEIGHT) $(HFST_FLAGS) \
-		-e -a $$(cat $(MOB_UNITWEIGHT)) $< \
+		-e -a $$(cat .generated/$(MOB_UNITWEIGHT)) $< \
 		-o $@
 
 # Keep these intermediate targets when building using --debug: