Skip to content

Commit

Permalink
some converters
Browse files Browse the repository at this point in the history
  • Loading branch information
flammie committed Nov 27, 2024
1 parent dcbf44c commit 470df26
Show file tree
Hide file tree
Showing 3 changed files with 644 additions and 0 deletions.
34 changes: 34 additions & 0 deletions scripts/unimorph/generate-word.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Generate the word forms of one lemma from a generator automaton and pipe
# them through convert.py (looked up next to this script).
#
# Usage: generate-word.bash WORD GENERATOR
#   WORD       lemma form as it appears in the generator
#   GENERATOR  generator-gt-desc.hfst of the target language
#
# Needs excluded.tags next to this script and hfst-regexp2fst, hfst-compose,
# hfst-fst2fst and hfst-fst2strings in PATH.
# Writes generative.WORD.regex, generative.WORD.hfst, generator.WORD.hfst
# and generated.WORD into the current directory.
# Exit status: 1 on usage error, missing generator or a failed HFST step;
# 2 when excluded.tags is missing; otherwise that of the final pipeline.

# A failure in any stage of a pipeline (not only the last) must be noticed.
set -o pipefail

# Print a diagnostic on stderr and stop; used for the build steps below so
# that leftovers from an earlier run are never converted by accident.
fail() {
    echo "$0: $*" >&2
    exit 1
}

if test $# -lt 2 ; then
    {
        echo "Usage: $0 WORD GENERATOR"
        echo
        echo WORD should be a lemma form in generator
        echo GENERATOR should be generator-gt-desc.hfst of target language
    } >&2
    exit 1
fi

script_dir=$(dirname "$0")
cyclictags=$script_dir/excluded.tags
if test ! -f "$cyclictags" ; then
    echo "missing $cyclictags please get them from giella-core" >&2
    exit 2
fi
# Join the tag list into one |-separated alternation without a trailing bar.
cyclicRE=$(tr '\n' '|' < "$cyclictags" | sed -e 's/|*$//')
lemma=$1
generator=$2
# Put a space after every character of the lemma so that the regex reads it
# as a sequence of single symbols. printf with a quoted argument keeps the
# lemma intact (no globbing, no echo option parsing).
lemmaRE=$(printf '%s\n' "$lemma" | sed -e 's/./& /g')

if test ! -f "$generator" ; then
    echo "Could not find generator automaton $generator" >&2
    exit 1
fi

regex_file=generative.$lemma.regex
filter_fst=generative.$lemma.hfst
lemma_generator=generator.$lemma.hfst
forms_file=generated.$lemma

# Regex: the lemma's symbols followed by any symbols that are not listed in
# excluded.tags (presumably the tags that make the generator cyclic); the
# leading "... +UglyHack |" alternative is carried over unchanged.
# The sed pass prefixes +, /, # and ^ with % so that they are taken as
# literal symbols (presumably xfst-style escaping — confirm against HFST).
printf '%s\n' "$cyclicRE +UglyHack | $lemmaRE [? - [ $cyclicRE ] ]* ;" |
    sed -e 's/+/%+/g' -e 's:/:%/:g' -e 's/#/%#/g' -e 's/\^/%^/g' \
    > "$regex_file" ||
    fail "could not write $regex_file"
hfst-regexp2fst -i "$regex_file" -o "$filter_fst" -f foma ||
    fail "hfst-regexp2fst failed on $regex_file"
hfst-compose -F -1 "$filter_fst" -2 "$generator" |
    hfst-fst2fst -f olw -o "$lemma_generator" ||
    fail "composing $filter_fst with $generator failed"
hfst-fst2strings -c 0 "$lemma_generator" > "$forms_file" ||
    fail "hfst-fst2strings failed on $lemma_generator"
printf '%s\n' "$lemma"
uniq < "$forms_file" | "$script_dir"/convert.py
22 changes: 22 additions & 0 deletions scripts/universal_dependencies/ccat2ud.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Turn a corpus file into CoNLL-U: the output of ccat is piped through the
# language's trace analyser mode into <name>.cg3text, which vislcg2ud.py
# then converts to <name>.conllu. Both files land in the current directory.
#
# Usage: ccat2ud.bash LANGCODE CORPUSDIR
#   LANGCODE   language code used in $GTLANGS/lang-LANGCODE and
#              $GTLANGS/corpus-LANGCODE
#   CORPUSDIR  corpus path handed to ccat
#
# Environment: GTLANGS must point at the directory holding the lang-* and
# corpus-* directories.
# Exit status: 1 on usage error, missing input or failed analysis; otherwise
# that of the python conversion.

# A failing ccat must not be hidden by a succeeding analyser.
set -o pipefail

if test $# -lt 2; then
    echo "Usage: $0 LANGCODE CORPUSDIR" >&2
    exit 1
fi
# Deliberately not named LANG: that is the locale variable, and assigning a
# language code to it would change the locale seen by ccat, the analyser
# and python whenever LANG is exported.
langcode=$1
corpus=$2
if test -z "$GTLANGS" ; then
    echo "$0: GTLANGS is not set" >&2
    exit 1
fi
if ! test -e "$corpus" ; then
    echo "$0: could not find $corpus, try:" >&2
    ls -R "$GTLANGS/corpus-$langcode/converted/" >&2
    exit 1
fi
analyser="$GTLANGS/lang-$langcode/tools/analysers/modes/trace-$langcode-analyser.mode"
if ! test -f "$analyser" ; then
    echo "$0: missing analyser mode, configure $langcode with --enable-analyser-tool" >&2
    echo "and make and make dev and try again" >&2
    exit 1
fi
# Keep looking in the current directory first (the original behaviour) and
# fall back to a copy next to this script so it can be run from elsewhere.
converter=vislcg2ud.py
if ! test -f "$converter" ; then
    converter=$(dirname "$0")/vislcg2ud.py
fi
analysed=$(basename "$corpus" .xml).cg3text
if ! ccat "$corpus" | "$analyser" > "$analysed" ; then
    echo "$0: analysing $corpus failed, see $analysed" >&2
    exit 1
fi
python "$converter" -i "$analysed" \
    -o "$(basename "$analysed" .cg3text).conllu"
Loading

0 comments on commit 470df26

Please sign in to comment.