diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..37658f5
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include timur/data/syms.txt
diff --git a/requirements.txt b/requirements.txt
index 3612806..bd60f14 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
+click
 http://www.opengrm.org/twiki/pub/GRM/PyniniDownload/pynini-2.0.0.tar.gz#egg=pynini
diff --git a/setup.py b/setup.py
index 39bf1d4..5624639 100644
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,7 @@
     author_email='wuerzner@gmail.com',
     license=license,
     packages=find_packages(exclude=('tests', 'docs')),
+    include_package_data=True,
     install_requires=[
     ],
     entry_points={
diff --git a/syms.txt b/timur/data/syms.txt
similarity index 100%
rename from syms.txt
rename to timur/data/syms.txt
diff --git a/timur/fsts/__init__.py b/timur/fsts/__init__.py
index a9a8e00..35eab7c 100644
--- a/timur/fsts/__init__.py
+++ b/timur/fsts/__init__.py
@@ -1 +1,2 @@
 from .num_fst import num_fst
+from .phon_fst import phon_fst
diff --git a/timur/fsts/phon_fst.py b/timur/fsts/phon_fst.py
new file mode 100644
index 0000000..d4e025c
--- /dev/null
+++ b/timur/fsts/phon_fst.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import pynini
+
+from timur.helpers import union
+from timur.helpers import concat
+
+def phon_fst(symbol_table):
+  '''
+  Orthographic and phonological surface realizations rules
+  '''
+  cons_lower = pynini.string_map(["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z", "ß"], input_token_type=symbol_table, output_token_type=symbol_table)
+  cons_upper = pynini.string_map(["B", "C", "D", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "X", "Y", "Z"], input_token_type=symbol_table, output_token_type=symbol_table)
+  #return cons.optimize()
diff --git a/timur/helpers/helpers.py b/timur/helpers/helpers.py
index 7e55eb4..189ccb1 100644
--- a/timur/helpers/helpers.py
+++ b/timur/helpers/helpers.py
@@ -29,7 +29,7 @@ def load_alphabet(source, auto_singletons=True):
       symbol = chr(i)
       if symbol.isprintable() and not symbol.isspace():
         syms.add_symbol(symbol)
-  for symbol in source:
+  for symbol in source.split('\n'):
     if symbol.startswith('#'):
       continue
     syms.add_symbol(symbol.strip())
diff --git a/timur/scripts/timur.py b/timur/scripts/timur.py
index b31a41f..606943e 100644
--- a/timur/scripts/timur.py
+++ b/timur/scripts/timur.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
 
-import pynini
+import click, pynini
+
+from pkg_resources import resource_string, Requirement
 
 from timur import helpers
 from timur import fsts
@@ -20,17 +22,16 @@ def construct_any(symbol_table):
     sym_it.next()
   return ANY
 
-def phon_fst(symbol_table):
-  '''
-  Orthographic and phonological surface realizations rules
-  '''
-  cons_lower = pynini.string_map(["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z", "ß"], input_token_type=symbol_table, output_token_type=symbol_table)
-  cons_upper = pynini.string_map(["B", "C", "D", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "X", "Y", "Z"], input_token_type=symbol_table, output_token_type=symbol_table)
-  #return cons.optimize()
+@click.group()
+def cli():
+  pass
 
 
-def cli():
-  syms = helpers.load_alphabet(open("syms.txt"))
+@cli.command(name="compile")
+@click.argument('lexicon')
+def compile(lexicon):
+
+  syms = helpers.load_alphabet(resource_string(Requirement.parse("timur"), 'timur/data/syms.txt').decode("utf-8"))
 
   #phon = phon_fst(syms)
   #phon.draw("test.dot")
@@ -38,4 +39,4 @@
 
   ANY = construct_any(syms)
 
-  print(syms.member('A'))
+  print(syms.member(''))
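
Note (illustration, not part of the patch): the source.split('\n') change in helpers.py and the resource_string call in timur.py belong together. load_alphabet used to iterate over an open file handle line by line; it now receives the packaged timur/data/syms.txt as one decoded string, so it must split on newlines itself. Below is a minimal, self-contained sketch of that call chain, assuming the timur distribution is installed; the auto_singletons branch of the real helper is omitted.

# sketch.py -- illustration only, not part of this diff
from pkg_resources import resource_string, Requirement

import pynini


def load_alphabet(source):
  # source is the whole symbol file as one decoded string (not a file
  # object), hence the explicit split on newlines.
  syms = pynini.SymbolTable()
  for symbol in source.split('\n'):
    if symbol.startswith('#'):
      continue  # skip comment lines, as in the real helper
    syms.add_symbol(symbol.strip())
  return syms


# syms.txt now ships inside the installed "timur" distribution
# (MANIFEST.in plus include_package_data), so it is resolved through
# pkg_resources instead of being opened from the current working directory.
raw = resource_string(Requirement.parse("timur"), 'timur/data/syms.txt')
syms = load_alphabet(raw.decode("utf-8"))
print(syms.member(''))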