Skip to content

Commit

Permalink
Modify package structure, start using Click, add phon_fst
Browse files Browse the repository at this point in the history
  • Loading branch information
wrznr committed Nov 4, 2018
1 parent 7a6dacb commit ae36ca3
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 12 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include timur/data/syms.txt
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
click
http://www.opengrm.org/twiki/pub/GRM/PyniniDownload/pynini-2.0.0.tar.gz#egg=pynini
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
author_email='[email protected]',
license=license,
packages=find_packages(exclude=('tests', 'docs')),
include_package_data=True,
install_requires=[
],
entry_points={
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions timur/fsts/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .num_fst import num_fst
from .phon_fst import phon_fst
15 changes: 15 additions & 0 deletions timur/fsts/phon_fst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import pynini

from timur.helpers import union
from timur.helpers import concat

def phon_fst(symbol_table):
    '''
    Orthographic and phonological surface realization rules.

    Builds two pynini string maps, one over the lower-case consonant
    letters (including "ß") and one over the upper-case consonant letters.
    Both use ``symbol_table`` as input and output token type.

    NOTE: work in progress. Nothing is returned yet (the call evaluates to
    ``None``); the intended return statement is still disabled below.
    '''
    # Both maps are built with the same token-type configuration.
    token_types = {
        "input_token_type": symbol_table,
        "output_token_type": symbol_table,
    }

    lower_consonants = [
        "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n",
        "p", "q", "r", "s", "t", "v", "w", "x", "y", "z", "ß",
    ]
    upper_consonants = [
        "B", "C", "D", "F", "G", "H", "J", "K", "L", "M", "N",
        "P", "Q", "R", "S", "T", "V", "W", "X", "Y", "Z",
    ]

    cons_lower = pynini.string_map(lower_consonants, **token_types)
    cons_upper = pynini.string_map(upper_consonants, **token_types)
    # TODO: the return is disabled; `cons` is not defined in this function.
    #return cons.optimize()
2 changes: 1 addition & 1 deletion timur/helpers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def load_alphabet(source, auto_singletons=True):
symbol = chr(i)
if symbol.isprintable() and not symbol.isspace():
syms.add_symbol(symbol)
for symbol in source:
for symbol in source.split('\n'):
if symbol.startswith('#'):
continue
syms.add_symbol(symbol.strip())
Expand Down
23 changes: 12 additions & 11 deletions timur/scripts/timur.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import pynini
import click, pynini

from pkg_resources import resource_string, Requirement

from timur import helpers
from timur import fsts
Expand All @@ -20,22 +22,21 @@ def construct_any(symbol_table):
sym_it.next()
return ANY

def phon_fst(symbol_table):
    '''
    Orthographic and phonological surface realization rules.

    Builds two pynini string maps, one over the lower-case consonant
    letters (including "ß") and one over the upper-case consonant letters.
    Both use ``symbol_table`` as input and output token type.

    NOTE: work in progress. Nothing is returned yet (the call evaluates to
    ``None``); the intended return statement is still disabled below.
    '''
    # Both maps are built with the same token-type configuration.
    token_types = {
        "input_token_type": symbol_table,
        "output_token_type": symbol_table,
    }

    lower_consonants = [
        "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n",
        "p", "q", "r", "s", "t", "v", "w", "x", "y", "z", "ß",
    ]
    upper_consonants = [
        "B", "C", "D", "F", "G", "H", "J", "K", "L", "M", "N",
        "P", "Q", "R", "S", "T", "V", "W", "X", "Y", "Z",
    ]

    cons_lower = pynini.string_map(lower_consonants, **token_types)
    cons_upper = pynini.string_map(upper_consonants, **token_types)
    # TODO: the return is disabled; `cons` is not defined in this function.
    #return cons.optimize()
@click.group()
def cli():
    '''
    Command line interface of timur.
    '''
    # The group does no work of its own; it is the attachment point for
    # sub-commands registered via ``@cli.command``. The docstring above is
    # what click displays as the group's description in ``--help``.


def cli():
syms = helpers.load_alphabet(open("syms.txt"))
@cli.command(name="compile")
@click.argument('lexicon')
def compile(lexicon):
    # NOTE: `lexicon` is accepted on the command line but is not read below.

    # Fetch the symbol list shipped inside the installed "timur"
    # distribution (as bytes), decode it and build the alphabet from it.
    raw_symbols = resource_string(Requirement.parse("timur"), 'timur/data/syms.txt')
    syms = helpers.load_alphabet(raw_symbols.decode("utf-8"))

    #phon = phon_fst(syms)
    #phon.draw("test.dot")

    # Both results are built but not used further in this function yet.
    number_stems = fsts.num_fst(syms)
    any_symbol = construct_any(syms)

    # Print whether each of these two symbols is a member of the alphabet.
    for probe in ('A', '<QUANT>'):
        print(syms.member(probe))

0 comments on commit ae36ca3

Please sign in to comment.