Skip to content

Commit 6472edc

Browse files
committed
Converted into a package
Separated tests from main function
1 parent 392a63a commit 6472edc

File tree

6 files changed

+189
-140
lines changed

6 files changed

+189
-140
lines changed

romanize.py

Lines changed: 0 additions & 140 deletions
This file was deleted.

romanize/__init__.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import print_function
3+
4+
__author__ = 'George Schizas'
5+
__version__ = '1.0.1'
6+
7+
"""
8+
This is the "romanize" module.
9+
10+
The romanize module supplies one function, romanize(). For example,
11+
12+
>>> romanize('Γιώργος Σχίζας')
13+
Giorgos Schizas
14+
15+
You can call it as a module to translate the rest of the command line
16+
17+
$ python -m romanize Γιώργος
18+
Giorgos
19+
"""
20+
21+
22+
# Greek letters that always become exactly one Latin letter.
_SIMPLE_LETTERS = dict(zip(u'άβδέζήιίϊΐκλνξόπρσςτυύϋΰφωώ',
                           u'avdeziiiiiklnxoprsstyyyyfoo'))
# Greek letters that are written with two Latin letters.
_DOUBLE_LETTERS = {u'θ': u'th', u'χ': u'ch', u'ψ': u'ps'}
# Vowels that merge with a following upsilon into av/af, ev/ef, iv/if.
_UPSILON_VOWELS = {u'α': u'a', u'ε': u'e', u'η': u'i'}
_UPSILON = frozenset(u'υύ')
# Letters after which the merged upsilon is voiced ("v") ...
_VOICED_FOLLOWERS = frozenset(u'βγδζλμνραάεέηήιίϊΐοόυύϋΰωώ')
# ... and letters after which it is voiceless ("f").
_VOICELESS_FOLLOWERS = frozenset(u'θκξπσςτφχψ')
# Gamma followed by one of these letters is transcribed as a nasal cluster.
_GAMMA_CLUSTERS = {u'γ': u'ng', u'ξ': u'nx', u'χ': u'nch'}


def romanize(greek_text):
    """Return the ISO 843:1997 transcription of the input Greek text.

    Characters that have no entry in the transcription tables (Latin
    letters, digits, punctuation, whitespace, ...) are passed through to
    the output.

    Case follows the input: when the source letter is upper-case the first
    Latin letter is capitalised, and the rest of a multi-letter output is
    upper-cased only if the next source character is upper-case too (an
    uncased or missing next character counts as upper-case).

    :param greek_text: the text to transcribe.
    :return: the transcribed text.
    """
    pieces = []
    length = len(greek_text)
    cursor = 0
    while cursor < length:
        original = greek_text[cursor]
        next_original = greek_text[cursor + 1] if cursor + 1 < length else ""
        prev_letter = greek_text[cursor - 1].lower() if cursor > 0 else ""
        third_letter = greek_text[cursor + 2].lower() if cursor + 2 < length else ""

        is_upper = (original.upper() == original)
        is_upper_next = (next_original.upper() == next_original)
        letter = original.lower()
        next_letter = next_original.lower()

        # All lookups below use dicts/frozensets rather than substring tests,
        # so an empty neighbour (start/end of text) never matches by accident.
        if letter in _SIMPLE_LETTERS:
            new_letter = _SIMPLE_LETTERS[letter]
        elif letter in _DOUBLE_LETTERS:
            new_letter = _DOUBLE_LETTERS[letter]
        elif letter in _UPSILON_VOWELS:
            new_letter = _UPSILON_VOWELS[letter]
            if next_letter in _UPSILON:
                if third_letter in _VOICED_FOLLOWERS:
                    new_letter += u'v'
                    cursor += 1  # the upsilon has been consumed
                elif (third_letter in _VOICELESS_FOLLOWERS
                      or not third_letter.isalpha()):
                    # Voiceless consonant, or no letter follows (end of word).
                    new_letter += u'f'
                    cursor += 1
        elif letter == u'γ':
            if next_letter in _GAMMA_CLUSTERS:
                new_letter = _GAMMA_CLUSTERS[next_letter]
                cursor += 1
            else:
                new_letter = u'g'
        elif letter == u'μ':
            if next_letter == u'π':
                # "b" at the start or end of a word, "mp" in the middle.
                at_word_edge = (prev_letter.strip() == ""
                                or third_letter.strip() == "")
                new_letter = u'b' if at_word_edge else u'mp'
                cursor += 1
            else:
                new_letter = u'm'
        elif letter == u'ο':
            new_letter = u'o'
            if next_letter in _UPSILON:
                new_letter += u'u'
                cursor += 1
        else:
            new_letter = letter

        if is_upper:
            tail = new_letter[1:]
            tail = tail.upper() if is_upper_next else tail.lower()
            new_letter = new_letter[0].upper() + tail
        pieces.append(new_letter)
        cursor += 1
    return "".join(pieces)
100+
101+
102+
def main():
    """Print the romanization of the command-line arguments.

    The arguments are joined with single spaces; when none are given the
    whole of standard input is read and romanized instead.
    """
    import sys

    arguments = sys.argv[1:]
    text = ' '.join(arguments) if arguments else sys.stdin.read()
    print(romanize(text))


if __name__ == "__main__":
    main()

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[metadata]
2+
description-file = README.md

setup.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from __future__ import print_function
2+
from setuptools import setup, find_packages
3+
from setuptools.command.test import test as TestCommand
4+
import io
5+
import codecs
6+
import os
7+
import sys
8+
9+
import romanize
10+
11+
here = os.path.abspath(os.path.dirname(__file__))
12+
13+
14+
def read(*filenames, **kwargs):
    """Return the contents of the given files joined into one string.

    Keyword arguments:
    encoding -- text encoding used to open every file (default 'utf-8')
    sep -- separator placed between the file contents (default newline)
    """
    encoding = kwargs.get('encoding', 'utf-8')
    sep = kwargs.get('sep', '\n')

    def _contents(path):
        # Open and fully read one file, closing it before moving on.
        with io.open(path, encoding=encoding) as handle:
            return handle.read()

    return sep.join([_contents(path) for path in filenames])
22+
23+
24+
# Resolve the documents relative to this file (via ``here``) so the build
# does not depend on the directory setup.py is invoked from.
long_description = read(os.path.join(here, 'README.md'),
                        os.path.join(here, 'CHANGES.md'))

# Package metadata; the version is taken from the romanize package itself.
setup(
    name='Romanize',
    version=romanize.__version__,
    url='https://github.com/gschizas/RomanizePython',
    license='Apache Software License',
    author='George Schizas',
    author_email='[email protected]',
    description='Transcribe Greek text to Latin alphabet using the ISO 843:1997 standard (also known as ELOT 743:1987)',
    long_description=long_description,
    packages=['romanize'],
    include_package_data=True,
    platforms='any',
    classifiers=[
        'Programming Language :: Python',
        'Development Status :: 5 - Production/Stable',
        'Natural Language :: English',
        'Natural Language :: Greek',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Text Processing'
    ],
    test_suite="tests"
)

tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

tests/tests.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import os
4+
import sys
5+
import unittest
6+
from romanize import romanize
7+
8+
9+
class TestCommonWords(unittest.TestCase):
    """Check romanize() against a table of Greek phrases and their expected output."""

    def test_inline(self):
        # (Greek input, expected Latin transcription), checked in order.
        expectations = [
            (u'Γιώργος Σχίζας', u'Giorgos Schizas'),
            (u'Θανάσης ΘΑΝΑΣΗΣ θΑνάσης ΘΑνάσης', u'Thanasis THANASIS thAnasis THAnasis'),
            (u'Αντώνης Ψαράς με ψάρια', u'Antonis Psaras me psaria'),
            (u'Αυγά αύριο παύση', u'Avga avrio pafsi'),
            (u'Άγγελος αρχάγγελος', u'Angelos archangelos'),
            (u'Ξάδελφος εξ αγχιστείας', u'Xadelfos ex anchisteias'),
            (u'Ακούμπα κάτω τα μπαούλα Γιακούμπ', u'Akoumpa kato ta baoula Giakoub'),
            (u'Ζεύξη Ρίου-Αντιρρίου', u'Zefxi Riou-Antirriou'),
            (u'μεταγραφή', u'metagrafi'),
            (u'Ούτε το αγγούρι ούτε η αγκινάρα γράφονται με γξ',
             u'Oute to angouri oute i agkinara grafontai me nx'),
            (u'ΟΥΡΑΝΟΣ Ουρανός ουρανός οϋρανός', u'OURANOS Ouranos ouranos oyranos'),
            (u'Έχω ελέγξει το 100% της μεθόδου', u'Echo elenxei to 100% tis methodou'),
        ]
        for greek, latin in expectations:
            self.assertEqual(romanize(greek), latin)

0 commit comments

Comments
 (0)