Skip to content

Commit

Permalink
Merge pull request #891 from PyThaiNLP/add-thai-morse
Browse files Browse the repository at this point in the history
Add pythainlp.util.morse
  • Loading branch information
wannaphong authored Dec 15, 2023
2 parents dd11578 + 5079d3f commit b3c1143
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 16 deletions.
10 changes: 10 additions & 0 deletions docs/api/util.rst
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,13 @@ Modules
:members:

The `Trie` class is a data structure for efficient dictionary operations. It's a valuable resource for managing and searching word lists and dictionaries in a structured and efficient manner.

.. autofunction:: pythainlp.util.morse.morse_encode
:noindex:

The `pythainlp.util.morse.morse_encode` function converts text to Morse code.

.. autofunction:: pythainlp.util.morse.morse_decode
:noindex:

The `pythainlp.util.morse.morse_decode` function converts Morse code to text.
197 changes: 197 additions & 0 deletions pythainlp/util/morse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0

# Encoding table: Thai character -> Morse code written with "." and "-".
# Several characters share one code (e.g. "ค"/"ฆ", "ศ"/"ษ"/"ส"), so a reverse
# table built from this mapping can keep only one character per code; the
# order of the entries below decides which one that is.
THAI_MORSE_CODE = {
# Consonants (plus "ฤ")
"ก": "--.",
"ข": "-.-.",
"ค": "-.-",
"ฆ": "-.-",
"ง": "-.--.",
"จ": "-..-.",
"ฉ": "----",
"ช": "-..-",
"ฌ": "-..-",
"ซ": "--..",
"ญ": ".---",
"ด": "-..",
"ถ": "-.-..",
"ฐ": "-.-..",
"ฑ": "-..--",
"ฒ": "-..--",
"ท": "-..--",
"ธ": "-..--",
"ณ": "-.",
"น": "-.",
"บ": "-...",
"ป": ".--.",
"ผ": "--.-",
"ฝ": "-.-.-",
"พ": ".--..",
"ภ": ".--..",
"ฟ": "..-.",
"ม": "--",
"ย": "-.--",
"ร": ".-.",
"ล": ".-..",
"ฬ": ".-..",
"ว": ".--",
"ศ": "...",
"ษ": "...",
"ส": "...",
"ห": "....",
"ฮ": "--.--",
"ฎ": "-..",
"ต": "-",
"ฏ": "-",
"ฤ": ".-.--",
# Tone marks and other combining signs
"่": "..-",
"้": "...-",
"๊": "--...",
"๋": ".-.-.",
"ั": ".--.-",
"็": "---..",
"์": "--..-",
# NOTE(review): multi-character key. morse_encode looks text up one character
# at a time, so this entry appears reachable only through the reverse
# (decode) table -- confirm that this is intended.
"ั้": ".---.",
# Abbreviation and repetition signs
"ฯ": "--.-.",
# NOTE(review): "ฯลฯ" is also a multi-character key, and it shares its code
# with "ๆ" below -- verify the code for "ๆ" against a Thai Morse reference.
"ฯลฯ": "---.-",
"ๆ": "---.-",
# Vowel signs, followed by "อ"
"ะ": ".-...",
"า": ".-",
"ิ": "..-..",
"ี": "..",
"ึ": "..--.",
"ื": "..--",
"ุ": "..-.-",
"ู": "---.",
"เ": ".",
"แ": ".-.-",
"โ": "---",
"ไ": ".-..-",
"ใ": ".-..-",
"ำ": "...-.",
"อ": "-...-",
}

# Encoding table: upper-case Latin letter, digit or punctuation mark ->
# International Morse code written with "." and "-".
# Every code in this table is unique, so it can be inverted without loss.
ENGLISH_MORSE_CODE = {
    # Letters
    "A": ".-",
    "B": "-...",
    "C": "-.-.",
    "D": "-..",
    "E": ".",
    "F": "..-.",
    "G": "--.",
    "H": "....",
    "I": "..",
    "J": ".---",
    "K": "-.-",
    "L": ".-..",
    "M": "--",
    "N": "-.",
    "O": "---",
    "P": ".--.",
    "Q": "--.-",
    "R": ".-.",
    "S": "...",
    "T": "-",
    "U": "..-",
    "V": "...-",
    "W": ".--",
    "X": "-..-",
    "Y": "-.--",
    "Z": "--..",
    # Digits ("9" was previously missing from the table)
    "0": "-----",
    "1": ".----",
    "2": "..---",
    "3": "...--",
    "4": "....-",
    "5": ".....",
    "6": "-....",
    "7": "--...",
    "8": "---..",
    "9": "----.",
    # Punctuation
    ",": "--..--",
    ".": ".-.-.-",
    "?": "..--..",
    ";": "-.-.-.",
    ":": "---...",
    "'": ".----.",
    "-": "-....-",
    "/": "-..-.",
    # The opening bracket has its own code; "-.--.-" belongs to the closing
    # bracket (ITU-R M.1677-1).
    "(": "-.--.",
    ")": "-.--.-",
}

# Strip any spaces embedded in the Thai codes. The table is updated in place
# (same dict object, same key order) so existing references stay valid.
THAI_MORSE_CODE.update(
    {symbol: code.replace(" ", "") for symbol, code in THAI_MORSE_CODE.items()}
)

# Reverse lookup tables: Morse code -> character.
# When several characters share one code, the character listed last in the
# encoding table is the one kept here.
decodingeng = {code: symbol for symbol, code in ENGLISH_MORSE_CODE.items()}
decodingthai = {code: symbol for symbol, code in THAI_MORSE_CODE.items()}


def morse_encode(text: str, lang: str = "th") -> str:
    """
    Convert text to Morse code (Thai and English are supported).

    The text is upper-cased and translated one character at a time, and the
    resulting codes are joined with a single space. A character that has no
    entry in the selected table (a blank, for example) is emitted as a
    single space, so gaps between words show up as runs of spaces.

    :param str text: Text to encode
    :param str lang: Language code (*th* is Thai and *en* is English)
    :return: Morse code
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_encode

        print(morse_encode("แมว", lang="th"))
        # output: .-.- -- .--

        print(morse_encode("cat", lang="en"))
        # output: -.-. .- -
    """
    # Reject unknown language codes before touching the input text.
    if lang not in ("th", "en"):
        raise NotImplementedError(f"This function doesn't support {lang}.")

    table = THAI_MORSE_CODE if lang == "th" else ENGLISH_MORSE_CODE
    return " ".join(table.get(char, " ") for char in text.upper())


def morse_decode(morse_text: str, lang: str = "th") -> str:
    """
    Convert Morse code back to text (simple table lookup).

    The input is split on single spaces and every piece is looked up in the
    reverse table of the selected language.

    * Thai: unknown pieces are dropped and all whitespace is removed from
      the result. Because several Thai characters share one code, the
      decoded text may contain the wrong character of such a group; a
      spell corrector can be used to repair it.
    * English: unknown pieces (including the empty pieces produced by
      consecutive spaces) become a space; runs of whitespace in the result
      are then collapsed to one space and the ends are trimmed.

    :param str morse_text: Morse code, with codes separated by spaces
    :param str lang: Language code (*th* is Thai and *en* is English)
    :return: Text
    :rtype: str
    :raises NotImplementedError: if *lang* is neither *th* nor *en*

    :Example:
    ::

        from pythainlp.util.morse import morse_decode

        print(morse_decode(".-.- -- .--", lang="th"))
        # output: แมว

        print(morse_decode("-.-. .- -", lang="en"))
        # output: CAT
    """
    # Pick the reverse table, the placeholder for unknown codes and the
    # separator used when re-joining the whitespace-split result.
    if lang == "th":
        table, placeholder, separator = decodingthai, "", ""
    elif lang == "en":
        table, placeholder, separator = decodingeng, " ", " "
    else:
        raise NotImplementedError(f"This function doesn't support {lang}.")

    decoded = "".join(
        table.get(code, placeholder) for code in morse_text.split(" ")
    )
    return separator.join(decoded.split())
41 changes: 25 additions & 16 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,54 +18,55 @@
arabic_digit_to_thai_digit,
bahttext,
collate,
countthai,
convert_years,
count_thai_chars,
countthai,
dict_trie,
display_thai_char,
digit_to_text,
emoji_to_thai,
eng_to_thai,
find_keyword,
ipa_to_rtgs,
is_native_thai,
isthai,
isthaichar,
normalize,
now_reign_year,
num_to_thaiword,
maiyamok,
nectec_to_ipa,
rank,
reign_year_to_ad,
remove_dangling,
remove_dup_spaces,
remove_tone_ipa,
remove_tonemark,
remove_trailing_repeat_consonants,
remove_zw,
rhyme,
text_to_arabic_digit,
text_to_num,
text_to_thai_digit,
thaiword_to_date,
thai_digit_to_arabic_digit,
thai_keyboard_dist,
thai_to_eng,
thai_strftime,
thai_strptime,
thai_word_tone_detector,
thaiword_to_date,
thaiword_to_num,
thaiword_to_time,
time_to_thaiword,
thai_to_eng,
tis620_to_utf8,
to_idna,
thaiword_to_num,
thai_keyboard_dist,
text_to_num,
words_to_num,
tone_detector,
sound_syllable,
syllable_length,
syllable_open_close_detector,
tone_detector,
thai_word_tone_detector,
convert_years,
thai_strptime,
nectec_to_ipa,
ipa_to_rtgs,
remove_tone_ipa,
tis620_to_utf8,
remove_trailing_repeat_consonants,
words_to_num,
)
from pythainlp.util.morse import morse_decode, morse_encode
from pythainlp.util.spell_words import spell_word


Expand Down Expand Up @@ -835,5 +836,13 @@ def test_remove_repeat_consonants(self):
"อืมมม คุณมีบุคลิกที่เริ่ด ฉันจะให้เกรดดีกับคุณ\nนี่เป็นความลับ",
)

def test_morse_encode(self):
    """morse_encode handles Thai and English and rejects other languages."""
    self.assertEqual(morse_encode("แมว", lang="th"), ".-.- -- .--")
    self.assertEqual(morse_encode("cat", lang="en"), "-.-. .- -")
    # An unsupported language code must raise instead of returning text.
    with self.assertRaises(NotImplementedError):
        morse_encode("แมว", lang="xx")

def test_morse_decode(self):
    """morse_decode handles Thai and English and rejects other languages."""
    self.assertEqual(morse_decode(".-.- -- .--", lang="th"), "แมว")
    self.assertEqual(morse_decode("-.-. .- -", lang="en"), "CAT")
    # An unsupported language code must raise instead of returning text.
    with self.assertRaises(NotImplementedError):
        morse_decode(".-.- -- .--", lang="xx")

# def test_abbreviation_to_full_text(self):
# self.assertIsInstance(abbreviation_to_full_text("รร.ของเราน่าอยู่", list))

0 comments on commit b3c1143

Please sign in to comment.