Skip to content

Commit

Permalink
Add pythainlp.morpheme.nighit
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Jan 1, 2024
1 parent 1e94d32 commit dd2ddaa
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 0 deletions.
8 changes: 8 additions & 0 deletions docs/api/morpheme.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.. currentmodule:: pythainlp.morpheme

pythainlp.morpheme
==================

The `pythainlp.morpheme` module collects functions for morpheme analysis, word formation, and more for the Thai language.

.. autofunction:: nighit
155 changes: 155 additions & 0 deletions notebooks/create_words.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pythainlp.transliterate import pronunciate\n",
"from pythainlp import thai_consonants"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'พุด-ทะ'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pronunciate(\"พุทธ\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'บู-ชา'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pronunciate(\"บูชา\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'อะ-นุก'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pronunciate(\"อนุค\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def nighit(w1,w2): # read: https://www.trueplookpanya.com/learning/detail/1180\n",
" if not str(w1).endswith('ํ') and len(w1)!=2:\n",
" raise NotImplementedError(f\"The function doesn't support {w1}.\")\n",
" list_w1 = list(w1)\n",
" list_w2 = list(w2)\n",
" newword = list()\n",
" newword.append(list_w1[0])\n",
" newword.append(\"\")\n",
" consonant_start = [i for i in list_w2 if i in set(thai_consonants)][0]\n",
" if consonant_start in [\"\",\"\",\"\",\"\",\"\"]:\n",
" newword.append(\"\")\n",
" elif consonant_start in [\"\",\"\",\"\",\"\"]:\n",
" newword.append(\"\")\n",
" elif consonant_start in [\"\",\"\",\"\",\"\"]:\n",
" newword.append(\"\")\n",
" elif consonant_start in [\"\",\"\",\"\",\"\",\"\"]:\n",
" newword.append(\"\")\n",
" elif consonant_start in [\"\",\"\",\"\",\"\"]:\n",
" newword.append(\"\")\n",
" elif consonant_start in [\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\"]:\n",
" newword.append(\"\")\n",
" else:\n",
" raise NotImplementedError(f\"The function doesn't support {w1} and {w2}.\")\n",
" newword.extend(list_w2)\n",
" return ''.join(newword)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"assert nighit(\"สํ\",\"คีต\")==\"สังคีต\"\n",
"assert nighit(\"สํ\",\"จร\")==\"สัญจร\"\n",
"assert nighit(\"สํ\",\"ฐาน\")==\"สัณฐาน\"\n",
"assert nighit(\"สํ\",\"นิษฐาน\")==\"สันนิษฐาน\"\n",
"assert nighit(\"สํ\",\"ปทา\")==\"สัมปทา\"\n",
"assert nighit(\"สํ\",\"โยค\")==\"สังโยค\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.13 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "a1d6ff38954a1cdba4cf61ffa51e42f4658fc35985cd256cd89123cae8466a39"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
8 changes: 8 additions & 0 deletions pythainlp/morpheme/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0

"""
PyThaiNLP morpheme
"""
from pythainlp.morpheme.word_formation import nighit
57 changes: 57 additions & 0 deletions pythainlp/morpheme/word_formation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
from pythainlp.transliterate import pronunciate
from pythainlp import thai_consonants


# Final consonant that replaces the nighit, keyed by the first consonant of
# the following word. Groups follow the Pali consonant classes; ฎ and ด are
# kept as the Thai-script variants the original implementation accepted.
_NIGHIT_FINALS = {
    consonant: final
    for final, group in (
        ("ง", "กขคฆง"),  # velar class
        ("ญ", "จฉชฌญ"),  # palatal class
        ("ณ", "ฎฏฐฑฒณ"),  # retroflex class
        ("น", "ดตถทธน"),  # dental class
        ("ม", "ปผพภม"),  # labial class
        ("ง", "ยรลฬวศษสห"),  # consonants outside the five classes
    )
    for consonant in group
}


def nighit(w1: str, w2: str) -> str:
    """
    Join a nighit-final prefix with a second word of Pali origin.

    Nighit (นิคหิต or ํ ) is the niggahita sign. When a prefix ending in
    a nighit is joined to another word, the nighit turns into a final
    consonant chosen by the first consonant of the second word, and the
    prefix takes the vowel sign ั. This function implements that simple
    rule only.

    Read more: https://www.trueplookpanya.com/learning/detail/1180

    :param str w1: A two-character Thai prefix: one consonant followed by
        a nighit, for example ``"สํ"``.
    :param str w2: The Thai word to attach to the prefix.
    :return: The combined Thai word.
    :rtype: str
    :raises NotImplementedError: If ``w1`` is not exactly one character
        followed by a nighit, or if ``w2`` has no Thai consonant or its
        first Thai consonant is not covered by the rule.

    :Example:
    ::

        from pythainlp.morpheme import nighit

        assert nighit("สํ", "คีต") == "สังคีต"
        assert nighit("สํ", "จร") == "สัญจร"
        assert nighit("สํ", "ชาติ") == "สัญชาติ"
        assert nighit("สํ", "ฐาน") == "สัณฐาน"
        assert nighit("สํ", "นิษฐาน") == "สันนิษฐาน"
        assert nighit("สํ", "ปทา") == "สัมปทา"
        assert nighit("สํ", "โยค") == "สังโยค"
    """
    # Both conditions are required: exactly two characters AND a trailing
    # nighit. (The earlier `and` let malformed prefixes slip through.)
    if len(w1) != 2 or not w1.endswith("ํ"):
        raise NotImplementedError(f"The function doesn't support {w1}.")

    # The first consonant may follow a leading vowel sign (e.g. "โยค"),
    # so scan for it rather than taking w2[0].
    consonants = set(thai_consonants)
    consonant_start = next((ch for ch in w2 if ch in consonants), None)

    # `None` (no consonant at all) and unsupported consonants both miss
    # the table and are reported the same way.
    final = _NIGHIT_FINALS.get(consonant_start)
    if final is None:
        raise NotImplementedError(
            f"The function doesn't support {w1} and {w2}."
        )

    return "".join([w1[0], "ั", final, w2])
16 changes: 16 additions & 0 deletions tests/test_morpheme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0

import unittest
from pythainlp.morpheme import nighit


class TestMorphemePackage(unittest.TestCase):
    """Tests for the public functions of ``pythainlp.morpheme``."""

    def test_nighit(self):
        """Check the words ``nighit`` builds from the prefix สํ."""
        # Second word -> expected combined word.
        expected_by_second_word = {
            "คีต": "สังคีต",
            "จร": "สัญจร",
            "ฐาน": "สัณฐาน",
            "นิษฐาน": "สันนิษฐาน",
            "ปทา": "สัมปทา",
            "โยค": "สังโยค",
        }
        for second_word, combined in expected_by_second_word.items():
            self.assertEqual(nighit("สํ", second_word), combined)

0 comments on commit dd2ddaa

Please sign in to comment.