From a875560f1f4ed40d9fe843db91d1f60d694c1e32 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 11 Feb 2024 11:46:51 +0000 Subject: [PATCH] Update README and license header --- Dockerfile | 3 + README.md | 60 ++++++++----------- README_TH.md | 32 ++++------ docs/conf.py | 6 +- pyproject.toml | 3 + pythainlp/__init__.py | 2 +- pythainlp/__main__.py | 2 +- pythainlp/ancient/__init__.py | 2 +- pythainlp/ancient/aksonhan.py | 2 +- pythainlp/augment/__init__.py | 2 +- pythainlp/augment/lm/__init__.py | 2 +- pythainlp/augment/lm/fasttext.py | 2 +- pythainlp/augment/lm/phayathaibert.py | 2 +- pythainlp/augment/lm/wangchanberta.py | 2 +- pythainlp/augment/word2vec/__init__.py | 2 +- pythainlp/augment/word2vec/bpemb_wv.py | 2 +- pythainlp/augment/word2vec/core.py | 2 +- pythainlp/augment/word2vec/ltw2v.py | 2 +- pythainlp/augment/word2vec/thai2fit.py | 2 +- pythainlp/augment/wordnet.py | 2 +- pythainlp/benchmarks/__init__.py | 2 +- pythainlp/benchmarks/word_tokenization.py | 2 +- pythainlp/chat/__init__.py | 2 +- pythainlp/chat/core.py | 2 +- pythainlp/classify/__init__.py | 2 +- pythainlp/classify/param_free.py | 2 +- pythainlp/cli/__init__.py | 2 +- pythainlp/cli/benchmark.py | 2 +- pythainlp/cli/data.py | 2 +- pythainlp/cli/soundex.py | 2 +- pythainlp/cli/tag.py | 2 +- pythainlp/cli/tokenize.py | 2 +- pythainlp/cls/__init__.py | 2 +- pythainlp/coref/__init__.py | 2 +- pythainlp/coref/_fastcoref.py | 2 +- pythainlp/coref/core.py | 2 +- pythainlp/coref/han_coref.py | 2 +- pythainlp/corpus/__init__.py | 2 +- pythainlp/corpus/common.py | 2 +- pythainlp/corpus/conceptnet.py | 2 +- pythainlp/corpus/core.py | 2 +- pythainlp/corpus/icu.py | 2 +- pythainlp/corpus/oscar.py | 2 +- pythainlp/corpus/th_en_translit.py | 2 +- pythainlp/corpus/tnc.py | 2 +- pythainlp/corpus/ttc.py | 2 +- pythainlp/corpus/util.py | 2 +- pythainlp/corpus/volubilis.py | 2 +- pythainlp/corpus/wikipedia.py | 2 +- pythainlp/corpus/wordnet.py | 2 +- pythainlp/el/__init__.py | 2 +- 
pythainlp/el/_multiel.py | 2 +- pythainlp/el/core.py | 2 +- pythainlp/generate/__init__.py | 2 +- pythainlp/generate/core.py | 2 +- pythainlp/generate/thai2fit.py | 2 +- pythainlp/generate/wangchanglm.py | 2 +- pythainlp/khavee/__init__.py | 2 +- pythainlp/khavee/core.py | 2 +- pythainlp/khavee/example.py | 2 +- pythainlp/morpheme/__init__.py | 2 +- pythainlp/morpheme/thaiwordcheck.py | 2 +- pythainlp/morpheme/word_formation.py | 2 +- pythainlp/parse/__init__.py | 2 +- pythainlp/parse/core.py | 2 +- pythainlp/phayathaibert/__init__.py | 2 +- pythainlp/phayathaibert/core.py | 2 +- pythainlp/soundex/__init__.py | 2 +- pythainlp/soundex/core.py | 2 +- pythainlp/soundex/lk82.py | 2 +- pythainlp/soundex/metasound.py | 2 +- pythainlp/soundex/prayut_and_somchaip.py | 2 +- pythainlp/soundex/sound.py | 2 +- pythainlp/soundex/udom83.py | 2 +- pythainlp/spell/__init__.py | 2 +- pythainlp/spell/core.py | 2 +- pythainlp/spell/phunspell.py | 2 +- pythainlp/spell/symspellpy.py | 2 +- pythainlp/spell/tltk.py | 2 +- .../spell/wanchanberta_thai_grammarly.py | 2 +- pythainlp/summarize/__init__.py | 2 +- pythainlp/summarize/core.py | 2 +- pythainlp/summarize/freq.py | 2 +- pythainlp/summarize/keybert.py | 2 +- pythainlp/summarize/mt5.py | 2 +- pythainlp/tag/__init__.py | 2 +- pythainlp/tag/_tag_perceptron.py | 2 +- pythainlp/tag/blackboard.py | 2 +- pythainlp/tag/chunk.py | 2 +- pythainlp/tag/crfchunk.py | 2 +- pythainlp/tag/locations.py | 2 +- pythainlp/tag/named_entity.py | 2 +- pythainlp/tag/orchid.py | 2 +- pythainlp/tag/perceptron.py | 2 +- pythainlp/tag/pos_tag.py | 2 +- pythainlp/tag/thai_nner.py | 2 +- pythainlp/tag/thainer.py | 2 +- pythainlp/tag/tltk.py | 2 +- pythainlp/tag/unigram.py | 2 +- pythainlp/tag/wangchanberta_onnx.py | 2 +- pythainlp/tokenize/__init__.py | 2 +- pythainlp/tokenize/_utils.py | 2 +- pythainlp/tokenize/attacut.py | 2 +- pythainlp/tokenize/core.py | 2 +- pythainlp/tokenize/crfcls.py | 2 +- pythainlp/tokenize/crfcut.py | 2 +- 
pythainlp/tokenize/deepcut.py | 2 +- pythainlp/tokenize/etcc.py | 2 +- pythainlp/tokenize/han_solo.py | 2 +- pythainlp/tokenize/longest.py | 2 +- pythainlp/tokenize/multi_cut.py | 2 +- pythainlp/tokenize/nercut.py | 2 +- pythainlp/tokenize/newmm.py | 2 +- pythainlp/tokenize/nlpo3.py | 2 +- pythainlp/tokenize/oskut.py | 2 +- pythainlp/tokenize/pyicu.py | 2 +- pythainlp/tokenize/sefr_cut.py | 2 +- pythainlp/tokenize/ssg.py | 2 +- pythainlp/tokenize/tcc.py | 2 +- pythainlp/tokenize/tcc_p.py | 2 +- pythainlp/tokenize/thaisumcut.py | 2 +- pythainlp/tokenize/tltk.py | 2 +- pythainlp/tokenize/wtsplit.py | 2 +- pythainlp/tools/__init__.py | 2 +- pythainlp/tools/misspell.py | 2 +- pythainlp/tools/path.py | 2 +- pythainlp/translate/__init__.py | 2 +- pythainlp/translate/core.py | 2 +- pythainlp/translate/en_th.py | 2 +- pythainlp/translate/th_fr.py | 2 +- pythainlp/translate/zh_th.py | 2 +- pythainlp/transliterate/__init__.py | 2 +- pythainlp/transliterate/core.py | 2 +- pythainlp/transliterate/ipa.py | 2 +- pythainlp/transliterate/iso_11940.py | 2 +- pythainlp/transliterate/lookup.py | 2 +- pythainlp/transliterate/pyicu.py | 2 +- pythainlp/transliterate/royin.py | 2 +- pythainlp/transliterate/spoonerism.py | 2 +- pythainlp/transliterate/thai2rom.py | 2 +- pythainlp/transliterate/thai2rom_onnx.py | 2 +- pythainlp/transliterate/thaig2p.py | 2 +- pythainlp/transliterate/tltk.py | 2 +- pythainlp/transliterate/w2p.py | 2 +- pythainlp/transliterate/wunsen.py | 2 +- pythainlp/ulmfit/__init__.py | 2 +- pythainlp/ulmfit/core.py | 2 +- pythainlp/ulmfit/preprocess.py | 2 +- pythainlp/ulmfit/tokenizer.py | 2 +- pythainlp/util/__init__.py | 2 +- pythainlp/util/abbreviation.py | 2 +- pythainlp/util/collate.py | 2 +- pythainlp/util/date.py | 2 +- pythainlp/util/digitconv.py | 2 +- pythainlp/util/emojiconv.py | 2 +- pythainlp/util/encoding.py | 2 +- pythainlp/util/keyboard.py | 2 +- pythainlp/util/keywords.py | 2 +- pythainlp/util/morse.py | 2 +- pythainlp/util/normalize.py | 2 +- 
pythainlp/util/numtoword.py | 2 +- pythainlp/util/phoneme.py | 2 +- pythainlp/util/pronounce.py | 2 +- .../util/remove_trailing_repeat_consonants.py | 2 +- pythainlp/util/spell_words.py | 2 +- pythainlp/util/strftime.py | 2 +- pythainlp/util/syllable.py | 2 +- pythainlp/util/thai.py | 2 +- pythainlp/util/thaiwordcheck.py | 2 +- pythainlp/util/time.py | 2 +- pythainlp/util/trie.py | 2 +- pythainlp/util/wordtonum.py | 2 +- pythainlp/wangchanberta/__init__.py | 2 +- pythainlp/wangchanberta/core.py | 2 +- pythainlp/word_vector/__init__.py | 2 +- pythainlp/word_vector/core.py | 2 +- pythainlp/wsd/__init__.py | 2 +- pythainlp/wsd/core.py | 2 +- setup.py | 2 +- tests/__init__.py | 2 +- tests/test_ancient.py | 2 +- tests/test_augment.py | 2 +- tests/test_benchmarks.py | 2 +- tests/test_classify.py | 2 +- tests/test_cli.py | 2 +- tests/test_coref.py | 2 +- tests/test_corpus.py | 2 +- tests/test_el.py | 2 +- tests/test_generate.py | 4 +- tests/test_khavee.py | 2 +- tests/test_misspell.py | 2 +- tests/test_morpheme.py | 2 +- tests/test_parse.py | 2 +- tests/test_soundex.py | 2 +- tests/test_spell.py | 2 +- tests/test_summarize.py | 2 +- tests/test_tag.py | 2 +- tests/test_tokenize.py | 2 +- tests/test_tools.py | 2 +- tests/test_util.py | 2 +- tests/test_wsd.py | 2 +- 201 files changed, 246 insertions(+), 252 deletions(-) diff --git a/Dockerfile b/Dockerfile index 35fe0d6c4..dc8162af7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + FROM python:3.8-slim-buster COPY . . diff --git a/README.md b/README.md index 776ff9031..a3a45c45d 100644 --- a/README.md +++ b/README.md @@ -13,20 +13,19 @@ Chat on Matrix -PyThaiNLP is a Python package for text processing and linguistic analysis, similar to [NLTK](https://www.nltk.org/) with a focus on the Thai language. 
+PyThaiNLP is a Python package for text processing and linguistic analysis, similar to [NLTK](https://www.nltk.org/) with a focus on the Thai language. PyThaiNLP เป็นไลบารีภาษาไพทอนสำหรับประมวลผลภาษาธรรมชาติ คล้ายกับ NLTK โดยเน้นภาษาไทย [ดูรายละเอียดภาษาไทยได้ที่ README_TH.MD](https://github.com/PyThaiNLP/pythainlp/blob/dev/README_TH.md) -**News** +## News > Now, You can contact with or ask any questions of the PyThaiNLP team. Chat on Matrix | Version | Description | Status | |:------:|:--:|:------:| -| [5.0](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) | +| [5.0.1](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) | | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) | - ## Getting Started - PyThaiNLP 2 requires Python 3.7+. Python 2.7 users can use PyThaiNLP 1.6. See [2.0 change log](https://github.com/PyThaiNLP/pythainlp/issues/118) | [Upgrading from 1.7](https://pythainlp.github.io/docs/2.0/notes/pythainlp-1_7-2_0.html) | [Upgrading ThaiNER from 1.7](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0) @@ -37,24 +36,20 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร ## Capabilities -PyThaiNLP provides standard NLP functions for Thai, for example part-of-speech tagging, linguistic unit segmentation (syllable, word, or sentence). Some of these functions are also available via the command-line interface. +PyThaiNLP provides standard linguistic analysis for the Thai language and standard Thai locale utility functions. +Some of these functions are also available via the command-line interface (run `thainlp` in your shell). -
- List of Features +Partial list of features: - Convenient character and word classes, like Thai consonants (`pythainlp.thai_consonants`), vowels (`pythainlp.thai_vowels`), digits (`pythainlp.thai_digits`), and stop words (`pythainlp.corpus.thai_stopwords`) -- comparable to constants like `string.letters`, `string.digits`, and `string.punctuation` -- Thai linguistic unit segmentation/tokenization, including sentence (`sent_tokenize`), word (`word_tokenize`), and subword segmentations based on Thai Character Cluster (`subword_tokenize`) -- Thai part-of-speech tagging (`pos_tag`) -- Thai spelling suggestion and correction (`spell` and `correct`) -- Thai transliteration (`transliterate`) -- Thai soundex (`soundex`) with three engines (`lk82`, `udom83`, `metasound`) -- Thai collation (sorted by dictionary order) (`collate`) -- Read out number to Thai words (`bahttext`, `num_to_thaiword`) -- Thai datetime formatting (`thai_strftime`) +- Linguistic unit segmentation at different levels: sentence (`sent_tokenize`), word (`word_tokenize`), and subword (`subword_tokenize`) +- Part-of-speech tagging (`pos_tag`) +- Spelling suggestion and correction (`spell` and `correct`) +- Phonetic algorithm and transliteration (`soundex` and `transliterate`) +- Collation (sorted by dictionary order) (`collate`) +- Number read out (`num_to_thaiword` and `bahttext`) +- Datetime formatting (`thai_strftime`) - Thai-English keyboard misswitched fix (`eng_to_thai`, `thai_to_eng`) -- Command-line interface for basic functions, like tokenization and POS tagging (run `thainlp` in your shell) -
- ## Installation @@ -78,23 +73,20 @@ Some functionalities, like Thai WordNet, may require extra packages. To install pip install pythainlp[extra1,extra2,...] ``` -
- List of possible extras +Possible `extras`: -- `full` (install everything) -- `attacut` (to support attacut, a fast and accurate tokenizer) -- `benchmarks` (for [word tokenization benchmarking](tokenization-benchmark.md)) -- `icu` (for ICU, International Components for Unicode, support in transliteration and tokenization) -- `ipa` (for IPA, International Phonetic Alphabet, support in transliteration) -- `ml` (to support ULMFiT models for classification) -- `thai2fit` (for Thai word vector) -- `thai2rom` (for machine-learnt romanization) -- `wordnet` (for Thai WordNet API) -
+- `full` (install everything) +- `attacut` (to support attacut, a fast and accurate tokenizer) +- `benchmarks` (for [word tokenization benchmarking](tokenization-benchmark.md)) +- `icu` (for ICU, International Components for Unicode, support in transliteration and tokenization) +- `ipa` (for IPA, International Phonetic Alphabet, support in transliteration) +- `ml` (to support ULMFiT models for classification) +- `thai2fit` (for Thai word vector) +- `thai2rom` (for machine-learnt romanization) +- `wordnet` (for Thai WordNet API) For dependency details, look at the `extras` variable in [`setup.py`](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py). - ## Data Directory - Some additional data, like word lists and language models, may be automatically downloaded during runtime. @@ -102,22 +94,22 @@ For dependency details, look at the `extras` variable in [`setup.py`](https://gi - The data directory can be changed by specifying the environment variable `PYTHAINLP_DATA_DIR`. - See the data catalog (`db.json`) at https://github.com/PyThaiNLP/pythainlp-corpus - ## Command-Line Interface Some of PyThaiNLP functionalities can be used via command line with the `thainlp` command. For example, to display a catalog of datasets: + ```sh thainlp data catalog ``` To show how to use: + ```sh thainlp help ``` - ## Licenses | | License | @@ -127,7 +119,6 @@ thainlp help | Language models created by PyThaiNLP | [Creative Commons Attribution 4.0 International Public License (CC-by)](https://creativecommons.org/licenses/by/4.0/) | | Other corpora and models that may be included in PyThaiNLP | See [Corpus License](https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/corpus_license.md) | - ## Contribute to PyThaiNLP - Please fork and create a pull request :) @@ -137,7 +128,6 @@ thainlp help You can read [INTHEWILD.md](https://github.com/PyThaiNLP/pythainlp/blob/dev/INTHEWILD.md). 
- ## Citations If you use `PyThaiNLP` in your project or publication, please cite the library as follows: diff --git a/README_TH.md b/README_TH.md index c12920d1b..c34173936 100644 --- a/README_TH.md +++ b/README_TH.md @@ -14,18 +14,17 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหรับประมวลผลภาษาธรรมชาติ โดยเน้นภาษาไทย -**ข่าวสาร** +## ข่าวสาร > คุณสามารถพูดคุยหรือแชทกับทีม PyThaiNLP หรือผู้สนับสนุนคนอื่น ๆ ได้ที่ Chat on Matrix | รุ่น | คำอธิบาย | สถานะ | |:------:|:--:|:------:| -| [5.0](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) | +| [5.0.1](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) | | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) | ติดตามพวกเราบน [PyThaiNLP Facebook page](https://www.facebook.com/pythainlp/) เพื่อรับข่าวสารเพิ่มเติม - ## เริ่มต้นกับ PyThaiNLP พวกเราได้จัดทำ [PyThaiNLP Get Started Tutorial](https://pythainlp.github.io/tutorials/notebooks/pythainlp_get_started.html) สำหรับสำรวจความสามารถของ PyThaiNLP; พวกเรามีเอกสารสอนใช้งาน สามารถศึกษาได้ที่ [หน้า tutorial](https://pythainlp.github.io/tutorials). @@ -34,7 +33,6 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร พวกเราพยายามทำให้โมดูลใช้งานได้ง่ายที่สุดเท่าที่จะเป็นไปได้; ตัวอย่างเช่น บางชุดข้อมูล (เช่น รายการคำและตัวแบบภาษา) จะถูกดาวน์โหลดอัตโนมัติเมื่อมีการเรียกใช้งาน โดย PyThaiNLP จะจัดเก็บข้อมูลเหล่านั้นไว้ในโฟลเดอร์ `~/pythainlp-data` เป็นค่าเริ่มต้น แต่ผู้ใช้งานสามารถระบุตำแหน่งที่ต้องการได้เองผ่านค่า environment variable `PYTHAINLP_DATA_DIR` อ่านรายละเอียดคลังข้อมูลเพิ่มเติมได้ที่ [PyThaiNLP/pythainlp-corpus](https://github.com/PyThaiNLP/pythainlp-corpus). 
- ## ความสามารถ PyThaiNLP มีความสามารถพื้นฐานสำหรับการประมวลผลภาษาไทย ตัวอย่างเช่นการกำกับหน้าที่ของคำ (part-of-speech tagging) การแบ่งหน่วยของข้อความตามหลักภาษาศาสตร์ (พยางค์ คำ และประโยค) บางความสามารถสามารถใช้งานได้ผ่านทางคอมมานด์ไลน์ @@ -84,35 +82,35 @@ pip install pythainlp[extra1,extra2,...]
รายการสำหรับติดตั้งผ่าน extras -- `full` (ติดตั้งทุกอย่าง) -- `attacut` (เพื่อสนับสนุน attacut ซึ่งเป็นตัวตัดคำที่ทำงานได้รวดเร็วและมีประสิทธิภาพ) -- `benchmarks` (สำหรับ [word tokenization benchmarking](tokenization-benchmark.md)) -- `icu` (สำหรับการรองรับ ICU หรือ International Components for Unicode ในการถอดเสียงเป็นอักษรและการตัดแบ่งคำ) -- `ipa` (สำหรับการรองรับ IPA หรือ International Phonetic Alphabet ในการถอดเสียงเป็นอักษร) -- `ml` (เพื่อให้สนับสนุนตัวแบบภาษา ULMFiT สำหรับการจำแนกข้อความ) -- `thai2fit` (สำหรับ Thai word vector) -- `thai2rom` (สำหรับการถอดอักษรไทยเป็นอักษรโรมัน) -- `wordnet` (สำหรับ Thai WordNet API) +- `full` (ติดตั้งทุกอย่าง) +- `attacut` (เพื่อสนับสนุน attacut ซึ่งเป็นตัวตัดคำที่ทำงานได้รวดเร็วและมีประสิทธิภาพ) +- `benchmarks` (สำหรับ [word tokenization benchmarking](tokenization-benchmark.md)) +- `icu` (สำหรับการรองรับ ICU หรือ International Components for Unicode ในการถอดเสียงเป็นอักษรและการตัดแบ่งคำ) +- `ipa` (สำหรับการรองรับ IPA หรือ International Phonetic Alphabet ในการถอดเสียงเป็นอักษร) +- `ml` (เพื่อให้สนับสนุนตัวแบบภาษา ULMFiT สำหรับการจำแนกข้อความ) +- `thai2fit` (สำหรับ Thai word vector) +- `thai2rom` (สำหรับการถอดอักษรไทยเป็นอักษรโรมัน) +- `wordnet` (สำหรับ Thai WordNet API)
สำหรับโมดูลที่ต้องการ สามารถดูรายละเอียดได้ที่ตัวแปร `extras` ใน [`setup.py`](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py). - ## Command-line บางความสามารถของ PyThaiNLP สามารถใช้งานผ่าน command line ได้โดยใช้ `thainlp` ตัวอย่าง, แสดงรายละเอียดของชุดข้อมูล: + ```sh thainlp data catalog ``` แสดงวิธีใช้งาน: + ```sh thainlp help ``` - ## ผู้ใช้งาน Python 2 - PyThaiNLP 2 สนับสนุน Python 3.6 ขึ้นไป บางความสามารถ สามารถใช้งานกับ Python 3 รุ่นก่อนหน้าได้ แต่ไม่ได้มีการทดสอบว่าใช้งานได้หรือไม่ อ่านเพิ่มเติม [1.7 -> 2.0 change log](https://github.com/PyThaiNLP/pythainlp/issues/118). @@ -120,7 +118,6 @@ thainlp help - [Upgrade ThaiNER from 1.7](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0) - ผู้ใช้งาน Python 2.7 สามารถใช้งาน PyThaiNLP 1.6 - ## การอ้างอิง หากคุณใช้ซอฟต์แวร์ `PyThaiNLP` ในโครงงานหรืองานวิจัยของคุณ คุณสามารถอ้างอิงได้ตามนี้ @@ -184,7 +181,6 @@ Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Sur คุณสามารถอ่านได้ที่ [INTHEWILD.md](https://github.com/PyThaiNLP/pythainlp/blob/dev/INTHEWILD.md) - ## สัญญาอนุญาต | | สัญญาอนุญาต | @@ -194,12 +190,10 @@ Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Sur | Language models created by PyThaiNLP | [Creative Commons Attribution 4.0 International Public License (CC-by)](https://creativecommons.org/licenses/by/4.0/) | | สำหรับฐานข้อมูลภาษาและโมเดลอื่นที่อาจมาพร้อมกับซอฟต์แวร์ PyThaiNLP | ดู [Corpus License](https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/corpus_license.md) | - ## บัตรโมเดล สำหรับรายละเอียดทางเทคนิค ข้อควรระวัง และข้อคำนึงทางจริยธรรมของตัวแบบ (โมเดล) ที่ใช้ใน PyThaiNLP กรุณาดูที่ [Model cards](https://github.com/PyThaiNLP/pythainlp/wiki/Model-Cards) - ## ผู้สนับสนุน [![VISTEC-depa Thailand Artificial Intelligence Research Institute](https://airesearch.in.th/assets/img/logo/airesearch-logo.svg)](https://airesearch.in.th/) diff --git a/docs/conf.py b/docs/conf.py index 
2755ae110..9c0f71d52 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 # # Configuration file for the Sphinx documentation builder. # http://www.sphinx-doc.org/en/master/config @@ -21,8 +23,8 @@ # -- Project information ----------------------------------------------------- project = "PyThaiNLP" -copyright = "2019, pythainlp_builders" -author = "pythainlp_builders" +copyright = "2016-2024 PyThaiNLP Project" +author = "PyThaiNLP Project" curyear = datetime.today().year copyright = f"2017-{curyear}, {project} (Apache Software License 2.0)" diff --git a/pyproject.toml b/pyproject.toml index 1051262e1..faa102a79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + [tool.ruff] line-length = 79 indent-width = 4 diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py index c5990ac43..2d2b0dfdc 100644 --- a/pythainlp/__init__.py +++ b/pythainlp/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 __version__ = "5.0.1" diff --git a/pythainlp/__main__.py b/pythainlp/__main__.py index dd8b250e7..8f7217b3a 100644 --- a/pythainlp/__main__.py +++ b/pythainlp/__main__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import argparse import sys diff --git a/pythainlp/ancient/__init__.py b/pythainlp/ancient/__init__.py index 3d92c73ee..5c45e6602 100644 --- a/pythainlp/ancient/__init__.py +++ b/pythainlp/ancient/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# 
SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Ancient versions of the Thai language diff --git a/pythainlp/ancient/aksonhan.py b/pythainlp/ancient/aksonhan.py index fccedc014..57389d1aa 100644 --- a/pythainlp/ancient/aksonhan.py +++ b/pythainlp/ancient/aksonhan.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from pythainlp.util import Trie from pythainlp import thai_consonants, thai_tonemarks diff --git a/pythainlp/augment/__init__.py b/pythainlp/augment/__init__.py index 4e6c9753d..455d8ef49 100644 --- a/pythainlp/augment/__init__.py +++ b/pythainlp/augment/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai text augment diff --git a/pythainlp/augment/lm/__init__.py b/pythainlp/augment/lm/__init__.py index 8d4e1a363..d806ffbb1 100644 --- a/pythainlp/augment/lm/__init__.py +++ b/pythainlp/augment/lm/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Language Models diff --git a/pythainlp/augment/lm/fasttext.py b/pythainlp/augment/lm/fasttext.py index b6c7c75dd..45b2b792f 100644 --- a/pythainlp/augment/lm/fasttext.py +++ b/pythainlp/augment/lm/fasttext.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import itertools from typing import List, Tuple diff --git a/pythainlp/augment/lm/phayathaibert.py b/pythainlp/augment/lm/phayathaibert.py index 96d39f206..ad9af5c67 100644 --- 
a/pythainlp/augment/lm/phayathaibert.py +++ b/pythainlp/augment/lm/phayathaibert.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List diff --git a/pythainlp/augment/lm/wangchanberta.py b/pythainlp/augment/lm/wangchanberta.py index 4a57de67f..90afca84c 100644 --- a/pythainlp/augment/lm/wangchanberta.py +++ b/pythainlp/augment/lm/wangchanberta.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List diff --git a/pythainlp/augment/word2vec/__init__.py b/pythainlp/augment/word2vec/__init__.py index 13f65b818..ddfb20721 100644 --- a/pythainlp/augment/word2vec/__init__.py +++ b/pythainlp/augment/word2vec/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Word2Vec diff --git a/pythainlp/augment/word2vec/bpemb_wv.py b/pythainlp/augment/word2vec/bpemb_wv.py index 9897a50c9..e0a13029d 100644 --- a/pythainlp/augment/word2vec/bpemb_wv.py +++ b/pythainlp/augment/word2vec/bpemb_wv.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple from pythainlp.augment.word2vec.core import Word2VecAug diff --git a/pythainlp/augment/word2vec/core.py b/pythainlp/augment/word2vec/core.py index 4bd45027e..9f0a4fa1f 100644 --- a/pythainlp/augment/word2vec/core.py +++ b/pythainlp/augment/word2vec/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# 
SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple import itertools diff --git a/pythainlp/augment/word2vec/ltw2v.py b/pythainlp/augment/word2vec/ltw2v.py index 53bcf02fd..bc2027b84 100644 --- a/pythainlp/augment/word2vec/ltw2v.py +++ b/pythainlp/augment/word2vec/ltw2v.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple from pythainlp.augment.word2vec.core import Word2VecAug diff --git a/pythainlp/augment/word2vec/thai2fit.py b/pythainlp/augment/word2vec/thai2fit.py index 81a05bffe..e7f61eb54 100644 --- a/pythainlp/augment/word2vec/thai2fit.py +++ b/pythainlp/augment/word2vec/thai2fit.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple from pythainlp.augment.word2vec.core import Word2VecAug diff --git a/pythainlp/augment/wordnet.py b/pythainlp/augment/wordnet.py index 052a923aa..4112ec7b9 100644 --- a/pythainlp/augment/wordnet.py +++ b/pythainlp/augment/wordnet.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thank https://dev.to/ton_ami/text-data-augmentation-synonym-replacement-4h8l diff --git a/pythainlp/benchmarks/__init__.py b/pythainlp/benchmarks/__init__.py index 5b564808e..a9d17d316 100644 --- a/pythainlp/benchmarks/__init__.py +++ b/pythainlp/benchmarks/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Performance 
benchmarking. diff --git a/pythainlp/benchmarks/word_tokenization.py b/pythainlp/benchmarks/word_tokenization.py index 5db36d3cd..9ac673701 100644 --- a/pythainlp/benchmarks/word_tokenization.py +++ b/pythainlp/benchmarks/word_tokenization.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import re diff --git a/pythainlp/chat/__init__.py b/pythainlp/chat/__init__.py index 7fc1d0865..912c90d84 100644 --- a/pythainlp/chat/__init__.py +++ b/pythainlp/chat/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ pythainlp.chat diff --git a/pythainlp/chat/core.py b/pythainlp/chat/core.py index 37b02a2dd..e79e67b47 100644 --- a/pythainlp/chat/core.py +++ b/pythainlp/chat/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import torch diff --git a/pythainlp/classify/__init__.py b/pythainlp/classify/__init__.py index 8dfc706a7..c5fa6ee0c 100644 --- a/pythainlp/classify/__init__.py +++ b/pythainlp/classify/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ pythainlp.classify diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py index 49212884d..87247da74 100644 --- a/pythainlp/classify/param_free.py +++ b/pythainlp/classify/param_free.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 
import gzip diff --git a/pythainlp/cli/__init__.py b/pythainlp/cli/__init__.py index 913cf6098..983a28d62 100644 --- a/pythainlp/cli/__init__.py +++ b/pythainlp/cli/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """Command line helpers.""" import sys diff --git a/pythainlp/cli/benchmark.py b/pythainlp/cli/benchmark.py index 116eebae1..f7b28bbfb 100644 --- a/pythainlp/cli/benchmark.py +++ b/pythainlp/cli/benchmark.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import argparse diff --git a/pythainlp/cli/data.py b/pythainlp/cli/data.py index 948bf0211..40bc3175d 100644 --- a/pythainlp/cli/data.py +++ b/pythainlp/cli/data.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Command line for PyThaiNLP's dataset/corpus management. diff --git a/pythainlp/cli/soundex.py b/pythainlp/cli/soundex.py index 6b9b14b3b..587fd9498 100644 --- a/pythainlp/cli/soundex.py +++ b/pythainlp/cli/soundex.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Command line for PyThaiNLP's soundex. 
diff --git a/pythainlp/cli/tag.py b/pythainlp/cli/tag.py index 236b8ba4f..6cb0b54b3 100644 --- a/pythainlp/cli/tag.py +++ b/pythainlp/cli/tag.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Command line for PyThaiNLP's taggers. diff --git a/pythainlp/cli/tokenize.py b/pythainlp/cli/tokenize.py index c59473ea9..2f4199748 100644 --- a/pythainlp/cli/tokenize.py +++ b/pythainlp/cli/tokenize.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Command line for PyThaiNLP's tokenizers. diff --git a/pythainlp/cls/__init__.py b/pythainlp/cls/__init__.py index 5625323de..d4cc162f7 100644 --- a/pythainlp/cls/__init__.py +++ b/pythainlp/cls/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ pythainlp.cls diff --git a/pythainlp/coref/__init__.py b/pythainlp/coref/__init__.py index e109147e3..2a2bac107 100644 --- a/pythainlp/coref/__init__.py +++ b/pythainlp/coref/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ PyThaiNLP Coreference Resolution diff --git a/pythainlp/coref/_fastcoref.py b/pythainlp/coref/_fastcoref.py index f759bf57f..368ce7b8c 100644 --- a/pythainlp/coref/_fastcoref.py +++ b/pythainlp/coref/_fastcoref.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List import spacy 
diff --git a/pythainlp/coref/core.py b/pythainlp/coref/core.py index daf7a3fe9..4ca9f4029 100644 --- a/pythainlp/coref/core.py +++ b/pythainlp/coref/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List diff --git a/pythainlp/coref/han_coref.py b/pythainlp/coref/han_coref.py index 0f334abf9..dc3368d7f 100644 --- a/pythainlp/coref/han_coref.py +++ b/pythainlp/coref/han_coref.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import spacy from pythainlp.coref._fastcoref import FastCoref diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py index 7ee6d043a..8f6b01580 100644 --- a/pythainlp/corpus/__init__.py +++ b/pythainlp/corpus/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Corpus related functions. 
diff --git a/pythainlp/corpus/common.py b/pythainlp/corpus/common.py index ec9a056a1..a44544aa6 100644 --- a/pythainlp/corpus/common.py +++ b/pythainlp/corpus/common.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ diff --git a/pythainlp/corpus/conceptnet.py b/pythainlp/corpus/conceptnet.py index 35f879e93..6b9491406 100644 --- a/pythainlp/corpus/conceptnet.py +++ b/pythainlp/corpus/conceptnet.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Get data from ConceptNet API at http://conceptnet.io diff --git a/pythainlp/corpus/core.py b/pythainlp/corpus/core.py index 18e2ec7ba..4a9f88215 100644 --- a/pythainlp/corpus/core.py +++ b/pythainlp/corpus/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Corpus related functions. diff --git a/pythainlp/corpus/icu.py b/pythainlp/corpus/icu.py index db27efce5..352b3696a 100644 --- a/pythainlp/corpus/icu.py +++ b/pythainlp/corpus/icu.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Provides an optional word list from International Components for Unicode (ICU) dictionary. 
diff --git a/pythainlp/corpus/oscar.py b/pythainlp/corpus/oscar.py index 930305490..08e5177e1 100644 --- a/pythainlp/corpus/oscar.py +++ b/pythainlp/corpus/oscar.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai unigram word frequency from OSCAR Corpus (words tokenized using ICU) diff --git a/pythainlp/corpus/th_en_translit.py b/pythainlp/corpus/th_en_translit.py index fc25a1ad7..d1af6ef91 100644 --- a/pythainlp/corpus/th_en_translit.py +++ b/pythainlp/corpus/th_en_translit.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai-English Transliteration Dictionary v1.4 diff --git a/pythainlp/corpus/tnc.py b/pythainlp/corpus/tnc.py index 545a75867..0c250b141 100644 --- a/pythainlp/corpus/tnc.py +++ b/pythainlp/corpus/tnc.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project. +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project. 
# SPDX-License-Identifier: Apache-2.0 """ Thai National Corpus word frequency diff --git a/pythainlp/corpus/ttc.py b/pythainlp/corpus/ttc.py index 633aac132..e46afa6d8 100644 --- a/pythainlp/corpus/ttc.py +++ b/pythainlp/corpus/ttc.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai Textbook Corpus (TTC) word frequency diff --git a/pythainlp/corpus/util.py b/pythainlp/corpus/util.py index 5c4f01fd6..f12e7a71a 100644 --- a/pythainlp/corpus/util.py +++ b/pythainlp/corpus/util.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Tool for creating word lists diff --git a/pythainlp/corpus/volubilis.py b/pythainlp/corpus/volubilis.py index fc7c63b9a..af62d27e0 100644 --- a/pythainlp/corpus/volubilis.py +++ b/pythainlp/corpus/volubilis.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Provides an optional word list from the Volubilis dictionary. diff --git a/pythainlp/corpus/wikipedia.py b/pythainlp/corpus/wikipedia.py index 96f43cdc8..8bd362667 100644 --- a/pythainlp/corpus/wikipedia.py +++ b/pythainlp/corpus/wikipedia.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Provides an optional word list from Thai Wikipedia titles. 
diff --git a/pythainlp/corpus/wordnet.py b/pythainlp/corpus/wordnet.py index 7044e7b6f..7884aa272 100644 --- a/pythainlp/corpus/wordnet.py +++ b/pythainlp/corpus/wordnet.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ NLTK WordNet wrapper diff --git a/pythainlp/el/__init__.py b/pythainlp/el/__init__.py index bc305e556..93ffffb89 100644 --- a/pythainlp/el/__init__.py +++ b/pythainlp/el/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ pythainlp.el diff --git a/pythainlp/el/_multiel.py b/pythainlp/el/_multiel.py index 43e155fb4..90fd51967 100644 --- a/pythainlp/el/_multiel.py +++ b/pythainlp/el/_multiel.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 diff --git a/pythainlp/el/core.py b/pythainlp/el/core.py index b70bf7a71..34375e354 100644 --- a/pythainlp/el/core.py +++ b/pythainlp/el/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Union diff --git a/pythainlp/generate/__init__.py b/pythainlp/generate/__init__.py index d7845a976..d7e4b8ca9 100644 --- a/pythainlp/generate/__init__.py +++ b/pythainlp/generate/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai Text Generation diff --git a/pythainlp/generate/core.py b/pythainlp/generate/core.py index 
208fa100a..0d589291e 100644 --- a/pythainlp/generate/core.py +++ b/pythainlp/generate/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Text generator using n-gram language model diff --git a/pythainlp/generate/thai2fit.py b/pythainlp/generate/thai2fit.py index 7b8f3db75..240c0b28b 100644 --- a/pythainlp/generate/thai2fit.py +++ b/pythainlp/generate/thai2fit.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai2fit: Thai Wikipeida Language Model for Text Generation diff --git a/pythainlp/generate/wangchanglm.py b/pythainlp/generate/wangchanglm.py index b988fc10a..e99fcd2e7 100644 --- a/pythainlp/generate/wangchanglm.py +++ b/pythainlp/generate/wangchanglm.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import re import torch diff --git a/pythainlp/khavee/__init__.py b/pythainlp/khavee/__init__.py index b67f4b62e..af777b3b0 100644 --- a/pythainlp/khavee/__init__.py +++ b/pythainlp/khavee/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 __all__ = ["KhaveeVerifier"] diff --git a/pythainlp/khavee/core.py b/pythainlp/khavee/core.py index febfdaa60..f3a76eb6b 100644 --- a/pythainlp/khavee/core.py +++ b/pythainlp/khavee/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 # ruff: noqa: C901 diff --git 
a/pythainlp/khavee/example.py b/pythainlp/khavee/example.py index bd8751650..c05b8b03b 100644 --- a/pythainlp/khavee/example.py +++ b/pythainlp/khavee/example.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import core diff --git a/pythainlp/morpheme/__init__.py b/pythainlp/morpheme/__init__.py index d04bcb5ba..94eb54822 100644 --- a/pythainlp/morpheme/__init__.py +++ b/pythainlp/morpheme/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ diff --git a/pythainlp/morpheme/thaiwordcheck.py b/pythainlp/morpheme/thaiwordcheck.py index b2b71b39f..2159bef08 100644 --- a/pythainlp/morpheme/thaiwordcheck.py +++ b/pythainlp/morpheme/thaiwordcheck.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Check if a word is a "native Thai word" diff --git a/pythainlp/morpheme/word_formation.py b/pythainlp/morpheme/word_formation.py index 85c8c453d..f3308c8cc 100644 --- a/pythainlp/morpheme/word_formation.py +++ b/pythainlp/morpheme/word_formation.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from pythainlp import thai_consonants diff --git a/pythainlp/parse/__init__.py b/pythainlp/parse/__init__.py index 257a1adf0..032af55d0 100644 --- a/pythainlp/parse/__init__.py +++ b/pythainlp/parse/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # 
SPDX-License-Identifier: Apache-2.0 """ diff --git a/pythainlp/parse/core.py b/pythainlp/parse/core.py index 3180baf48..137b86381 100644 --- a/pythainlp/parse/core.py +++ b/pythainlp/parse/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Union diff --git a/pythainlp/phayathaibert/__init__.py b/pythainlp/phayathaibert/__init__.py index 3dc2139c4..abd53e05d 100644 --- a/pythainlp/phayathaibert/__init__.py +++ b/pythainlp/phayathaibert/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ diff --git a/pythainlp/phayathaibert/core.py b/pythainlp/phayathaibert/core.py index 6a756c51f..f3563a45e 100644 --- a/pythainlp/phayathaibert/core.py +++ b/pythainlp/phayathaibert/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import Callable, List, Tuple, Union diff --git a/pythainlp/soundex/__init__.py b/pythainlp/soundex/__init__.py index 752e6ace2..c86b855eb 100644 --- a/pythainlp/soundex/__init__.py +++ b/pythainlp/soundex/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai soundex diff --git a/pythainlp/soundex/core.py b/pythainlp/soundex/core.py index eb88b81ca..52a8bc5d2 100644 --- a/pythainlp/soundex/core.py +++ b/pythainlp/soundex/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # 
SPDX-License-Identifier: Apache-2.0 """ Thai soundex diff --git a/pythainlp/soundex/lk82.py b/pythainlp/soundex/lk82.py index 78dc64800..399267dea 100644 --- a/pythainlp/soundex/lk82.py +++ b/pythainlp/soundex/lk82.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai soundex - LK82 system diff --git a/pythainlp/soundex/metasound.py b/pythainlp/soundex/metasound.py index 3e8c609bb..d6d572070 100644 --- a/pythainlp/soundex/metasound.py +++ b/pythainlp/soundex/metasound.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai soundex - MetaSound system diff --git a/pythainlp/soundex/prayut_and_somchaip.py b/pythainlp/soundex/prayut_and_somchaip.py index 231c91c15..f935cb89b 100644 --- a/pythainlp/soundex/prayut_and_somchaip.py +++ b/pythainlp/soundex/prayut_and_somchaip.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai-English Cross-Language Transliterated Word Retrieval diff --git a/pythainlp/soundex/sound.py b/pythainlp/soundex/sound.py index 0926ea3ef..e5e9a62eb 100644 --- a/pythainlp/soundex/sound.py +++ b/pythainlp/soundex/sound.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List import panphon diff --git a/pythainlp/soundex/udom83.py b/pythainlp/soundex/udom83.py index fd0e5f6db..7e53bec88 100644 --- a/pythainlp/soundex/udom83.py +++ b/pythainlp/soundex/udom83.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: 
Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai soundex - Udom83 system diff --git a/pythainlp/spell/__init__.py b/pythainlp/spell/__init__.py index c6c5be584..351379f7a 100644 --- a/pythainlp/spell/__init__.py +++ b/pythainlp/spell/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Spell checking and correction. diff --git a/pythainlp/spell/core.py b/pythainlp/spell/core.py index 9378ec11b..774e48ccd 100644 --- a/pythainlp/spell/core.py +++ b/pythainlp/spell/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Spell checking functions diff --git a/pythainlp/spell/phunspell.py b/pythainlp/spell/phunspell.py index 5f76a3b81..028569ea2 100644 --- a/pythainlp/spell/phunspell.py +++ b/pythainlp/spell/phunspell.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Phunspell diff --git a/pythainlp/spell/symspellpy.py b/pythainlp/spell/symspellpy.py index 81e876cc3..d9fb72b00 100644 --- a/pythainlp/spell/symspellpy.py +++ b/pythainlp/spell/symspellpy.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ symspellpy diff --git a/pythainlp/spell/tltk.py b/pythainlp/spell/tltk.py index eaf80a218..604652a73 100644 --- a/pythainlp/spell/tltk.py +++ b/pythainlp/spell/tltk.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project 
+# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ TLTK diff --git a/pythainlp/spell/wanchanberta_thai_grammarly.py b/pythainlp/spell/wanchanberta_thai_grammarly.py index 9a5e70394..ecf9d7856 100644 --- a/pythainlp/spell/wanchanberta_thai_grammarly.py +++ b/pythainlp/spell/wanchanberta_thai_grammarly.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Two-stage Thai Misspelling Correction based on Pre-trained Language Models diff --git a/pythainlp/summarize/__init__.py b/pythainlp/summarize/__init__.py index a876da510..056c81c46 100644 --- a/pythainlp/summarize/__init__.py +++ b/pythainlp/summarize/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Text summarization diff --git a/pythainlp/summarize/core.py b/pythainlp/summarize/core.py index 9208f32be..c3da14e4b 100644 --- a/pythainlp/summarize/core.py +++ b/pythainlp/summarize/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Text summarization and keyword extraction diff --git a/pythainlp/summarize/freq.py b/pythainlp/summarize/freq.py index bcf9c69cc..5ab798f36 100644 --- a/pythainlp/summarize/freq.py +++ b/pythainlp/summarize/freq.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Summarization by frequency of words diff --git a/pythainlp/summarize/keybert.py b/pythainlp/summarize/keybert.py index 59a8b0976..766c22525 100644 --- 
a/pythainlp/summarize/keybert.py +++ b/pythainlp/summarize/keybert.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Minimal re-implementation of KeyBERT. diff --git a/pythainlp/summarize/mt5.py b/pythainlp/summarize/mt5.py index bbf42acd2..de1e6ecbf 100644 --- a/pythainlp/summarize/mt5.py +++ b/pythainlp/summarize/mt5.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Summarization by mT5 model diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py index 959ffabe3..021f6f894 100644 --- a/pythainlp/tag/__init__.py +++ b/pythainlp/tag/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Linguistic and other taggers. diff --git a/pythainlp/tag/_tag_perceptron.py b/pythainlp/tag/_tag_perceptron.py index 871bde3b1..068b2b186 100644 --- a/pythainlp/tag/_tag_perceptron.py +++ b/pythainlp/tag/_tag_perceptron.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Perceptron Tagger. 
diff --git a/pythainlp/tag/blackboard.py b/pythainlp/tag/blackboard.py index f3ad8a74e..091820348 100644 --- a/pythainlp/tag/blackboard.py +++ b/pythainlp/tag/blackboard.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple diff --git a/pythainlp/tag/chunk.py b/pythainlp/tag/chunk.py index 73223fb0e..37cb6f03d 100644 --- a/pythainlp/tag/chunk.py +++ b/pythainlp/tag/chunk.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple diff --git a/pythainlp/tag/crfchunk.py b/pythainlp/tag/crfchunk.py index 7e33594b5..239bdf254 100644 --- a/pythainlp/tag/crfchunk.py +++ b/pythainlp/tag/crfchunk.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import Dict, List, Tuple from pycrfsuite import Tagger as CRFTagger diff --git a/pythainlp/tag/locations.py b/pythainlp/tag/locations.py index c332f608c..3bd55c4e1 100644 --- a/pythainlp/tag/locations.py +++ b/pythainlp/tag/locations.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Recognizes locations in text diff --git a/pythainlp/tag/named_entity.py b/pythainlp/tag/named_entity.py index 4c2e58b0d..dfaefec7b 100644 --- a/pythainlp/tag/named_entity.py +++ b/pythainlp/tag/named_entity.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: 
Apache-2.0 """ Named-entity recognizer diff --git a/pythainlp/tag/orchid.py b/pythainlp/tag/orchid.py index 0ab856e89..2698a139f 100644 --- a/pythainlp/tag/orchid.py +++ b/pythainlp/tag/orchid.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Data preprocessing for ORCHID corpus diff --git a/pythainlp/tag/perceptron.py b/pythainlp/tag/perceptron.py index df8cf73b2..f6984249c 100644 --- a/pythainlp/tag/perceptron.py +++ b/pythainlp/tag/perceptron.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Perceptron part-of-speech tagger diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py index 6b3748c70..f9dfe9b46 100644 --- a/pythainlp/tag/pos_tag.py +++ b/pythainlp/tag/pos_tag.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple diff --git a/pythainlp/tag/thai_nner.py b/pythainlp/tag/thai_nner.py index 43d9693fe..0e69b3075 100644 --- a/pythainlp/tag/thai_nner.py +++ b/pythainlp/tag/thai_nner.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple from thai_nner import NNER diff --git a/pythainlp/tag/thainer.py b/pythainlp/tag/thainer.py index 0d303e8df..97e1f15bf 100644 --- a/pythainlp/tag/thainer.py +++ b/pythainlp/tag/thainer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # 
SPDX-License-Identifier: Apache-2.0 """ Named-entity recognizer diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py index 6049fb4e6..9c9530822 100644 --- a/pythainlp/tag/tltk.py +++ b/pythainlp/tag/tltk.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple, Union try: diff --git a/pythainlp/tag/unigram.py b/pythainlp/tag/unigram.py index f5d69fad7..e68f70d16 100644 --- a/pythainlp/tag/unigram.py +++ b/pythainlp/tag/unigram.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Unigram Part-Of-Speech tagger diff --git a/pythainlp/tag/wangchanberta_onnx.py b/pythainlp/tag/wangchanberta_onnx.py index 0d090b367..c394a5e40 100644 --- a/pythainlp/tag/wangchanberta_onnx.py +++ b/pythainlp/tag/wangchanberta_onnx.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List import json diff --git a/pythainlp/tokenize/__init__.py b/pythainlp/tokenize/__init__.py index 5fed5e42e..6ec96955c 100644 --- a/pythainlp/tokenize/__init__.py +++ b/pythainlp/tokenize/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Tokenizers at different levels of linguistic analysis. 
diff --git a/pythainlp/tokenize/_utils.py b/pythainlp/tokenize/_utils.py index 6530c2e0c..30d77f1e8 100644 --- a/pythainlp/tokenize/_utils.py +++ b/pythainlp/tokenize/_utils.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Utility functions for tokenize module. diff --git a/pythainlp/tokenize/attacut.py b/pythainlp/tokenize/attacut.py index 35feaca5a..f70d7d431 100644 --- a/pythainlp/tokenize/attacut.py +++ b/pythainlp/tokenize/attacut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -* -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Wrapper for AttaCut - Fast and Reasonably Accurate Word Tokenizer for Thai diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py index 910bfede3..993bcff64 100644 --- a/pythainlp/tokenize/core.py +++ b/pythainlp/tokenize/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Generic functions of tokenizers diff --git a/pythainlp/tokenize/crfcls.py b/pythainlp/tokenize/crfcls.py index c46b5d442..8adfa6f01 100644 --- a/pythainlp/tokenize/crfcls.py +++ b/pythainlp/tokenize/crfcls.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Clause segmenter diff --git a/pythainlp/tokenize/crfcut.py b/pythainlp/tokenize/crfcut.py index 47331da82..e89cdbfa9 100644 --- a/pythainlp/tokenize/crfcut.py +++ b/pythainlp/tokenize/crfcut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 
PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ CRFCut - Thai sentence segmenter. diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py index 12f0b7ae7..38178f344 100644 --- a/pythainlp/tokenize/deepcut.py +++ b/pythainlp/tokenize/deepcut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Wrapper for deepcut Thai word segmentation. deepcut is a diff --git a/pythainlp/tokenize/etcc.py b/pythainlp/tokenize/etcc.py index 88041473f..26c4cea4a 100644 --- a/pythainlp/tokenize/etcc.py +++ b/pythainlp/tokenize/etcc.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Segmenting text into Enhanced Thai Character Clusters (ETCCs) diff --git a/pythainlp/tokenize/han_solo.py b/pythainlp/tokenize/han_solo.py index d9d20b5c1..8b876597e 100644 --- a/pythainlp/tokenize/han_solo.py +++ b/pythainlp/tokenize/han_solo.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-FileCopyrightText: Copyright 2019 Ponrawee Prasertsom # SPDX-License-Identifier: Apache-2.0 """ diff --git a/pythainlp/tokenize/longest.py b/pythainlp/tokenize/longest.py index 016c910c9..1e4192b87 100644 --- a/pythainlp/tokenize/longest.py +++ b/pythainlp/tokenize/longest.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Dictionary-based longest-matching Thai word segmentation. 
Implementation is based diff --git a/pythainlp/tokenize/multi_cut.py b/pythainlp/tokenize/multi_cut.py index 526618ecc..ae7ea4528 100644 --- a/pythainlp/tokenize/multi_cut.py +++ b/pythainlp/tokenize/multi_cut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Multi cut -- Thai word segmentation with maximum matching. diff --git a/pythainlp/tokenize/nercut.py b/pythainlp/tokenize/nercut.py index 3c4e015f7..d30188f87 100644 --- a/pythainlp/tokenize/nercut.py +++ b/pythainlp/tokenize/nercut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ nercut 0.2 diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py index 77233ac8a..504ed50be 100644 --- a/pythainlp/tokenize/newmm.py +++ b/pythainlp/tokenize/newmm.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Dictionary-based maximal matching word segmentation, constrained by diff --git a/pythainlp/tokenize/nlpo3.py b/pythainlp/tokenize/nlpo3.py index b2231c771..839ca5bd5 100644 --- a/pythainlp/tokenize/nlpo3.py +++ b/pythainlp/tokenize/nlpo3.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from sys import stderr from typing import List diff --git a/pythainlp/tokenize/oskut.py b/pythainlp/tokenize/oskut.py index b8ccaf08e..ffe9bc61f 100644 --- a/pythainlp/tokenize/oskut.py +++ b/pythainlp/tokenize/oskut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP 
Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Wrapper OSKut (Out-of-domain StacKed cut for Word Segmentation). diff --git a/pythainlp/tokenize/pyicu.py b/pythainlp/tokenize/pyicu.py index 78a7ddad7..44a124fc6 100644 --- a/pythainlp/tokenize/pyicu.py +++ b/pythainlp/tokenize/pyicu.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Wrapper for PyICU word segmentation. This wrapper module uses diff --git a/pythainlp/tokenize/sefr_cut.py b/pythainlp/tokenize/sefr_cut.py index 1d19a12d6..34579e2eb 100644 --- a/pythainlp/tokenize/sefr_cut.py +++ b/pythainlp/tokenize/sefr_cut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Wrapper for SEFR CUT Thai word segmentation. 
SEFR CUT is a diff --git a/pythainlp/tokenize/ssg.py b/pythainlp/tokenize/ssg.py index b91cc0e87..70074fd0b 100644 --- a/pythainlp/tokenize/ssg.py +++ b/pythainlp/tokenize/ssg.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py index bfeab7aca..c22477bca 100644 --- a/pythainlp/tokenize/tcc.py +++ b/pythainlp/tokenize/tcc.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ The implementation of tokenizer according to Thai Character Clusters (TCCs) diff --git a/pythainlp/tokenize/tcc_p.py b/pythainlp/tokenize/tcc_p.py index 55d30099f..21a9cdef6 100644 --- a/pythainlp/tokenize/tcc_p.py +++ b/pythainlp/tokenize/tcc_p.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ The implementation of tokenizer according to Thai Character Clusters (TCCs) diff --git a/pythainlp/tokenize/thaisumcut.py b/pythainlp/tokenize/thaisumcut.py index 3c48a6e6d..2ab8152a3 100644 --- a/pythainlp/tokenize/thaisumcut.py +++ b/pythainlp/tokenize/thaisumcut.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-FileCopyrightText: Copyright 2020 Nakhun Chumpolsathien # SPDX-License-Identifier: Apache-2.0 """ diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py index 2a70a1b91..a8a6b0c56 100644 --- a/pythainlp/tokenize/tltk.py +++ b/pythainlp/tokenize/tltk.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 
2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List try: diff --git a/pythainlp/tokenize/wtsplit.py b/pythainlp/tokenize/wtsplit.py index 4955a2d64..027f02eb6 100644 --- a/pythainlp/tokenize/wtsplit.py +++ b/pythainlp/tokenize/wtsplit.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Where's the Point? Self-Supervised Multilingual Punctuation-Agnostic Sentence Segmentation diff --git a/pythainlp/tools/__init__.py b/pythainlp/tools/__init__.py index d818ab69a..0e07a37b5 100644 --- a/pythainlp/tools/__init__.py +++ b/pythainlp/tools/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 __all__ = [ "PYTHAINLP_DEFAULT_DATA_DIR", diff --git a/pythainlp/tools/misspell.py b/pythainlp/tools/misspell.py index a5d9cb851..e9c5b3061 100644 --- a/pythainlp/tools/misspell.py +++ b/pythainlp/tools/misspell.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List import numpy as np diff --git a/pythainlp/tools/path.py b/pythainlp/tools/path.py index 1b2b8fabc..b3124c16e 100644 --- a/pythainlp/tools/path.py +++ b/pythainlp/tools/path.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ PyThaiNLP data tools diff --git a/pythainlp/translate/__init__.py b/pythainlp/translate/__init__.py index d0c12bf8d..10f011eda 100644 --- a/pythainlp/translate/__init__.py +++ 
b/pythainlp/translate/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Language translation. diff --git a/pythainlp/translate/core.py b/pythainlp/translate/core.py index d41659b1b..8482d4714 100644 --- a/pythainlp/translate/core.py +++ b/pythainlp/translate/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 diff --git a/pythainlp/translate/en_th.py b/pythainlp/translate/en_th.py index 439661135..2074c2112 100644 --- a/pythainlp/translate/en_th.py +++ b/pythainlp/translate/en_th.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ English-Thai Machine Translation diff --git a/pythainlp/translate/th_fr.py b/pythainlp/translate/th_fr.py index d1a7e1fe7..e9a97aa21 100644 --- a/pythainlp/translate/th_fr.py +++ b/pythainlp/translate/th_fr.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai-French Machine Translation diff --git a/pythainlp/translate/zh_th.py b/pythainlp/translate/zh_th.py index 74322f88f..daacd7c07 100644 --- a/pythainlp/translate/zh_th.py +++ b/pythainlp/translate/zh_th.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Lalita Chinese-Thai Machine Translation diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py index dfc9aae00..bf5789d3d 100644 --- 
a/pythainlp/transliterate/__init__.py +++ b/pythainlp/transliterate/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Transliteration. diff --git a/pythainlp/transliterate/core.py b/pythainlp/transliterate/core.py index 385127a0e..b1d3d502e 100644 --- a/pythainlp/transliterate/core.py +++ b/pythainlp/transliterate/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 DEFAULT_ROMANIZE_ENGINE = "royin" diff --git a/pythainlp/transliterate/ipa.py b/pythainlp/transliterate/ipa.py index b04877e6f..840dce484 100644 --- a/pythainlp/transliterate/ipa.py +++ b/pythainlp/transliterate/ipa.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Transliterating text to International Phonetic Alphabet (IPA) diff --git a/pythainlp/transliterate/iso_11940.py b/pythainlp/transliterate/iso_11940.py index bf0f93555..d741e8f2d 100644 --- a/pythainlp/transliterate/iso_11940.py +++ b/pythainlp/transliterate/iso_11940.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Transliterating Thai text using ISO 11940 diff --git a/pythainlp/transliterate/lookup.py b/pythainlp/transliterate/lookup.py index 0d946c96b..7d8082103 100644 --- a/pythainlp/transliterate/lookup.py +++ b/pythainlp/transliterate/lookup.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # 
SPDX-License-Identifier: Apache-2.0 """ Look up romanized Thai words in a predefined dictionary compiled by Wannaphong, 2022. diff --git a/pythainlp/transliterate/pyicu.py b/pythainlp/transliterate/pyicu.py index a344b2636..8423b2bcc 100644 --- a/pythainlp/transliterate/pyicu.py +++ b/pythainlp/transliterate/pyicu.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Transliterating text to International Phonetic Alphabet (IPA) diff --git a/pythainlp/transliterate/royin.py b/pythainlp/transliterate/royin.py index 88f0ff054..3489a13c3 100644 --- a/pythainlp/transliterate/royin.py +++ b/pythainlp/transliterate/royin.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ The Royal Thai General System of Transcription (RTGS) diff --git a/pythainlp/transliterate/spoonerism.py b/pythainlp/transliterate/spoonerism.py index e450aeefd..de994941b 100644 --- a/pythainlp/transliterate/spoonerism.py +++ b/pythainlp/transliterate/spoonerism.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from pythainlp.transliterate import pronunciate from pythainlp import thai_consonants diff --git a/pythainlp/transliterate/thai2rom.py b/pythainlp/transliterate/thai2rom.py index 2791e2d3f..f3fea086a 100644 --- a/pythainlp/transliterate/thai2rom.py +++ b/pythainlp/transliterate/thai2rom.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Romanization of Thai words based on machine-learnt engine 
("thai2rom") diff --git a/pythainlp/transliterate/thai2rom_onnx.py b/pythainlp/transliterate/thai2rom_onnx.py index 6ee96b537..147457392 100644 --- a/pythainlp/transliterate/thai2rom_onnx.py +++ b/pythainlp/transliterate/thai2rom_onnx.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Romanization of Thai words based on machine-learnt engine in ONNX runtime ("thai2rom") diff --git a/pythainlp/transliterate/thaig2p.py b/pythainlp/transliterate/thaig2p.py index 661c26f88..5923b897e 100644 --- a/pythainlp/transliterate/thaig2p.py +++ b/pythainlp/transliterate/thaig2p.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai Grapheme-to-Phoneme (Thai G2P) diff --git a/pythainlp/transliterate/tltk.py b/pythainlp/transliterate/tltk.py index 86e453149..92008b4df 100644 --- a/pythainlp/transliterate/tltk.py +++ b/pythainlp/transliterate/tltk.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 try: from tltk.nlp import g2p, th2ipa, th2roman diff --git a/pythainlp/transliterate/w2p.py b/pythainlp/transliterate/w2p.py index b6f5880d1..96034602f 100644 --- a/pythainlp/transliterate/w2p.py +++ b/pythainlp/transliterate/w2p.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai Word-to-Phoneme (Thai W2P) diff --git a/pythainlp/transliterate/wunsen.py b/pythainlp/transliterate/wunsen.py index ccde30b31..dcfa05a78 100644 --- a/pythainlp/transliterate/wunsen.py +++ 
b/pythainlp/transliterate/wunsen.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Transliterating Japanese/Korean/Mandarin/Vietnamese romanization text diff --git a/pythainlp/ulmfit/__init__.py b/pythainlp/ulmfit/__init__.py index 4fdf3b518..ce272b249 100644 --- a/pythainlp/ulmfit/__init__.py +++ b/pythainlp/ulmfit/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Universal Language Model Fine-tuning for Text Classification (ULMFiT). diff --git a/pythainlp/ulmfit/core.py b/pythainlp/ulmfit/core.py index 834ecf17b..f1e6d7a91 100644 --- a/pythainlp/ulmfit/core.py +++ b/pythainlp/ulmfit/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Universal Language Model Fine-tuning for Text Classification (ULMFiT). 
diff --git a/pythainlp/ulmfit/preprocess.py b/pythainlp/ulmfit/preprocess.py index 6a3c5a027..41b319121 100644 --- a/pythainlp/ulmfit/preprocess.py +++ b/pythainlp/ulmfit/preprocess.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Preprocessing for ULMFiT diff --git a/pythainlp/ulmfit/tokenizer.py b/pythainlp/ulmfit/tokenizer.py index 0f726184c..9801cb0cc 100644 --- a/pythainlp/ulmfit/tokenizer.py +++ b/pythainlp/ulmfit/tokenizer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Tokenzier classes for ULMFiT diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py index f8b0aacfa..6807c9d53 100644 --- a/pythainlp/util/__init__.py +++ b/pythainlp/util/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Utility functions, like date conversion and digit conversion diff --git a/pythainlp/util/abbreviation.py b/pythainlp/util/abbreviation.py index 1c48e3d94..f84297e63 100644 --- a/pythainlp/util/abbreviation.py +++ b/pythainlp/util/abbreviation.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai abbreviation tools diff --git a/pythainlp/util/collate.py b/pythainlp/util/collate.py index 0e1e1856c..7c2395d57 100644 --- a/pythainlp/util/collate.py +++ b/pythainlp/util/collate.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # 
SPDX-License-Identifier: Apache-2.0 """ Thai collation (sorted according to Thai dictionary order) diff --git a/pythainlp/util/date.py b/pythainlp/util/date.py index 78c4c4dc6..52723c2bb 100644 --- a/pythainlp/util/date.py +++ b/pythainlp/util/date.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai date/time conversion. diff --git a/pythainlp/util/digitconv.py b/pythainlp/util/digitconv.py index b0cfad432..301985b63 100644 --- a/pythainlp/util/digitconv.py +++ b/pythainlp/util/digitconv.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Convert digits diff --git a/pythainlp/util/emojiconv.py b/pythainlp/util/emojiconv.py index 7f6ed2302..060a93c82 100644 --- a/pythainlp/util/emojiconv.py +++ b/pythainlp/util/emojiconv.py @@ -1,5 +1,5 @@ # -*- coding_utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Convert emojis diff --git a/pythainlp/util/encoding.py b/pythainlp/util/encoding.py index 470628a0e..e6e5a39f5 100644 --- a/pythainlp/util/encoding.py +++ b/pythainlp/util/encoding.py @@ -1,5 +1,5 @@ # -*- coding_utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 def tis620_to_utf8(text: str)->str: """ diff --git a/pythainlp/util/keyboard.py b/pythainlp/util/keyboard.py index c5fb7ecb2..4f7ccfaa2 100644 --- a/pythainlp/util/keyboard.py +++ b/pythainlp/util/keyboard.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # 
SPDX-License-Identifier: Apache-2.0 """ Functions related to keyboard layout. diff --git a/pythainlp/util/keywords.py b/pythainlp/util/keywords.py index fba02749b..ec07d8327 100644 --- a/pythainlp/util/keywords.py +++ b/pythainlp/util/keywords.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from collections import Counter from typing import Dict, List diff --git a/pythainlp/util/morse.py b/pythainlp/util/morse.py index 89c42871f..07241fa12 100644 --- a/pythainlp/util/morse.py +++ b/pythainlp/util/morse.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 THAI_MORSE_CODE = { diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py index a225d949b..3276e6e8d 100644 --- a/pythainlp/util/normalize.py +++ b/pythainlp/util/normalize.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Text normalization diff --git a/pythainlp/util/numtoword.py b/pythainlp/util/numtoword.py index b339b0795..0b7f30ffe 100644 --- a/pythainlp/util/numtoword.py +++ b/pythainlp/util/numtoword.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Convert number value to Thai read out diff --git a/pythainlp/util/phoneme.py b/pythainlp/util/phoneme.py index a2e66b9b4..075d43a9d 100644 --- a/pythainlp/util/phoneme.py +++ b/pythainlp/util/phoneme.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 
PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Phonemes util diff --git a/pythainlp/util/pronounce.py b/pythainlp/util/pronounce.py index 85f0c71ed..db0869b7f 100644 --- a/pythainlp/util/pronounce.py +++ b/pythainlp/util/pronounce.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List diff --git a/pythainlp/util/remove_trailing_repeat_consonants.py b/pythainlp/util/remove_trailing_repeat_consonants.py index c3af5ae68..e0770703e 100644 --- a/pythainlp/util/remove_trailing_repeat_consonants.py +++ b/pythainlp/util/remove_trailing_repeat_consonants.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Removement of repeated consonants at the end of words diff --git a/pythainlp/util/spell_words.py b/pythainlp/util/spell_words.py index e11716777..86f255dc7 100644 --- a/pythainlp/util/spell_words.py +++ b/pythainlp/util/spell_words.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import re from typing import List diff --git a/pythainlp/util/strftime.py b/pythainlp/util/strftime.py index c90074e7d..8bd67aba0 100644 --- a/pythainlp/util/strftime.py +++ b/pythainlp/util/strftime.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai date/time formatting. 
diff --git a/pythainlp/util/syllable.py b/pythainlp/util/syllable.py index 79aafff3c..ef0a31f2f 100644 --- a/pythainlp/util/syllable.py +++ b/pythainlp/util/syllable.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Syllable tools diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py index 272765fcc..ae95c04e6 100644 --- a/pythainlp/util/thai.py +++ b/pythainlp/util/thai.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Check if it is Thai text diff --git a/pythainlp/util/thaiwordcheck.py b/pythainlp/util/thaiwordcheck.py index 5ae88efe2..0fe3f296b 100644 --- a/pythainlp/util/thaiwordcheck.py +++ b/pythainlp/util/thaiwordcheck.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import warnings diff --git a/pythainlp/util/time.py b/pythainlp/util/time.py index 001ab676a..26e97cf6c 100644 --- a/pythainlp/util/time.py +++ b/pythainlp/util/time.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Spell out time as Thai words. diff --git a/pythainlp/util/trie.py b/pythainlp/util/trie.py index ddde363c3..ef01099ae 100644 --- a/pythainlp/util/trie.py +++ b/pythainlp/util/trie.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Trie data structure. 
diff --git a/pythainlp/util/wordtonum.py b/pythainlp/util/wordtonum.py index f333ef2ae..acb0b8217 100644 --- a/pythainlp/util/wordtonum.py +++ b/pythainlp/util/wordtonum.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Convert number in words to a computable number value diff --git a/pythainlp/wangchanberta/__init__.py b/pythainlp/wangchanberta/__init__.py index fde8d9e93..5efeb502c 100644 --- a/pythainlp/wangchanberta/__init__.py +++ b/pythainlp/wangchanberta/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 __all__ = [ "NamedEntityRecognition", diff --git a/pythainlp/wangchanberta/core.py b/pythainlp/wangchanberta/core.py index a389d015e..afae29c40 100644 --- a/pythainlp/wangchanberta/core.py +++ b/pythainlp/wangchanberta/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple, Union import re diff --git a/pythainlp/word_vector/__init__.py b/pythainlp/word_vector/__init__.py index 268023bb6..fea315f4b 100644 --- a/pythainlp/word_vector/__init__.py +++ b/pythainlp/word_vector/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ thai2fit - Thai word vector. 
diff --git a/pythainlp/word_vector/core.py b/pythainlp/word_vector/core.py index de127edf2..03932a735 100644 --- a/pythainlp/word_vector/core.py +++ b/pythainlp/word_vector/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple diff --git a/pythainlp/wsd/__init__.py b/pythainlp/wsd/__init__.py index 03ac08fe6..357b4bc44 100644 --- a/pythainlp/wsd/__init__.py +++ b/pythainlp/wsd/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Thai Word Sense Disambiguation (WSD) diff --git a/pythainlp/wsd/core.py b/pythainlp/wsd/core.py index 6acd0ea9e..71ac033f3 100644 --- a/pythainlp/wsd/core.py +++ b/pythainlp/wsd/core.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 from typing import List, Tuple, Union diff --git a/setup.py b/setup.py index 34c22c684..a7e314fad 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Setup script for PyThaiNLP. diff --git a/tests/__init__.py b/tests/__init__.py index 63796acb2..95434038f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ Unit test. 
diff --git a/tests/test_ancient.py b/tests/test_ancient.py index dbcb1e3bc..349a7d3e5 100644 --- a/tests/test_ancient.py +++ b/tests/test_ancient.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest from pythainlp.ancient import aksonhan_to_current diff --git a/tests/test_augment.py b/tests/test_augment.py index 28c9a76de..5b2d81380 100644 --- a/tests/test_augment.py +++ b/tests/test_augment.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index db945da48..c14a1a94d 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_classify.py b/tests/test_classify.py index 37084d63e..e993a9a9a 100644 --- a/tests/test_classify.py +++ b/tests/test_classify.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_cli.py b/tests/test_cli.py index b1b8dff34..4f9258536 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_coref.py b/tests/test_coref.py index 3329e5e40..2aaf2a8ff 100644 --- a/tests/test_coref.py +++ 
b/tests/test_coref.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_corpus.py b/tests/test_corpus.py index 8e488692d..031e7a883 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import os diff --git a/tests/test_el.py b/tests/test_el.py index c41d42bc7..9266ca639 100644 --- a/tests/test_el.py +++ b/tests/test_el.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_generate.py b/tests/test_generate.py index 4be6976af..6114300fe 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest @@ -15,11 +15,13 @@ def test_unigram(self): self.assertIsNotNone(_tnc_unigram.gen_sentence("ผม", output_str=False)) self.assertIsNotNone(_tnc_unigram.gen_sentence()) self.assertIsNotNone(_tnc_unigram.gen_sentence(duplicate=True)) + _ttc_unigram = Unigram("ttc") self.assertIsNotNone(_ttc_unigram.gen_sentence("ผม")) self.assertIsNotNone(_ttc_unigram.gen_sentence("ผม", output_str=False)) self.assertIsNotNone(_ttc_unigram.gen_sentence()) self.assertIsNotNone(_ttc_unigram.gen_sentence(duplicate=True)) + _oscar_unigram = Unigram("oscar") self.assertIsNotNone(_oscar_unigram.gen_sentence("ผม")) self.assertIsNotNone( diff --git a/tests/test_khavee.py b/tests/test_khavee.py index 05e1619b0..7dc5538d8 
100644 --- a/tests/test_khavee.py +++ b/tests/test_khavee.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_misspell.py b/tests/test_misspell.py index 2995f32f0..cc479abd2 100644 --- a/tests/test_misspell.py +++ b/tests/test_misspell.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_morpheme.py b/tests/test_morpheme.py index af8250360..7a2f054eb 100644 --- a/tests/test_morpheme.py +++ b/tests/test_morpheme.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_parse.py b/tests/test_parse.py index d247b2c55..265bcfc32 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_soundex.py b/tests/test_soundex.py index 16da586c4..35bd718be 100644 --- a/tests/test_soundex.py +++ b/tests/test_soundex.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_spell.py b/tests/test_spell.py index 6918a3e04..094bbf45c 100644 --- a/tests/test_spell.py +++ b/tests/test_spell.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 
2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_summarize.py b/tests/test_summarize.py index 6d5cf32e7..faaafc082 100644 --- a/tests/test_summarize.py +++ b/tests/test_summarize.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_tag.py b/tests/test_tag.py index 550c6105c..666f613e3 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index 2f420792f..ad5a1f5e9 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_tools.py b/tests/test_tools.py index 760c1243a..048da92d4 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_util.py b/tests/test_util.py index 9e4bc18ac..a19651a60 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 """ diff --git a/tests/test_wsd.py b/tests/test_wsd.py index 9795e80e3..b02f57fcf 100644 --- a/tests/test_wsd.py +++ b/tests/test_wsd.py @@ 
-1,5 +1,5 @@ # -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project +# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project # SPDX-License-Identifier: Apache-2.0 import unittest