Skip to content

Commit f0cc7cf

Browse files
committed Aug 17, 2020
adding the new tests file
1 parent 05069e2 commit f0cc7cf

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed
 

‎test.py

+45
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
import unittest
2+
from tkseem.util import remove_tashkeel
Has conversations. Original line has conversations.
3+
import tkseem as tk
4+
5+
6+
class TestUnit(unittest.TestCase):
    """Unit tests for the helpers imported from ``tkseem.util``."""

    def test_tashkeel(self):
        """Check ``remove_tashkeel`` against a diacritized line and its plain form."""
        diacritized = "مِكَرٍّ مِفَرٍّ مُقبِلٍ مُدبِرٍ مَعًا"
        expected = "مكر مفر مقبل مدبر معا"

        stripped = remove_tashkeel(diacritized)

        self.assertEqual(stripped, expected, "Remove Tashkeel is not working")
13+
14+
15+
class TokenizersTestUnit(unittest.TestCase):
    """Smoke tests that can be run against any injected tokenizer.

    The class is a template: assign a ready-to-use tokenizer to the
    ``tokenizer`` attribute before running it. The tokenizer must provide
    ``tokenize(text)``, ``detokenize(tokens)`` and ``token_to_id(token)``.
    While ``tokenizer`` is still ``None`` every test is skipped instead of
    crashing with an ``AttributeError`` on ``None``.
    """

    # Text passed to tokenize() by the tokenize/detokenize tests.
    sample_text = "مرحبا أيها الأصدقاء"
    # Single token looked up by test_token_to_id.
    token = "نص"
    # Tokenizer under test; injected from outside the class.
    tokenizer = None
    # Id returned by token_to_id(); stored on the instance by test_token_to_id.
    token_id = None

    def setUp(self):
        """Skip the test when no tokenizer has been injected."""
        if self.tokenizer is None:
            self.skipTest("no tokenizer assigned to the `tokenizer` attribute")

    def test_tokenize(self):
        """``tokenize`` must return something other than ``None``."""
        tokenized = self.tokenizer.tokenize(self.sample_text)
        print(f"{self.tokenizer} tokenize() output:", tokenized)
        # Test methods should not return a value, so the assertion is a
        # plain statement rather than a ``return``.
        self.assertIsNotNone(tokenized, "tokenize() returned None")

    def test_detokenize(self):
        """``detokenize`` must return something for freshly tokenized text."""
        tokenized = self.tokenizer.tokenize(self.sample_text)
        detokenized = self.tokenizer.detokenize(tokenized)
        print(
            f"{self.tokenizer} detokenize() output on the previously tokenized text:",
            detokenized,
        )
        self.assertIsNotNone(detokenized, "detokenize() returned None")

    def test_token_to_id(self):
        """``token_to_id`` must return something other than ``None``."""
        self.token_id = self.tokenizer.token_to_id(self.token)
        print(f"{self.tokenizer} token_to_id() output:", self.token_id)
        self.assertIsNotNone(self.token_id, "token_to_id() returned None")
39+
40+
41+
# Train the tokenizer once at import time and inject it into the shared test
# case, so the class has its tokenizer both when this file is run directly and
# when a test runner imports it.
word_tokenizer = tk.WordTokenizer()
word_tokenizer.train("tasks/samples/data.txt")
TokenizersTestUnit.tokenizer = word_tokenizer

if __name__ == "__main__":
    # Only launch the suite when executed as a script; running it on import
    # would execute every test a second time under a discovering runner.
    # The placeholder argv keeps unittest from parsing the real command line.
    # The default exit=True makes the process exit status reflect failures.
    unittest.main(argv=["first-arg-is-ignored"])
45+

0 commit comments

Comments
 (0)
Please sign in to comment.