Skip to content

Commit 9610b54

Browse files
authored
Disable custom_dict for nlpo3 engine in word_tokenize()
Currently, custom_dict cannot be handled from inside word_tokenize(), due to a difference in type.
1 parent 4b80066 commit 9610b54

File tree

1 file changed

+15
-13
lines changed

1 file changed

+15
-13
lines changed

pythainlp/tokenize/core.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def word_tokenize(
128128
129129
:param str text: text to be tokenized
130130
:param str engine: name of the tokenizer to be used
131-
:param pythainlp.util.Trie custom_dict: dictionary trie
131+
:param pythainlp.util.Trie custom_dict: dictionary trie (some engine may not support)
132132
:param bool keep_whitespace: True to keep whitespace, a common mark
133133
for end of phrase in Thai.
134134
Otherwise, whitespace is omitted.
@@ -290,18 +290,20 @@ def word_tokenize(
290290
segments = segment(text)
291291
elif engine == "nlpo3":
292292
from pythainlp.tokenize.nlpo3 import segment
293-
294-
if isinstance(custom_dict, str):
295-
segments = segment(text, custom_dict=custom_dict)
296-
elif not isinstance(custom_dict, str) and not custom_dict:
297-
raise ValueError(
298-
f"""Tokenizer \"{engine}\":
299-
custom_dict must be a str.
300-
It is a dictionary name as assigned with load_dict().
301-
See pythainlp.tokenize.nlpo3.load_dict()"""
302-
)
303-
else:
304-
segments = segment(text)
293+
# Currently cannot handle custom_dict from inside word_tokenize(),
294+
# due to difference in type.
295+
#if isinstance(custom_dict, str):
296+
# segments = segment(text, custom_dict=custom_dict)
297+
#elif not isinstance(custom_dict, str) and not custom_dict:
298+
# raise ValueError(
299+
# f"""Tokenizer \"{engine}\":
300+
# custom_dict must be a str.
301+
# It is a dictionary name as assigned with load_dict().
302+
# See pythainlp.tokenize.nlpo3.load_dict()"""
303+
# )
304+
#else:
305+
# segments = segment(text)
306+
segments = segment(text)
305307
else:
306308
raise ValueError(
307309
f"""Tokenizer \"{engine}\" not found.

0 commit comments

Comments
 (0)