@@ -128,7 +128,7 @@ def word_tokenize(
128
128
129
129
:param str text: text to be tokenized
130
130
:param str engine: name of the tokenizer to be used
131
- :param pythainlp.util.Trie custom_dict: dictionary trie
131
+ :param pythainlp.util.Trie custom_dict: dictionary trie (not supported by some engines)
132
132
:param bool keep_whitespace: True to keep whitespace, a common mark
133
133
for end of phrase in Thai.
134
134
Otherwise, whitespace is omitted.
@@ -290,18 +290,20 @@ def word_tokenize(
290
290
segments = segment (text )
291
291
elif engine == "nlpo3" :
292
292
from pythainlp .tokenize .nlpo3 import segment
293
-
294
- if isinstance (custom_dict , str ):
295
- segments = segment (text , custom_dict = custom_dict )
296
- elif not isinstance (custom_dict , str ) and not custom_dict :
297
- raise ValueError (
298
- f"""Tokenizer \" { engine } \" :
299
- custom_dict must be a str.
300
- It is a dictionary name as assigned with load_dict().
301
- See pythainlp.tokenize.nlpo3.load_dict()"""
302
- )
303
- else :
304
- segments = segment (text )
293
+ # Currently cannot handle custom_dict from inside word_tokenize(),
294
+ # due to a difference in type.
295
+ #if isinstance(custom_dict, str):
296
+ # segments = segment(text, custom_dict=custom_dict)
297
+ #elif not isinstance(custom_dict, str) and not custom_dict:
298
+ # raise ValueError(
299
+ # f"""Tokenizer \"{engine}\":
300
+ # custom_dict must be a str.
301
+ # It is a dictionary name as assigned with load_dict().
302
+ # See pythainlp.tokenize.nlpo3.load_dict()"""
303
+ # )
304
+ #else:
305
+ # segments = segment(text)
306
+ segments = segment (text )
305
307
else :
306
308
raise ValueError (
307
309
f"""Tokenizer \" { engine } \" not found.
0 commit comments