File tree: 3 files changed, +11 -3 lines changed
Original file line number | Diff line number | Diff line change
1
+ include tkseem/dictionaries/*.pl
Original file line number | Diff line number | Diff line change
13
13
license = 'MIT' ,
14
14
packages = ['tkseem' ],
15
15
install_requires = required ,
16
+ include_package_data = True ,
16
17
zip_safe = False )
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,13 @@ def __init__(
52
52
self .clean = clean
53
53
self .normalize = normalize
54
54
self .split = split
55
- self .norm_dict = pickle .load (open ("dictionaries/normalization_dictionary.pl" , "rb" ))
56
- self .cached = pickle .load (open ("dictionaries/cached.pl" , "rb" ))
55
+
56
+ # relative path
57
+ self .rel_path = os .path .dirname (__file__ )
58
+ norm_dict_path = os .path .join (self .rel_path , 'dictionaries/normalization_dictionary.pl' )
59
+ cach_dict_path = os .path .join (self .rel_path , 'dictionaries/cached.pl' )
60
+ self .norm_dict = pickle .load (open (norm_dict_path , "rb" ))
61
+ self .cached = pickle .load (open (cach_dict_path , "rb" ))
57
62
58
63
if self .segment :
59
64
print ("Initializing Farasa" )
@@ -593,9 +598,10 @@ class AutoTokenizer(BaseTokenizer):
593
598
""" Auto tokenization using a saved dictionary
594
599
"""
595
600
596
- def train (self , vocab_path = "dictionaries/vocab.pl" ):
601
+ def train (self ):
597
602
"""Use a default dictionary for training"""
598
603
print ("Training AutoTokenizer..." )
604
+ vocab_path = os .path .join (self .rel_path , 'dictionaries/vocab.pl' )
599
605
self .vocab = self ._truncate_dict (pickle .load (open (vocab_path , "rb" )))
600
606
601
607
def tokenize (self , text , cache = False ):
You can’t perform that action at this time.
0 commit comments