Skip to content

Commit 1c7fb09

Browse files
committed
Include the dictionaries in the wheel using MANIFEST.in.
1 parent 116d3e0 commit 1c7fb09

File tree

3 files changed

+11
-3
lines changed

3 files changed

+11
-3
lines changed

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include tkseem/dictionaries/*.pl

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@
1313
license='MIT',
1414
packages=['tkseem'],
1515
install_requires=required,
16+
include_package_data=True,
1617
zip_safe=False)

tkseem/tokenizers.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,13 @@ def __init__(
5252
self.clean = clean
5353
self.normalize = normalize
5454
self.split = split
55-
self.norm_dict = pickle.load(open("dictionaries/normalization_dictionary.pl", "rb"))
56-
self.cached = pickle.load(open("dictionaries/cached.pl", "rb"))
55+
56+
# relative path
57+
self.rel_path = os.path.dirname(__file__)
58+
norm_dict_path = os.path.join(self.rel_path, 'dictionaries/normalization_dictionary.pl')
59+
cach_dict_path = os.path.join(self.rel_path, 'dictionaries/cached.pl')
60+
self.norm_dict = pickle.load(open(norm_dict_path, "rb"))
61+
self.cached = pickle.load(open(cach_dict_path, "rb"))
5762

5863
if self.segment:
5964
print("Initializing Farasa")
@@ -593,9 +598,10 @@ class AutoTokenizer(BaseTokenizer):
593598
""" Auto tokenization using a saved dictionary
594599
"""
595600

596-
def train(self, vocab_path="dictionaries/vocab.pl"):
601+
def train(self):
597602
"""Use a default dictionary for training"""
598603
print("Training AutoTokenizer...")
604+
vocab_path = os.path.join(self.rel_path, 'dictionaries/vocab.pl')
599605
self.vocab = self._truncate_dict(pickle.load(open(vocab_path, "rb")))
600606

601607
def tokenize(self, text, cache=False):

0 commit comments

Comments
 (0)