Include the dictionaries in the wheel using MANIFEST.in.

zaidalyafeai · zaidalyafeai · commit 1c7fb0902737 · 2020-07-29T04:58:32.000+03:00
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+include tkseem/dictionaries/*.pl
diff --git a/setup.py b/setup.py
@@ -13,4 +13,5 @@
       license='MIT',
       packages=['tkseem'],
       install_requires=required,
+      include_package_data=True,
       zip_safe=False)
diff --git a/tkseem/tokenizers.py b/tkseem/tokenizers.py
@@ -52,8 +52,13 @@ def __init__(
         self.clean = clean
         self.normalize = normalize
         self.split = split
-        self.norm_dict = pickle.load(open("dictionaries/normalization_dictionary.pl", "rb"))
-        self.cached = pickle.load(open("dictionaries/cached.pl", "rb"))
+
+        # relative path
+        self.rel_path = os.path.dirname(__file__)
+        norm_dict_path = os.path.join(self.rel_path, 'dictionaries/normalization_dictionary.pl')
+        cach_dict_path = os.path.join(self.rel_path, 'dictionaries/cached.pl')
+        self.norm_dict = pickle.load(open(norm_dict_path, "rb"))
+        self.cached = pickle.load(open(cach_dict_path, "rb"))
 
         if self.segment:
             print("Initializing Farasa")
@@ -593,9 +598,10 @@ class AutoTokenizer(BaseTokenizer):
     """ Auto tokenization using a saved dictionary 
     """
 
-    def train(self, vocab_path="dictionaries/vocab.pl"):
+    def train(self):
         """Use a default dictionary for training"""
         print("Training AutoTokenizer...")
+        vocab_path = os.path.join(self.rel_path, 'dictionaries/vocab.pl')
         self.vocab = self._truncate_dict(pickle.load(open(vocab_path, "rb")))
 
     def tokenize(self, text, cache=False):