PyThaiNLP · wannaphong · Jul 21, 2024 · Apr 26, 2024 · May 6, 2024 · May 6, 2024
diff --git a/.gitignore b/.gitignore
@@ -117,4 +117,5 @@ cython_debug/
 notebooks/iso_11940-dev.ipynb
 
 # vscode devcontainer
-.devcontainer/
+.devcontainer/
+notebooks/d.model
diff --git a/notebooks/test_gzip_classify.ipynb b/notebooks/test_gzip_classify.ipynb
@@ -60,11 +60,60 @@
    "source": [
     "model.predict(\"ฉันดีใจ\", k=1)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5a97f0d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.save(\"d.model\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6e183243",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model2 = pythainlp.classify.param_free.GzipModel(model_path=\"d.model\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b30af6f0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Positive'"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model2.predict(x1=\"ฉันดีใจ\", k=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e72a33b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.8.13 ('base')",
    "language": "python",
    "name": "python3"
   },
@@ -78,7 +127,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "a1d6ff38954a1cdba4cf61ffa51e42f4658fc35985cd256cd89123cae8466a39"
+   }
   }
  },
  "nbformat": 4,

diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py
@@ -5,6 +5,7 @@
 import gzip
 from typing import List, Tuple
 import numpy as np
+import json
 
 
 class GzipModel:
@@ -14,11 +15,15 @@ class GzipModel:
     (Jiang et al., Findings 2023)
 
     :param list training_data: list [(text_sample,label)]
+    :param str model_path: path of a saved model; overrides training_data
     """
 
-    def __init__(self, training_data: List[Tuple[str, str]]):
-        self.training_data = np.array(training_data)
-        self.Cx2_list = self.train()
+    def __init__(self, training_data: List[Tuple[str, str]] = None, model_path: str = None):
+        if model_path is not None:  # a saved model wins; training_data is ignored
+            self.load(model_path)
+        else:  # NOTE(review): training_data=None is not rejected here
+            self.training_data = np.array(training_data)
+            self.Cx2_list = self.train()  # computed once, at construction
 
     def train(self):
         Cx2_list = []
@@ -72,3 +77,19 @@ def predict(self, x1: str, k: int = 1) -> str:
         predict_class = top_k_class[counts.argmax()]
 
         return predict_class
+
+    def save(self, path: str):
+        """
+        :param str path: path of the UTF-8 JSON file to write the model to
+        """
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump({
+                "training_data": self.training_data.tolist(),  # ndarray -> lists
+                "Cx2_list": self.Cx2_list
+            }, f, ensure_ascii=False)  # non-ASCII text is written as-is (UTF-8)
+
+    def load(self, path: str):  # read back a file written by save()
+        with open(path, "r", encoding="utf-8") as f:  # save() writes UTF-8
+            data = json.load(f)
+            self.Cx2_list = data["Cx2_list"]
+            self.training_data = np.array(data["training_data"])  # back to ndarray