From 5ce054ace57b60e9e7e75e64dcbd6dfb9cb07720 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 18 Dec 2024 14:22:10 -0600 Subject: [PATCH 1/2] bug in build_tailored --- encexp/__init__.py | 2 +- encexp/text_repr.py | 55 +++++++++++++++++++++++---------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/encexp/__init__.py b/encexp/__init__.py index a577539..5ab496c 100644 --- a/encexp/__init__.py +++ b/encexp/__init__.py @@ -17,4 +17,4 @@ if not '-m' in sys.argv: from encexp.text_repr import EncExp, EncExpT, SeqTM, TM -__version__ = "0.0.19" +__version__ = "0.0.20" diff --git a/encexp/text_repr.py b/encexp/text_repr.py index b3da669..d59fddb 100644 --- a/encexp/text_repr.py +++ b/encexp/text_repr.py @@ -477,28 +477,6 @@ def decision_function(self, texts): return np.c_[hy] return hy - def train_predict_decision_function(self, D, y=None): - """Train and predict the decision""" - if y is None: - y = np.array([x['klass'] for x in D]) - if not isinstance(y, np.ndarray): - y = np.array(y) - nclass = np.unique(y).shape[0] - X = self.transform(D) - if nclass == 2: - hy = np.empty(X.shape[0]) - else: - hy = np.empty((X.shape[0], nclass)) - kwargs = dict(random_state=0, shuffle=True) - if self.kfold_kwargs is not None: - kwargs.update(self.kfold_kwargs) - for tr, vs in self.kfold_class(**kwargs).split(X, y): - m = clone(self).estimator.fit(X[tr], y[tr]) - hy[vs] = m.decision_function(X[vs]) - if hy.ndim == 1: - return np.c_[hy] - return hy - def fill(self, inplace: bool=True, names: list=None): """Fill weights with the missing dimensions""" weights = self.weights @@ -528,10 +506,11 @@ def build_tailored(self, data, load=False, **kwargs): return None get_text = self.bow.get_text - if load and isinstance(self.tailored, str) and isfile(self.tailored): - _ = self.__class__(EncExp_filename=self.tailored) - self.__iadd__(_) - self._tailored_built = True + if isinstance(self.tailored, str) and isfile(self.tailored): + if load: + _ = self.__class__(EncExp_filename=self.tailored) + self.__iadd__(_) + self._tailored_built = True return None iden, path = mkstemp() with open(iden, 'w', encoding='utf-8') as fpt: @@ -649,7 +628,29 @@ def estimator(self): @estimator.setter def estimator(self, value): - self._estimator = value + self._estimator = value + + def train_predict_decision_function(self, D, y=None): + """Train and predict the decision""" + if y is None: + y = np.array([x['klass'] for x in D]) + if not isinstance(y, np.ndarray): + y = np.array(y) + nclass = np.unique(y).shape[0] + X = self.transform(D) + if nclass == 2: + hy = np.empty(X.shape[0]) + else: + hy = np.empty((X.shape[0], nclass)) + kwargs = dict(random_state=0, shuffle=True) + if self.kfold_kwargs is not None: + kwargs.update(self.kfold_kwargs) + for tr, vs in self.kfold_class(**kwargs).split(X, y): + m = clone(self).estimator.fit(X[tr], y[tr]) + hy[vs] = m.decision_function(X[vs]) + if hy.ndim == 1: + return np.c_[hy] + return hy def __sklearn_clone__(self): ins = super(EncExp, self).__sklearn_clone__() From 7646c30dd439d4a17320196a44f9d0cf71614746 Mon Sep 17 00:00:00 2001 From: Mario Graff Date: Wed, 18 Dec 2024 14:23:03 -0600 Subject: [PATCH 2/2] Moving methods to EncExp --- encexp/text_repr.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/encexp/text_repr.py b/encexp/text_repr.py index d59fddb..b253265 100644 --- a/encexp/text_repr.py +++ b/encexp/text_repr.py @@ -464,19 +464,6 @@ def transform(self, texts): return X / np.c_[_norm] return X - def predict(self, texts): - """Predict""" - X = self.transform(texts) - return self.estimator.predict(X) - - def decision_function(self, texts): - """Decision function""" - X = self.transform(texts) - hy = self.estimator.decision_function(X) - if hy.ndim == 1: - return np.c_[hy] - return hy - def fill(self, inplace: bool=True, names: list=None): """Fill weights with the missing dimensions""" weights = self.weights @@ -630,6 +617,19 @@ def estimator(self): def estimator(self, value): self._estimator = value + def predict(self, texts): + """Predict""" + X = self.transform(texts) + return self.estimator.predict(X) + + def decision_function(self, texts): + """Decision function""" + X = self.transform(texts) + hy = self.estimator.decision_function(X) + if hy.ndim == 1: + return np.c_[hy] + return hy + def train_predict_decision_function(self, D, y=None): """Train and predict the decision""" if y is None: