From 45c33e05b34126d3071c778a4792874430b85aab Mon Sep 17 00:00:00 2001 From: Roman Netrogolov <68499591+Roman223@users.noreply.github.com> Date: Tue, 27 Jun 2023 20:01:51 +0400 Subject: [PATCH] Model selection (#63) * 1. Added selection of models for the predict method (new models_dir argument). 2. Minimum subset size for fitting changed to 2 rows so that some models work. * Update base.py Fixed test errors --- bamt/networks/base.py | 11 ++++++----- bamt/nodes/conditional_gaussian_node.py | 2 +- tests/sendingRegressors.py | 1 - 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bamt/networks/base.py b/bamt/networks/base.py index 84464ed..c090b5e 100644 --- a/bamt/networks/base.py +++ b/bamt/networks/base.py @@ -607,7 +607,8 @@ def wrapper(): def predict(self, test: pd.DataFrame, parall_count: int = 1, - progress_bar: bool = True) -> Dict[str, + progress_bar: bool = True, + models_dir: Optional[str] = None) -> Dict[str, Union[List[str], List[int], List[float]]]: @@ -630,7 +631,7 @@ def predict(self, from joblib import Parallel, delayed - def wrapper(bn, test: pd.DataFrame, columns: List[str]): + def wrapper(bn, test: pd.DataFrame, columns: List[str], models_dir: str): preds = {column_name: list() for column_name in columns} if len(test) == 1: @@ -639,7 +640,7 @@ def wrapper(bn, test: pd.DataFrame, columns: List[str]): for n, key in enumerate(columns): try: sample = bn.sample( - 1, evidence=test_row, predict=True, progress_bar=False) + 1, evidence=test_row, predict=True, progress_bar=False, models_dir=models_dir) if sample.empty: preds[key].append(np.nan) continue @@ -670,10 +671,10 @@ def wrapper(bn, test: pd.DataFrame, columns: List[str]): if progress_bar: processed_list = Parallel(n_jobs=parall_count)(delayed(wrapper)( - self, test.loc[[i]], columns) for i in tqdm(test.index, position=0, leave=True)) + self, test.loc[[i]], columns, models_dir) for i in tqdm(test.index, position=0, leave=True)) else: processed_list = Parallel(n_jobs=parall_count)( - delayed(wrapper)(self, test.loc[[i]], columns) for i in 
test.index) + delayed(wrapper)(self, test.loc[[i]], columns, models_dir) for i in test.index) for i in range(test.shape[0]): curr_pred = processed_list[i] diff --git a/bamt/nodes/conditional_gaussian_node.py b/bamt/nodes/conditional_gaussian_node.py index b737fc5..6bc750b 100644 --- a/bamt/nodes/conditional_gaussian_node.py +++ b/bamt/nodes/conditional_gaussian_node.py @@ -50,7 +50,7 @@ def fit_parameters( mask = (mask) & (data[col] == val) new_data = data[mask] key_comb = [str(x) for x in comb] - if new_data.shape[0] > 0: + if new_data.shape[0] > 1: if self.cont_parents: model = self.regressor model.fit(new_data[self.cont_parents].values, diff --git a/tests/sendingRegressors.py b/tests/sendingRegressors.py index c05e437..0168661 100644 --- a/tests/sendingRegressors.py +++ b/tests/sendingRegressors.py @@ -1,6 +1,5 @@ # import json -# import bamt.networks as Nets from bamt.networks.hybrid_bn import HybridBN import bamt.preprocessors as preprocessors