From c348b6b39c2f6113ed73191d3a51c71ae88354fa Mon Sep 17 00:00:00 2001
From: jrzkaminski <86363785+jrzkaminski@users.noreply.github.com>
Date: Fri, 11 Aug 2023 16:46:44 +0300
Subject: [PATCH] Updated golem, added test (not working yet)

---
 bamt/utils/composite_utils/MLUtils.py |   4 +-
 pyproject.toml                        |   2 +-
 requirements.txt                      |   2 +-
 tests/test_networks.py                | 102 ++++++++++++++++++++------
 4 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/bamt/utils/composite_utils/MLUtils.py b/bamt/utils/composite_utils/MLUtils.py
index 4653a85..8e68366 100644
--- a/bamt/utils/composite_utils/MLUtils.py
+++ b/bamt/utils/composite_utils/MLUtils.py
@@ -30,8 +30,8 @@ except ModuleNotFoundError:
     LGBMRegressor = None
     LGBMClassifier = None
-    logger_network.warning(
-        "Install lightgbm (e.g. pip install lightgbm) to use LGBMRegressor and LGBMClassifier"
+    logger_network.info(
+        "Install lightgbm (e.g. pip install lightgbm) to enable LGBMRegressor and LGBMClassifier"
     )
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 03b22e1..5360757 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ scipy = "^1.8.0"
 pyvis = ">=0.2.1"
 missingno = "^0.5.1"
 pgmpy = "0.1.20"
-thegolem = ">=0.3.1"
+thegolem = ">=0.3.2"
 xgboost = ">=1.7.6"
 catboost = ">=1.0.6"
 lightgbm = {version = ">=3.3.5", optional = true }
diff --git a/requirements.txt b/requirements.txt
index 980967a..5e96457 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,6 @@ catboost>=1.0.6
 joblib>=1.1.1
 networkx>=3.1
 tqdm>=4.65.0
-thegolem>=0.3.1
+thegolem>=0.3.2
 typing>=3.7.4.3
 xgboost>=1.7.6
diff --git a/tests/test_networks.py b/tests/test_networks.py
index 8adc81e..16e7d61 100644
--- a/tests/test_networks.py
+++ b/tests/test_networks.py
@@ -3,7 +3,7 @@ import unittest
 
 from sklearn.tree import DecisionTreeRegressor
-from catboost import CatBoostRegressor
+from catboost import CatBoostRegressor, CatBoostClassifier
 from sklearn.ensemble import RandomForestRegressor
 from sklearn import preprocessing as pp
 
@@ -19,8 +19,13 @@
 from bamt.nodes.gaussian_node import GaussianNode
 from bamt.nodes.discrete_node import DiscreteNode
 from bamt.nodes.logit_node import LogitNode
+from bamt.utils.composite_utils.CompositeGeneticOperators import (
+    custom_mutation_add_model,
+    custom_crossover_all_model,
+)
 from bamt import preprocessors
 from bamt.utils.MathUtils import precision_recall
+from bamt.utils.composite_utils.CompositeModel import CompositeModel, CompositeNode
 
 logging.getLogger("network").setLevel(logging.CRITICAL)
 
@@ -1059,7 +1064,7 @@ class TestBigBraveBN(unittest.SkipTest):
 
 class TestCompositeNetwork(unittest.TestCase):
     def setUp(self):
-        self.data = pd.read_csv(r"data/benchmark/healthcare.csv", index_col=0)
+        self.data = pd.read_csv(r"../data/benchmark/healthcare.csv", index_col=0)
         self.descriptor = {
             "types": {
                 "A": "disc",
@@ -1072,7 +1077,6 @@ def setUp(self):
             },
             "signs": {"D": "pos", "I": "neg", "O": "pos", "T": "pos"},
         }
-        self.bn = CompositeBN()
         self.reference_dag = [
             ("A", "C"),
             ("A", "D"),
@@ -1083,32 +1087,18 @@ def setUp(self):
             ("H", "D"),
             ("I", "T"),
             ("O", "T"),
-            ("A", "C"),
-            ("A", "D"),
-            ("A", "H"),
-            ("A", "O"),
-            ("C", "I"),
-            ("D", "I"),
-            ("H", "D"),
-            ("I", "T"),
-            ("O", "T"),
         ]
 
-    def test_learning(self):
-        encoder = pp.LabelEncoder()
-        p = bp.Preprocessor([("encoder", encoder)])
-
-        _, _ = p.apply(self.data)
+        self.comparative_dag = [("A", "C"), ("H", "C")]
 
-        info = p.info
-
-        self.bn.add_nodes(info)
+    def test_learning(self):
+        bn, _ = self._get_starter_bn(self.data)
 
-        self.bn.add_edges(self.data, verbose=False)
+        bn.add_edges(self.data, verbose=False)
 
-        self.bn.fit_parameters(self.data)
+        bn.fit_parameters(self.data)
 
-        obtained_dag = self.bn.edges
+        obtained_dag = bn.edges
 
         num_edges = len(obtained_dag)
         self.assertGreaterEqual(
             num_edges, 1, msg="Obtained graph should have at least one edge."
@@ -1121,7 +1111,7 @@ def test_learning(self):
             msg=f"Structural Hamming Distance should be less than 15, obtained SHD = {dist}",
         )
 
-        for node in self.bn.nodes:
+        for node in bn.nodes:
             if type(node).__name__ == "CompositeContinuousNode":
                 self.assertIsNotNone(
                     node.regressor,
@@ -1133,6 +1123,70 @@ def test_learning(self):
                     msg="CompositeDiscreteNode does not have classifier",
                 )
 
+    def test_learning_models(self):
+        bn, p = self._get_starter_bn(self.data[["A", "C", "H"]])
+
+        parent_node_a = CompositeNode(
+            nodes_from=None,
+            content={
+                "name": "A",
+                "type": p.nodes_types["A"],
+                "parent_model": None,
+            },
+        )
+
+        parent_node_h = CompositeNode(
+            nodes_from=None,
+            content={
+                "name": "H",
+                "type": p.nodes_types["H"],
+                "parent_model": None,
+            },
+        )
+
+        child_node = CompositeNode(
+            nodes_from=[parent_node_a, parent_node_h],
+            content={
+                "name": "C",
+                "type": p.nodes_types["C"],
+                "parent_model": CatBoostClassifier(),
+            },
+        )
+
+        comp_model = CompositeModel(nodes=[parent_node_a, parent_node_h, child_node])
+
+        bn.add_edges(
+            self.data[["A", "C", "H"]],
+            verbose=True,
+            custom_mutations=[custom_mutation_add_model],
+            custom_crossovers=[custom_crossover_all_model],
+            custom_initial_structure=[comp_model],
+        )
+
+        output_structure = [
+            tuple([str(item) for item in inner_list]) for inner_list in bn.edges
+        ]
+
+        self.assertEqual(
+            output_structure,
+            self.comparative_dag,
+            msg="Obtained BN should have reference structure",
+        )
+
+    @staticmethod
+    def _get_starter_bn(data):
+        encoder = pp.LabelEncoder()
+        p = bp.Preprocessor([("encoder", encoder)])
+
+        _, _ = p.apply(data)
+
+        info = p.info
+
+        bn = CompositeBN()
+        bn.add_nodes(info)
+
+        return bn, p
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=3)
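
For reviewers: the sketch below pulls the flow of the new test_learning_models out of the unittest scaffolding, as a rough illustration of how the custom genetic operators added in this patch are wired into CompositeBN.add_edges. The CompositeBN import path, the preprocessors alias, and the CSV path relative to the repository root are assumptions; everything else mirrors the test above, and is not a guaranteed public API.

# Rough standalone sketch of the flow exercised by test_learning_models (assumptions noted above).
import pandas as pd
from catboost import CatBoostClassifier
from sklearn import preprocessing as pp

import bamt.preprocessors as bp                      # alias assumed; the test calls bp.Preprocessor
from bamt.networks.composite_bn import CompositeBN   # import path assumed
from bamt.utils.composite_utils.CompositeModel import CompositeModel, CompositeNode
from bamt.utils.composite_utils.CompositeGeneticOperators import (
    custom_mutation_add_model,
    custom_crossover_all_model,
)

data = pd.read_csv("data/benchmark/healthcare.csv", index_col=0)[["A", "C", "H"]]

# Encode the data and register nodes, mirroring the _get_starter_bn helper.
p = bp.Preprocessor([("encoder", pp.LabelEncoder())])
p.apply(data)
bn = CompositeBN()
bn.add_nodes(p.info)

# Seed the evolutionary search with an initial structure A -> C <- H,
# where C gets a CatBoost classifier as its parent model.
node_a = CompositeNode(nodes_from=None, content={"name": "A", "type": p.nodes_types["A"], "parent_model": None})
node_h = CompositeNode(nodes_from=None, content={"name": "H", "type": p.nodes_types["H"], "parent_model": None})
node_c = CompositeNode(
    nodes_from=[node_a, node_h],
    content={"name": "C", "type": p.nodes_types["C"], "parent_model": CatBoostClassifier()},
)
initial = CompositeModel(nodes=[node_a, node_h, node_c])

# Structure learning with the custom mutation/crossover operators from CompositeGeneticOperators.
bn.add_edges(
    data,
    verbose=True,
    custom_mutations=[custom_mutation_add_model],
    custom_crossovers=[custom_crossover_all_model],
    custom_initial_structure=[initial],
)
print(bn.edges)  # the test expects the structure [("A", "C"), ("H", "C")]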