Skip to content

Commit

Permalink
Updated golem, added test (not working yet)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrzkaminski committed Aug 11, 2023
1 parent c4292ad commit c348b6b
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 28 deletions.
4 changes: 2 additions & 2 deletions bamt/utils/composite_utils/MLUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
except ModuleNotFoundError:
LGBMRegressor = None
LGBMClassifier = None
logger_network.warning(
"Install lightgbm (e.g. pip install lightgbm) to use LGBMRegressor and LGBMClassifier"
logger_network.info(
"Install lightgbm (e.g. pip install lightgbm) to enable LGBMRegressor and LGBMClassifier"
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ scipy = "^1.8.0"
pyvis = ">=0.2.1"
missingno = "^0.5.1"
pgmpy = "0.1.20"
thegolem = ">=0.3.1"
thegolem = ">=0.3.2"
xgboost = ">=1.7.6"
catboost = ">=1.0.6"
lightgbm = {version = ">=3.3.5", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ catboost>=1.0.6
joblib>=1.1.1
networkx>=3.1
tqdm>=4.65.0
thegolem>=0.3.1
thegolem>=0.3.2
typing>=3.7.4.3
xgboost>=1.7.6
102 changes: 78 additions & 24 deletions tests/test_networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import unittest

from sklearn.tree import DecisionTreeRegressor
from catboost import CatBoostRegressor
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.ensemble import RandomForestRegressor

from sklearn import preprocessing as pp
Expand All @@ -19,8 +19,13 @@
from bamt.nodes.gaussian_node import GaussianNode
from bamt.nodes.discrete_node import DiscreteNode
from bamt.nodes.logit_node import LogitNode
from bamt.utils.composite_utils.CompositeGeneticOperators import (
custom_mutation_add_model,
custom_crossover_all_model,
)
from bamt import preprocessors
from bamt.utils.MathUtils import precision_recall
from bamt.utils.composite_utils.CompositeModel import CompositeModel, CompositeNode

logging.getLogger("network").setLevel(logging.CRITICAL)

Expand Down Expand Up @@ -1059,7 +1064,7 @@ class TestBigBraveBN(unittest.SkipTest):

class TestCompositeNetwork(unittest.TestCase):
def setUp(self):
self.data = pd.read_csv(r"data/benchmark/healthcare.csv", index_col=0)
self.data = pd.read_csv(r"../data/benchmark/healthcare.csv", index_col=0)
self.descriptor = {
"types": {
"A": "disc",
Expand All @@ -1072,7 +1077,6 @@ def setUp(self):
},
"signs": {"D": "pos", "I": "neg", "O": "pos", "T": "pos"},
}
self.bn = CompositeBN()
self.reference_dag = [
("A", "C"),
("A", "D"),
Expand All @@ -1083,32 +1087,18 @@ def setUp(self):
("H", "D"),
("I", "T"),
("O", "T"),
("A", "C"),
("A", "D"),
("A", "H"),
("A", "O"),
("C", "I"),
("D", "I"),
("H", "D"),
("I", "T"),
("O", "T"),
]

def test_learning(self):
encoder = pp.LabelEncoder()
p = bp.Preprocessor([("encoder", encoder)])

_, _ = p.apply(self.data)
self.comparative_dag = [("A", "C"), ("H", "C")]

info = p.info

self.bn.add_nodes(info)
def test_learning(self):
bn, _ = self._get_starter_bn(self.data)

self.bn.add_edges(self.data, verbose=False)
bn.add_edges(self.data, verbose=False)

self.bn.fit_parameters(self.data)
bn.fit_parameters(self.data)

obtained_dag = self.bn.edges
obtained_dag = bn.edges
num_edges = len(obtained_dag)
self.assertGreaterEqual(
num_edges, 1, msg="Obtained graph should have at least one edge."
Expand All @@ -1121,7 +1111,7 @@ def test_learning(self):
msg=f"Structural Hamming Distance should be less than 15, obtained SHD = {dist}",
)

for node in self.bn.nodes:
for node in bn.nodes:
if type(node).__name__ == "CompositeContinuousNode":
self.assertIsNotNone(
node.regressor,
Expand All @@ -1133,6 +1123,70 @@ def test_learning(self):
msg="CompositeDiscreteNode does not have classifier",
)

def test_learning_models(self):
    """Learn a structure over columns A, C and H from a hand-built composite model.

    A three-node CompositeModel (A -> C <- H, with a CatBoostClassifier
    attached to C as ``parent_model``) is passed to ``add_edges`` as
    ``custom_initial_structure`` together with the custom mutation and
    crossover operators. The resulting edges must match
    ``self.comparative_dag``.
    """
    columns = ["A", "C", "H"]
    bn, p = self._get_starter_bn(self.data[columns])

    # Root nodes: no parents and no parent model.
    parent_node_a = CompositeNode(
        nodes_from=None,
        content={
            "name": "A",
            "type": p.nodes_types["A"],
            "parent_model": None,
        },
    )

    parent_node_h = CompositeNode(
        nodes_from=None,
        content={
            "name": "H",
            "type": p.nodes_types["H"],
            "parent_model": None,
        },
    )

    # Child node C depends on both roots and carries the model under test.
    child_node = CompositeNode(
        nodes_from=[parent_node_a, parent_node_h],
        content={
            "name": "C",
            "type": p.nodes_types["C"],
            "parent_model": CatBoostClassifier(),
        },
    )

    comp_model = CompositeModel(nodes=[parent_node_a, parent_node_h, child_node])

    bn.add_edges(
        self.data[columns],
        verbose=True,
        custom_mutations=[custom_mutation_add_model],
        custom_crossovers=[custom_crossover_all_model],
        custom_initial_structure=[comp_model],
    )

    # Normalise every edge to a tuple of plain strings so it can be compared
    # with the (str, str) tuples stored in the reference structure.
    output_structure = [tuple(str(item) for item in edge) for edge in bn.edges]

    # The order in which edges are reported is not part of the DAG's identity,
    # so compare the two edge collections without regard to ordering.
    self.assertCountEqual(
        output_structure,
        self.comparative_dag,
        msg="Obtained BN should have reference structure",
    )

@staticmethod
def _get_starter_bn(data):
    """Create a CompositeBN whose nodes are derived from ``data``.

    A Preprocessor with a single label-encoder step is applied to ``data``
    and its ``info`` is handed to ``CompositeBN.add_nodes``.

    Returns:
        A ``(network, preprocessor)`` pair: the new CompositeBN and the
        Preprocessor instance after ``apply`` has been called on it.
    """
    preprocessor = bp.Preprocessor([("encoder", pp.LabelEncoder())])

    # Both values returned by apply() are discarded; the call is made so
    # that the preprocessor's ``info`` can be read afterwards.
    _, _ = preprocessor.apply(data)

    network = CompositeBN()
    network.add_nodes(preprocessor.info)

    return network, preprocessor


# Run the unittest runner with verbosity level 3 when this file is executed as a script.
if __name__ == "__main__":
unittest.main(verbosity=3)

0 comments on commit c348b6b

Please sign in to comment.