diff --git a/.gitignore b/.gitignore index 44d3bac..a5a09e7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ tests/bamt tutorials/Test.ipynb tutorials/bamt .DS_Store +/example_socio.ipynb diff --git a/bamt/builders/__init__.py b/bamt/builders/__init__.py new file mode 100644 index 0000000..a05b821 --- /dev/null +++ b/bamt/builders/__init__.py @@ -0,0 +1,4 @@ +__all__ = ["builders_base", + "evo_builder", + "hc_builder" + ] diff --git a/bamt/builders.py b/bamt/builders/builders_base.py similarity index 53% rename from bamt/builders.py rename to bamt/builders/builders_base.py index 504933f..8b61b97 100644 --- a/bamt/builders.py +++ b/bamt/builders/builders_base.py @@ -1,9 +1,5 @@ import itertools -from pgmpy.base import DAG -from pgmpy.estimators import HillClimbSearch -from bamt.redef_HC import hc as hc_method - from bamt.nodes.discrete_node import DiscreteNode from bamt.nodes.gaussian_node import GaussianNode from bamt.nodes.conditional_logit_node import ConditionalLogitNode @@ -16,7 +12,7 @@ from pandas import DataFrame from bamt.utils import GraphUtils as gru -from typing import Dict, List, Optional, Tuple, Callable, TypedDict, Union, Sequence +from typing import Dict, List, Optional, Tuple, Callable, TypedDict, Sequence, Union class ParamDict(TypedDict, total=False): @@ -110,8 +106,8 @@ def get_family(self): self.skeleton['V'][id].children = children ordered = gru.toporder(self.skeleton['V'], self.skeleton['E']) - notOrdered = [node.name for node in self.skeleton['V']] - mask = [notOrdered.index(name) for name in ordered] + not_ordered = [node.name for node in self.skeleton['V']] + mask = [not_ordered.index(name) for name in ordered] self.skeleton['V'] = [self.skeleton['V'][i] for i in mask] @@ -153,7 +149,7 @@ def overwrite_vertex( """ Level 2: Redefined nodes according structure (parents) :param classifier: an object to pass into logit, condLogit nodes - :param regressor: an object to pass into gaussianish nodes + :param regressor: an object to pass 
into gaussian nodes :param has_logit allows edges from cont to disc nodes :param use_mixture allows using Mixture """ @@ -203,189 +199,16 @@ def __init__(self, descriptor: Dict[str, Dict[str, str]]): super(EdgesDefiner, self).__init__(descriptor) -class HillClimbDefiner(VerticesDefiner, EdgesDefiner): - """ - Object to define structure and pass it into skeleton - """ - +class BaseDefiner(VerticesDefiner, EdgesDefiner): def __init__(self, data: DataFrame, descriptor: Dict[str, Dict[str, str]], scoring_function: Union[Tuple[str, Callable], Tuple[str]], regressor: Optional[object] = None): - """ - :param scoring_function: a tuple with following format (Name, scoring_function) - """ self.scoring_function = scoring_function - self.optimizer = HillClimbSearch(data) self.params = {'init_edges': None, 'init_nodes': None, 'remove_init_edges': True, 'white_list': None, 'bl_add': None} - super(HillClimbDefiner, self).__init__(descriptor, regressor=regressor) - - def apply_K2(self, - data: DataFrame, - init_edges: Optional[List[Tuple[str, - str]]], - progress_bar: bool, - remove_init_edges: bool, - white_list: Optional[List[Tuple[str, - str]]]): - """ - :param init_edges: list of tuples, a graph to start learning with - :param remove_init_edges: allows changes in a model defined by user - :param data: user's data - :param progress_bar: verbose regime - :param white_list: list of allowed edges - """ - import bamt.utils.GraphUtils as gru - if not all([i in ['disc', 'disc_num'] - for i in gru.nodes_types(data).values()]): - logger_builder.error( - f"K2 deals only with discrete data. 
Continuous data: {[col for col, type in gru.nodes_types(data).items() if type not in ['disc', 'disc_num']]}") - return None - - if len(self.scoring_function) != 2: - from pgmpy.estimators import K2Score - scoring_function = K2Score - else: - scoring_function = self.scoring_function[1] - - if not init_edges: - best_model = self.optimizer.estimate( - scoring_method=scoring_function(data), - black_list=self.black_list, - white_list=white_list, - show_progress=progress_bar - ) - else: - - if remove_init_edges: - startdag = DAG() - nodes = [str(v) for v in self.vertices] - startdag.add_nodes_from(nodes=nodes) - startdag.add_edges_from(ebunch=init_edges) - best_model = self.optimizer.estimate( - black_list=self.black_list, - white_list=white_list, - start_dag=startdag, - show_progress=False) - else: - best_model = self.optimizer.estimate( - black_list=self.black_list, - white_list=white_list, - fixed_edges=init_edges, - show_progress=False) - - structure = [list(x) for x in list(best_model.edges())] - self.skeleton['E'] = structure - - def apply_group1(self, - data: DataFrame, - progress_bar: bool, - init_edges: Optional[List[Tuple[str, - str]]], - remove_init_edges: bool, - white_list: Optional[List[Tuple[str, - str]]]): - """ - This method implements the group of scoring functions. - Group: - "MI" - Mutual Information, - "LL" - Log Likelihood, - "BIC" - Bayess Information Criteria, - "AIC" - Akaike information Criteria. 
- """ - column_name_dict = dict([(n.name, i) - for i, n in enumerate(self.vertices)]) - blacklist_new = [] - for pair in self.black_list: - blacklist_new.append( - (column_name_dict[pair[0]], column_name_dict[pair[1]])) - if white_list: - white_list_old = white_list[:] - white_list = [] - for pair in white_list_old: - white_list.append( - (column_name_dict[pair[0]], column_name_dict[pair[1]])) - if init_edges: - init_edges_old = init_edges[:] - init_edges = [] - for pair in init_edges_old: - init_edges.append( - (column_name_dict[pair[0]], column_name_dict[pair[1]])) - - bn = hc_method( - data, - metric=self.scoring_function[0], - restriction=white_list, - init_edges=init_edges, - remove_geo_edges=remove_init_edges, - black_list=blacklist_new, - debug=progress_bar) - structure = [] - nodes = sorted(list(bn.nodes())) - for rv in nodes: - for pa in bn.F[rv]['parents']: - structure.append([list(column_name_dict.keys())[list(column_name_dict.values()).index( - pa)], list(column_name_dict.keys())[list(column_name_dict.values()).index(rv)]]) - self.skeleton['E'] = structure - - -class HCStructureBuilder(HillClimbDefiner): - """ - Final object with build method - """ - - def __init__(self, data: DataFrame, - descriptor: Dict[str, Dict[str, str]], - scoring_function: Tuple[str, Callable], - regressor: Optional[object], - has_logit: bool, use_mixture: bool): - """ - :param data: train data - :param descriptor: map for data - """ - - super( - HCStructureBuilder, - self).__init__( - descriptor=descriptor, - data=data, - scoring_function=scoring_function, - regressor=regressor) - self.use_mixture = use_mixture - self.has_logit = has_logit - - def build(self, data: DataFrame, - progress_bar: bool, - classifier: Optional[object], - regressor: Optional[object], - params: Optional[ParamDict] = None): - if params: - for param, value in params.items(): - self.params[param] = value - - init_nodes = self.params.pop('init_nodes') - bl_add = self.params.pop('bl_add') - - # Level 1 - 
self.skeleton['V'] = self.vertices - - self.restrict(data, init_nodes, bl_add) - if self.scoring_function[0] == 'K2': - self.apply_K2(data=data, progress_bar=progress_bar, **self.params) - elif self.scoring_function[0] in ['MI', 'LL', 'BIC', 'AIC']: - self.apply_group1( - data=data, - progress_bar=progress_bar, - **self.params) - - # Level 2 - - self.get_family() - self.overwrite_vertex(has_logit=self.has_logit, - use_mixture=self.use_mixture, - classifier=classifier, - regressor=regressor) + super().__init__(descriptor, regressor=regressor) + self.optimizer = None # will be defined in subclasses diff --git a/bamt/builders/evo_builder.py b/bamt/builders/evo_builder.py new file mode 100644 index 0000000..93dc2e9 --- /dev/null +++ b/bamt/builders/evo_builder.py @@ -0,0 +1,195 @@ +from datetime import timedelta + +from pandas import DataFrame + +from bamt.builders.builders_base import BaseDefiner +from bamt.utils import EvoUtils as evo + +from golem.core.adapter import DirectAdapter +from golem.core.dag.verification_rules import has_no_cycle, has_no_self_cycled_nodes +from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum +from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum +from golem.core.optimisers.objective import Objective, ObjectiveEvaluate +from golem.core.optimisers.optimization_parameters import GraphRequirements +from golem.core.optimisers.optimizer import GraphGenerationParams +from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum + +from typing import Dict, Optional + + +class EvoDefiner(BaseDefiner): + """ + Object that might take additional methods to decompose structure builder class + """ + def __init__(self, data: DataFrame, descriptor: Dict[str, Dict[str, str]], + regressor: Optional[object] = None): + + 
super().__init__(data, descriptor, regressor) + + +class EvoStructureBuilder(EvoDefiner): + """ + This class uses an evolutionary algorithm based on GOLEM to generate a Directed Acyclic Graph (DAG) that represents + the structure of a Bayesian Network. + + Attributes: + data (DataFrame): Input data used to build the structure. + descriptor (dict): Descriptor describing node types and signs. + regressor (object): A regression model for continuous nodes. + has_logit (bool): Indicates whether a logit link function should be used. + use_mixture (bool): Indicates whether a mixture model should be used. + """ + def __init__( + self, + data: DataFrame, + descriptor: Dict[str, Dict[str, str]], + regressor: Optional[object], + has_logit: bool, + use_mixture: bool): + super( + EvoStructureBuilder, + self).__init__( + data=data, + descriptor=descriptor, + regressor=regressor) + self.data = data + self.descriptor = descriptor + self.has_logit = has_logit + self.use_mixture = use_mixture + self.regressor = regressor + self.params = {'init_edges': None, + 'init_nodes': None, + 'remove_init_edges': True, + 'white_list': None, + 'bl_add': None} + self.default_n_jobs = -1 + self.default_pop_size = 15 + self.default_crossover_prob = 0.9 + self.default_mutation_prob = 0.8 + self.default_max_arity = 100 + self.default_max_depth = 100 + self.default_timeout = 180 + self.objective_metric = evo.K2_metric + self.default_crossovers = [CrossoverTypesEnum.exchange_edges, + CrossoverTypesEnum.exchange_parents_one, + CrossoverTypesEnum.exchange_parents_both] + self.default_mutations = [ + evo.custom_mutation_add, + evo.custom_mutation_delete, + evo.custom_mutation_reverse] + self.default_selection = [SelectionTypesEnum.tournament] + self.default_constraints = [ + has_no_self_cycled_nodes, + has_no_cycle, + evo.has_no_duplicates] + + def build(self, + data: DataFrame, + classifier: Optional[object], + regressor: Optional[object], + **kwargs): + """ + Builds the structure of a Bayesian network 
from the given data using an evolutionary algorithm. + + Args: + data (DataFrame): The data from which to build the structure. + classifier (Optional[object]): A classification model for discrete nodes. + regressor (Optional[object]): A regression model for continuous nodes. + + Additional optional parameters can be provided as keyword arguments (kwargs) to customize the evolutionary + algorithm used to generate the structure. These include parameters to control the size of the population, + the probabilities of crossover and mutation, constraints on the structure of the graph, and many others. + + The resulting structure is stored in the `skeleton` attribute of the `EvoStructureBuilder` object. + """ + # Get the list of node names + nodes_types = data.columns.to_list() + + # Create the initial population + initial = [ + evo.CustomGraphModel( + nodes=kwargs.get( + 'init_nodes', [ + evo.CustomGraphNode(node_type) for node_type in nodes_types]))] + + # Define the requirements for the evolutionary algorithm + requirements = GraphRequirements( + max_arity=kwargs.get( + 'max_arity', self.default_max_arity), max_depth=kwargs.get( + 'max_depth', self.default_max_depth), + timeout=timedelta( + minutes=kwargs.get( + 'timeout', self.default_timeout)), + n_jobs=kwargs.get('n_jobs', self.default_n_jobs)) + + # Set the parameters for the evolutionary algorithm + optimizer_parameters = GPAlgorithmParameters( + pop_size=kwargs.get('pop_size', self.default_pop_size), + crossover_prob=kwargs.get('crossover_prob', self.default_crossover_prob), + mutation_prob=kwargs.get('mutation_prob', self.default_mutation_prob), + genetic_scheme_type=GeneticSchemeTypesEnum.steady_state, + mutation_types=kwargs.get('custom_mutations', self.default_mutations), + crossover_types=kwargs.get('custom_crossovers', self.default_crossovers), + selection_types=kwargs.get('selection_type', self.default_selection)) + + # Set the adapter for the conversion between the graph and the data + # structures used by 
the optimizer + adapter = DirectAdapter( + base_graph_class=evo.CustomGraphModel, + base_node_class=evo.CustomGraphNode) + + # Set the constraints for the graph + + constraints = kwargs.get('custom_constraints', []) + + constraints.extend(self.default_constraints) + + if kwargs.get('blacklist', None) is not None: + constraints.append(evo.has_no_blacklist_edges) + if kwargs.get('whitelist', None) is not None: + constraints.append(evo.has_only_whitelist_edges) + + graph_generation_params = GraphGenerationParams( + adapter=adapter, + rules_for_constraint=constraints, + available_node_types=nodes_types) + + # Define the objective function to optimize + objective = Objective({'custom': kwargs.get( + 'custom_metric', self.objective_metric)}) + + # Initialize the optimizer + optimizer = EvoGraphOptimizer( + objective=objective, + initial_graphs=initial, + requirements=requirements, + graph_generation_params=graph_generation_params, + graph_optimizer_params=optimizer_parameters) + + # Define the function to evaluate the objective function + objective_eval = ObjectiveEvaluate( + objective, data=data) + + # Run the optimization + optimized_graph = optimizer.optimise(objective_eval)[0] + + # Get the best graph + best_graph_edge_list = optimized_graph.operator.get_edges() + best_graph_edge_list = self._convert_to_strings(best_graph_edge_list) + + # Convert the best graph to the format used by the Bayesian Network + self.skeleton['V'] = self.vertices + self.skeleton['E'] = best_graph_edge_list + + self.get_family() + self.overwrite_vertex(has_logit=self.has_logit, + use_mixture=self.use_mixture, + classifier=classifier, + regressor=regressor) + + @staticmethod + def _convert_to_strings(nested_list): + return [[str(item) for item in inner_list] + for inner_list in nested_list] diff --git a/bamt/builders/hc_builder.py b/bamt/builders/hc_builder.py new file mode 100644 index 0000000..0a5e80e --- /dev/null +++ b/bamt/builders/hc_builder.py @@ -0,0 +1,190 @@ +from pgmpy.base import 
DAG +from pgmpy.estimators import HillClimbSearch + +from bamt.builders.builders_base import ParamDict, BaseDefiner +from bamt.redef_HC import hc as hc_method + +from bamt.log import logger_builder +from pandas import DataFrame +from bamt.utils import GraphUtils as gru + +from typing import Dict, List, Optional, Tuple, Callable, Union + + +class HillClimbDefiner(BaseDefiner): + """ + Object to define structure and pass it into skeleton + """ + + def __init__(self, data: DataFrame, descriptor: Dict[str, Dict[str, str]], + scoring_function: Union[Tuple[str, Callable], Tuple[str]], + regressor: Optional[object] = None): + + super().__init__(data, descriptor, scoring_function, regressor) + self.optimizer = HillClimbSearch(data) + + def apply_K2(self, + data: DataFrame, + init_edges: Optional[List[Tuple[str, + str]]], + progress_bar: bool, + remove_init_edges: bool, + white_list: Optional[List[Tuple[str, + str]]]): + """ + :param init_edges: list of tuples, a graph to start learning with + :param remove_init_edges: allows changes in a model defined by user + :param data: user's data + :param progress_bar: verbose regime + :param white_list: list of allowed edges + """ + if not all([i in ['disc', 'disc_num'] + for i in gru.nodes_types(data).values()]): + logger_builder.error( + f"K2 deals only with discrete data. 
Continuous data: {[col for col, type in gru.nodes_types(data).items() if type not in ['disc', 'disc_num']]}") + return None + + if len(self.scoring_function) != 2: + from pgmpy.estimators import K2Score + scoring_function = K2Score + else: + scoring_function = self.scoring_function[1] + + if not init_edges: + best_model = self.optimizer.estimate( + scoring_method=scoring_function(data), + black_list=self.black_list, + white_list=white_list, + show_progress=progress_bar + ) + else: + + if remove_init_edges: + startdag = DAG() + nodes = [str(v) for v in self.vertices] + startdag.add_nodes_from(nodes=nodes) + startdag.add_edges_from(ebunch=init_edges) + best_model = self.optimizer.estimate( + black_list=self.black_list, + white_list=white_list, + start_dag=startdag, + show_progress=False) + else: + best_model = self.optimizer.estimate( + black_list=self.black_list, + white_list=white_list, + fixed_edges=init_edges, + show_progress=False) + + structure = [list(x) for x in list(best_model.edges())] + self.skeleton['E'] = structure + + def apply_group1(self, + data: DataFrame, + progress_bar: bool, + init_edges: Optional[List[Tuple[str, + str]]], + remove_init_edges: bool, + white_list: Optional[List[Tuple[str, + str]]]): + """ + This method implements the group of scoring functions. + Group: + "MI" - Mutual Information, + "LL" - Log Likelihood, + "BIC" - Bayesian Information Criteria, + "AIC" - Akaike information Criteria. 
+ """ + column_name_dict = dict([(n.name, i) + for i, n in enumerate(self.vertices)]) + blacklist_new = [] + for pair in self.black_list: + blacklist_new.append( + (column_name_dict[pair[0]], column_name_dict[pair[1]])) + if white_list: + white_list_old = white_list[:] + white_list = [] + for pair in white_list_old: + white_list.append( + (column_name_dict[pair[0]], column_name_dict[pair[1]])) + if init_edges: + init_edges_old = init_edges[:] + init_edges = [] + for pair in init_edges_old: + init_edges.append( + (column_name_dict[pair[0]], column_name_dict[pair[1]])) + + bn = hc_method( + data, + metric=self.scoring_function[0], + restriction=white_list, + init_edges=init_edges, + remove_geo_edges=remove_init_edges, + black_list=blacklist_new, + debug=progress_bar) + structure = [] + nodes = sorted(list(bn.nodes())) + for rv in nodes: + for pa in bn.F[rv]['parents']: + structure.append([list(column_name_dict.keys())[list(column_name_dict.values()).index( + pa)], list(column_name_dict.keys())[list(column_name_dict.values()).index(rv)]]) + self.skeleton['E'] = structure + + +class HCStructureBuilder(HillClimbDefiner): + """ + Final object with build method + """ + + def __init__(self, data: DataFrame, + descriptor: Dict[str, Dict[str, str]], + scoring_function: Tuple[str, Callable], + regressor: Optional[object], + has_logit: bool, use_mixture: bool): + """ + :param data: train data + :param descriptor: map for data + """ + + super( + HCStructureBuilder, + self).__init__( + descriptor=descriptor, + data=data, + scoring_function=scoring_function, + regressor=regressor) + self.use_mixture = use_mixture + self.has_logit = has_logit + + def build(self, data: DataFrame, + progress_bar: bool, + classifier: Optional[object], + regressor: Optional[object], + params: Optional[ParamDict] = None, + **kwargs): + if params: + for param, value in params.items(): + self.params[param] = value + + init_nodes = self.params.pop('init_nodes') + bl_add = self.params.pop('bl_add') + + # 
Level 1 + self.skeleton['V'] = self.vertices + + self.restrict(data, init_nodes, bl_add) + if self.scoring_function[0] == 'K2': + self.apply_K2(data=data, progress_bar=progress_bar, **self.params) + elif self.scoring_function[0] in ['MI', 'LL', 'BIC', 'AIC']: + self.apply_group1( + data=data, + progress_bar=progress_bar, + **self.params) + + # Level 2 + + self.get_family() + self.overwrite_vertex(has_logit=self.has_logit, + use_mixture=self.use_mixture, + classifier=classifier, + regressor=regressor) diff --git a/bamt/networks/base.py b/bamt/networks/base.py index d79a724..84464ed 100644 --- a/bamt/networks/base.py +++ b/bamt/networks/base.py @@ -12,14 +12,17 @@ from joblib import Parallel, delayed from pyvis.network import Network from pyitlib import discrete_random_variable as drv +from pgmpy.estimators import K2Score -from bamt.builders import ParamDict +from bamt.builders.builders_base import ParamDict +from bamt.builders.hc_builder import HCStructureBuilder +from bamt.builders.evo_builder import EvoStructureBuilder from bamt.log import logger_network from bamt.config import config from bamt.nodes.base import BaseNode -import bamt.builders as Builders +import bamt.builders as builders from typing import Dict, Tuple, List, Callable, Optional, Type, Union, Any, Sequence @@ -91,19 +94,21 @@ def add_nodes(self, descriptor: Dict[str, Dict[str, str]]): return None self.descriptor = descriptor # LEVEL 1 - worker_1 = Builders.VerticesDefiner(descriptor, regressor=None) + worker_1 = builders.builders_base.VerticesDefiner( + descriptor, regressor=None) self.nodes = worker_1.vertices def add_edges(self, data: pd.DataFrame, scoring_function: Union[Tuple[str, Callable], - Tuple[str]], + Tuple[str]] = ('K2', K2Score), progress_bar: bool = True, classifier: Optional[object] = None, regressor: Optional[object] = None, params: Optional[ParamDict] = None, - optimizer: str = 'HC'): + optimizer: str = 'HC', + **kwargs): """ Base function for Structure learning scoring_function: 
tuple with the following format (NAME, scoring_function) or (NAME,) @@ -142,26 +147,34 @@ def add_edges(self, f"{self.type} BN does not support {'discrete' if self.type == 'Continuous' else 'continuous'} data") return None if optimizer == 'HC': - worker = Builders.HCStructureBuilder( + worker = HCStructureBuilder( data=data, descriptor=self.descriptor, scoring_function=scoring_function, has_logit=self.has_logit, use_mixture=self.use_mixture, regressor=regressor) + elif optimizer == 'Evo': + worker = EvoStructureBuilder( + data=data, + descriptor=self.descriptor, + has_logit=self.has_logit, + use_mixture=self.use_mixture, + regressor=regressor) - self.sf_name = scoring_function[0] + self.sf_name = scoring_function[0] - worker.build( - data=data, - params=params, - classifier=classifier, - regressor=regressor, - progress_bar=progress_bar) + worker.build( + data=data, + params=params, + classifier=classifier, + regressor=regressor, + progress_bar=progress_bar, + **kwargs) - # update family - self.nodes = worker.skeleton['V'] - self.edges = worker.skeleton['E'] + # update family + self.nodes = worker.skeleton['V'] + self.edges = worker.skeleton['E'] def calculate_weights(self, discretized_data: pd.DataFrame): """ @@ -280,7 +293,7 @@ def set_structure(self, if edges: self.set_edges(edges=edges) if overwrite: - builder = Builders.VerticesDefiner( + builder = builders.builders_base.VerticesDefiner( descriptor=self.descriptor, regressor=None) # init worker builder.skeleton['V'] = builder.vertices # 1 stage builder.skeleton['E'] = self.edges diff --git a/bamt/networks/hybrid_bn.py b/bamt/networks/hybrid_bn.py index 93c5e87..fc68402 100644 --- a/bamt/networks/hybrid_bn.py +++ b/bamt/networks/hybrid_bn.py @@ -19,4 +19,4 @@ def validate(self, descriptor: Dict[str, Dict[str, str]]) -> bool: types = descriptor['types'] s = set(types.values()) return True if ({'cont', 'disc', 'disc_num'} == s) or ( - {'cont', 'disc'} == s) or ({'cont', 'disc_num'} == s) else False + {'cont', 
'disc'} == s) or ({'cont', 'disc_num'} == s) else False diff --git a/bamt/nodes/__init__.py b/bamt/nodes/__init__.py index 7933c0e..78c7b68 100644 --- a/bamt/nodes/__init__.py +++ b/bamt/nodes/__init__.py @@ -1,4 +1,10 @@ -__all__ = ['base', - 'conditional_gaussian_node', 'conditional_logit_node', 'conditional_mixture_gaussian_node', - 'discrete_node', 'logit_node', - 'gaussian_node', 'mixture_gaussian_node', ] +__all__ = [ + 'base', + 'conditional_gaussian_node', + 'conditional_logit_node', + 'conditional_mixture_gaussian_node', + 'discrete_node', + 'logit_node', + 'gaussian_node', + 'mixture_gaussian_node', +] diff --git a/bamt/nodes/conditional_gaussian_node.py b/bamt/nodes/conditional_gaussian_node.py index 67b32f6..b737fc5 100644 --- a/bamt/nodes/conditional_gaussian_node.py +++ b/bamt/nodes/conditional_gaussian_node.py @@ -30,7 +30,8 @@ def __init__(self, name, regressor: Optional[object] = None): self.type = 'ConditionalGaussian' + \ f" ({type(self.regressor).__name__})" - def fit_parameters(self, data: DataFrame) -> Dict[str, Dict[str, CondGaussParams]]: + def fit_parameters( + self, data: DataFrame) -> Dict[str, Dict[str, CondGaussParams]]: """ Train params for Conditional Gaussian Node. Return: @@ -74,8 +75,9 @@ def fit_parameters(self, data: DataFrame) -> Dict[str, Dict[str, CondGaussParams logger_nodes.warning( f"{self.name} {comb}::Pickle failed. BAMT will use Joblib. 
| " + str(serialization.args[0])) - path = self.get_path_joblib(node_name=self.name.replace(' ', '_'), - specific=comb) + path = self.get_path_joblib( + node_name=self.name.replace( + ' ', '_'), specific=comb) joblib.dump(model, path, compress=True, protocol=4) hycprob[str(key_comb)] = {'variance': variance, 'mean': np.nan, @@ -99,7 +101,7 @@ def fit_parameters(self, data: DataFrame) -> Dict[str, Dict[str, CondGaussParams return {"hybcprob": hycprob} def choose(self, - node_info: Dict[str, Dict[str,CondGaussParams]], + node_info: Dict[str, Dict[str, CondGaussParams]], pvals: List[Union[str, float]]) -> float: """ Return value from ConditionalLogit node diff --git a/bamt/nodes/conditional_logit_node.py b/bamt/nodes/conditional_logit_node.py index ac7c220..90e3731 100644 --- a/bamt/nodes/conditional_logit_node.py +++ b/bamt/nodes/conditional_logit_node.py @@ -27,7 +27,8 @@ def __init__(self, name: str, classifier: Optional[object] = None): self.classifier = classifier self.type = 'ConditionalLogit' + f" ({type(self.classifier).__name__})" - def fit_parameters(self, data: DataFrame) -> Dict[str, Dict[str, LogitParams]]: + def fit_parameters( + self, data: DataFrame) -> Dict[str, Dict[str, LogitParams]]: """ Train params on data Return: @@ -70,8 +71,9 @@ def fit_parameters(self, data: DataFrame) -> Dict[str, Dict[str, LogitParams]]: logger_nodes.warning( f"{self.name} {comb}::Pickle failed. BAMT will use Joblib. 
| " + str(serialization.args[0])) - path = self.get_path_joblib(node_name=self.name.replace(' ', '_'), - specific=comb) + path = self.get_path_joblib( + node_name=self.name.replace( + ' ', '_'), specific=comb) joblib.dump(model, path, compress=True, protocol=4) hycprob[str(key_comb)] = {'classes': classes, diff --git a/bamt/nodes/gaussian_node.py b/bamt/nodes/gaussian_node.py index 7c4cd2b..294fc09 100644 --- a/bamt/nodes/gaussian_node.py +++ b/bamt/nodes/gaussian_node.py @@ -51,8 +51,8 @@ def fit_parameters(self, data: DataFrame) -> GaussianParams: logger_nodes.warning( f"{self.name}::Pickle failed. BAMT will use Joblib. | " + str(serialization.args[0])) - path = self.get_path_joblib(node_name=self.name.replace(' ', '_'), - specific=f"{self.name.replace(' ', '_')}") + path = self.get_path_joblib(node_name=self.name.replace( + ' ', '_'), specific=f"{self.name.replace(' ', '_')}") joblib.dump(self.regressor, path, compress=True, protocol=4) return {'mean': np.nan, 'regressor_obj': path, diff --git a/bamt/nodes/logit_node.py b/bamt/nodes/logit_node.py index 12c120c..27fb5df 100644 --- a/bamt/nodes/logit_node.py +++ b/bamt/nodes/logit_node.py @@ -43,7 +43,9 @@ def fit_parameters(self, data: DataFrame) -> LogitParams: logger_nodes.warning( f"{self.name}::Pickle failed. BAMT will use Joblib. | " + str(serialization.args[0])) - path = self.get_path_joblib(self.name, specific=self.name.replace(' ', '_')) + path = self.get_path_joblib( + self.name, specific=self.name.replace( + ' ', '_')) joblib.dump(self.classifier, path, compress=True, protocol=4) serialization_name = 'joblib' diff --git a/bamt/preprocessors.py b/bamt/preprocessors.py index 8c1140b..4ba3c02 100644 --- a/bamt/preprocessors.py +++ b/bamt/preprocessors.py @@ -41,7 +41,7 @@ def code_categories(self, data: DataFrame, data = df[columns] # DATA TO CATEGORIZE encoder_dict = dict() - for col_name, column in data.iteritems(): + for col_name, column in data.items(): # Iterate over (column name, Series) pairs. 
try: df[col_name] = encoder.fit_transform(column.values) diff --git a/bamt/utils/EvoUtils.py b/bamt/utils/EvoUtils.py new file mode 100644 index 0000000..488912e --- /dev/null +++ b/bamt/utils/EvoUtils.py @@ -0,0 +1,113 @@ +import random + +import pandas as pd + +from pgmpy.estimators import K2Score +from pgmpy.models import BayesianNetwork +from golem.core.dag.convert import graph_structure_as_nx_graph +from golem.core.dag.graph_utils import ordered_subnodes_hierarchy +from golem.core.optimisers.graph import OptGraph, OptNode + + +class CustomGraphModel(OptGraph): + def evaluate(self, data: pd.DataFrame): + nodes = data.columns.to_list() + _, labels = graph_structure_as_nx_graph(self) + return len(nodes) + + +class CustomGraphNode(OptNode): + def __str__(self): + return f'{self.content["name"]}' + + +def K2_metric(graph: CustomGraphModel, data: pd.DataFrame): + graph_nx, labels = graph_structure_as_nx_graph(graph) + struct = [] + for meta_edge in graph_nx.edges(): + l1 = str(labels[meta_edge[0]]) + l2 = str(labels[meta_edge[1]]) + struct.append([l1, l2]) + + bn_model = BayesianNetwork(struct) + bn_model.add_nodes_from(data.columns) + + score = K2Score(data).score(bn_model) + return -score + + +def custom_mutation_add(graph: CustomGraphModel, **kwargs): + num_mut = 100 + try: + for _ in range(num_mut): + rid = random.choice(range(len(graph.nodes))) + random_node = graph.nodes[rid] + other_random_node = graph.nodes[random.choice( + range(len(graph.nodes)))] + nodes_not_cycling = (random_node.descriptive_id not in + [n.descriptive_id for n in ordered_subnodes_hierarchy(other_random_node)] and + other_random_node.descriptive_id not in + [n.descriptive_id for n in ordered_subnodes_hierarchy(random_node)]) + if nodes_not_cycling: + random_node.nodes_from.append(other_random_node) + break + + except Exception as ex: + print(f'Incorrect connection: {ex}') + return graph + + +def custom_mutation_delete(graph: OptGraph, **kwargs): + num_mut = 100 + try: + for _ in 
range(num_mut): + rid = random.choice(range(len(graph.nodes))) + random_node = graph.nodes[rid] + other_random_node = graph.nodes[random.choice( + range(len(graph.nodes)))] + if random_node.nodes_from is not None and other_random_node in random_node.nodes_from: + random_node.nodes_from.remove(other_random_node) + break + except Exception as ex: + print(ex) + return graph + + +def custom_mutation_reverse(graph: OptGraph, **kwargs): + num_mut = 100 + try: + for _ in range(num_mut): + rid = random.choice(range(len(graph.nodes))) + random_node = graph.nodes[rid] + other_random_node = graph.nodes[random.choice( + range(len(graph.nodes)))] + if random_node.nodes_from is not None and other_random_node in random_node.nodes_from: + random_node.nodes_from.remove(other_random_node) + other_random_node.nodes_from.append(random_node) + break + except Exception as ex: + print(ex) + return graph + + +def has_no_duplicates(graph): + _, labels = graph_structure_as_nx_graph(graph) + if len(labels.values()) != len(set(labels.values())): + raise ValueError('Custom graph has duplicates') + return True + + +def has_no_blacklist_edges(graph, blacklist): + nx_graph, _ = graph_structure_as_nx_graph(graph) + for edge in nx_graph.edges(): + if edge in blacklist: + raise ValueError('Graph contains blacklisted edges') + return True + + +def has_only_whitelist_edges(graph, whitelist): + nx_graph, _ = graph_structure_as_nx_graph(graph) + for edge in nx_graph.edges(): + if edge not in whitelist: + raise ValueError('Graph contains non-whitelisted edges') + return True diff --git a/bamt/utils/MathUtils.py b/bamt/utils/MathUtils.py index c64223d..62073ad 100644 --- a/bamt/utils/MathUtils.py +++ b/bamt/utils/MathUtils.py @@ -238,3 +238,40 @@ def get_brave_matrix( brave_matrix.loc[c1, c2] = br return brave_matrix + + +def _child_dict(net: list): + res_dict = dict() + for e0, e1 in net: + if e1 in res_dict: + res_dict[e1].append(e0) + else: + res_dict[e1] = [e0] + return res_dict + + +def 
precision_recall(pred_net: list, true_net: list, decimal=4): + pred_dict = _child_dict(pred_net) + true_dict = _child_dict(true_net) + corr_undirected = 0 + corr_dir = 0 + for e0, e1 in pred_net: + flag = True + if e1 in true_dict: + if e0 in true_dict[e1]: + corr_undirected += 1 + corr_dir += 1 + flag = False + if (e0 in true_dict) and flag: + if e1 in true_dict[e0]: + corr_undirected += 1 + pred_len = len(pred_net) + true_len = len(true_net) + shd = pred_len + true_len - corr_undirected - corr_dir + return {'AP': round(corr_undirected / pred_len, decimal), + 'AR': round(corr_undirected / true_len, decimal), + # 'F1_undir': round(2 * (corr_undirected / pred_len) * (corr_undirected / true_len) / (corr_undirected / pred_len + corr_undirected / true_len), decimal), + 'AHP': round(corr_dir / pred_len, decimal), + 'AHR': round(corr_dir / true_len, decimal), + # 'F1_directed': round(2*(corr_dir/pred_len)*(corr_dir/true_len)/(corr_dir/pred_len+corr_dir/true_len), decimal), + 'SHD': shd} diff --git a/docs/source/api/builders.rst b/docs/source/api/builders.rst index 23f0ca5..b525cc8 100644 --- a/docs/source/api/builders.rst +++ b/docs/source/api/builders.rst @@ -1,7 +1,17 @@ Builders ======== -.. automodule:: bamt.builders +.. automodule:: bamt.builders.builders_base + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: bamt.builders.hc_builder + :members: + :undoc-members: + :show-inheritance: + +.. 
automodule:: bamt.builders.evo_builder :members: :undoc-members: :show-inheritance: \ No newline at end of file diff --git a/docs/source/models/bayesiannetworks/base_network.rst b/docs/source/models/bayesiannetworks/base_network.rst index e3df815..1d08daf 100644 --- a/docs/source/models/bayesiannetworks/base_network.rst +++ b/docs/source/models/bayesiannetworks/base_network.rst @@ -1,8 +1,11 @@ Bayesian Networks ================= +BaseNetwork class, Hill Climbing and Evolutionary Algorithms +------------------------------------------------------------ + BaseNetwork class ------------------ +~~~~~~~~~~~~~~~~~ All three BN types are based on an abstract class ``BaseNetwork``. This class provides the basic functions for all BN types. @@ -12,4 +15,64 @@ The ``DiscreteBN`` and ``ContinuousBN`` are two BN types that are used to repres .. autoclass:: bamt.networks.BaseNetwork :members: - :no-undoc-members: \ No newline at end of file + :no-undoc-members: + +Hill Climbing and Evolutionary Algorithms +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, BAMT employs Hill Climbing and Evolutionary Algorithms to learn the structure of BNs. To choose between them, +specify the ``optimizer`` parameter of the ``add_edges`` method. + +Example: + +.. 
code-block:: python + + from bamt.networks.discrete_bn import DiscreteBN + import bamt.preprocessors as pp + import pandas as pd + from sklearn import preprocessing + + asia = pd.read_csv('data.csv') + encoder = preprocessing.LabelEncoder() + discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') + + p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) + discretized_data, est = p.apply(asia) + + bn = DiscreteBN() + info = p.info + bn.add_nodes(info) + + # add edges using Hill Climbing + bn.add_edges(discretized_data, optimizer='HC') + # add edges using Evolutionary Algorithm + bn.add_edges(discretized_data, optimizer='Evo') + + +The Evolutionary Algorithm accepts these additional parameters: + + :param data: The data from which to build the structure. + :type data: DataFrame + :param classifier: A classification model for discrete nodes, defaults to None. + :type classifier: Optional[object] + :param regressor: A regression model for continuous nodes, defaults to None. + :type regressor: Optional[object] + + :Keyword Args: + * *init_nodes* (list) -- Initial nodes to be included in the population. + * *max_arity* (int) -- Maximum arity for the evolutionary algorithm. + * *timeout* (int) -- Timeout for the evolutionary algorithm in minutes. + * *pop_size* (int) -- Population size for the evolutionary algorithm. + * *crossover_prob* (float) -- Crossover probability for the evolutionary algorithm. + * *mutation_prob* (float) -- Mutation probability for the evolutionary algorithm. + * *custom_mutations* (list) -- Custom mutation types for the evolutionary algorithm. + * *custom_crossovers* (list) -- Custom crossover types for the evolutionary algorithm. + * *selection_type* (SelectionTypesEnum) -- Selection type for the evolutionary algorithm. + * *blacklist* (list) -- Blacklist for the evolutionary algorithm. + * *whitelist* (list) -- Whitelist for the evolutionary algorithm. + * *custom_constraints* (list) -- Custom constraints for the evolutionary algorithm. 
+ * *custom_metric* (function) -- Custom objective metric for the evolutionary algorithm. + + The resulting structure is stored in the `skeleton` attribute of the `EvoStructureBuilder` object. + +HillClimbing parameters are described below in DiscreteBN, ContinuousBN and HybridBN sections. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index d168d3c..5fac7cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ pyvis = ">=0.2.1" missingno = "^0.5.1" pgmpy = "0.1.20" pyitlib = "0.2.2" +thegolem = ">=0.3.1" [tool.poetry.dev-dependencies] pytest = "7.1.3" diff --git a/requirements.txt b/requirements.txt index 0680598..73ea14c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,10 +5,12 @@ pomegranate==0.14.8 gmr==1.6.2 scikit-learn>=0.24.0 scipy==1.9.3 -pyvis>=0.3.2 +pyvis>=0.2.1 pgmpy==0.1.20 pyitlib==0.2.3 catboost>=1.0.6 joblib>=1.1.1 networkx>=3.1 tqdm>=4.65.0 +thegolem>=0.3.1 +typing>=3.7.4.3 diff --git a/tests/sendingRegressors.py b/tests/sendingRegressors.py index ee92932..c05e437 100644 --- a/tests/sendingRegressors.py +++ b/tests/sendingRegressors.py @@ -41,9 +41,13 @@ bn.add_edges(discretized_data, scoring_function=("BIC",), progress_bar=False) -bn.set_regressor(regressors={'Depth': CatBoostRegressor(logging_level="Silent", allow_writing_files=False), - 'Gross': RandomForestRegressor(), - 'Porosity': DecisionTreeRegressor()}) +bn.set_regressor( + regressors={ + 'Depth': CatBoostRegressor( + logging_level="Silent", + allow_writing_files=False), + 'Gross': RandomForestRegressor(), + 'Porosity': DecisionTreeRegressor()}) bn.fit_parameters(hack_data) diff --git a/tests/test_builders.py b/tests/test_builders.py index 847b539..d117e31 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -1,3 +1,6 @@ +from contextlib import redirect_stdout, redirect_stderr +import os + import itertools import unittest @@ -5,10 +8,15 @@ import pandas as pd -import bamt.builders as builders +from bamt.builders.builders_base 
import StructureBuilder, VerticesDefiner +from bamt.builders.hc_builder import HCStructureBuilder, HillClimbDefiner +from bamt.builders.evo_builder import EvoStructureBuilder + from bamt.nodes.gaussian_node import GaussianNode from bamt.nodes.discrete_node import DiscreteNode +from bamt.utils.MathUtils import precision_recall + logging.getLogger("builder").setLevel(logging.CRITICAL) @@ -20,7 +28,7 @@ def setUp(self): "Node1": "disc", "Node2": "disc_num"}, "signs": {"Node0": "pos"}} - self.SB = builders.StructureBuilder(descriptor=self.descriptor) + self.SB = StructureBuilder(descriptor=self.descriptor) def test_restrict(self): self.SB.has_logit = True @@ -84,7 +92,7 @@ def setUp(self): "Node7": "disc_num"}, "signs": {"Node0": "pos", "Node1": "neg"}} - self.VD = builders.VerticesDefiner( + self.VD = VerticesDefiner( descriptor=self.descriptor, regressor=None) def test_first_level(self): @@ -214,9 +222,9 @@ def setUp(self): 0, 4, 0, 1, 2, 0, 0, 3]} def test_apply_K2(self): - hcd = builders.HillClimbDefiner(data=pd.DataFrame(self.data), - descriptor=self.descriptor, - scoring_function=("K2",)) + hcd = HillClimbDefiner(data=pd.DataFrame(self.data), + descriptor=self.descriptor, + scoring_function=("K2",)) hcd.apply_K2(data=pd.DataFrame(self.data), init_edges=None, @@ -245,9 +253,9 @@ def test_apply_K2(self): self.assertEqual(hcd.skeleton["E"], right_edges) def test_apply_group1(self): - hcd = builders.HillClimbDefiner(data=pd.DataFrame(self.data), - descriptor=self.descriptor, - scoring_function=("MI",)) + hcd = HillClimbDefiner(data=pd.DataFrame(self.data), + descriptor=self.descriptor, + scoring_function=("MI",)) hcd.restrict( data=pd.DataFrame( @@ -281,5 +289,55 @@ def test_apply_group1(self): self.assertEqual(hcd.skeleton["E"], right_edges) +class TestEvoStructureBuilder(unittest.TestCase): + + def setUp(self): + self.data = pd.read_csv(r"data/benchmark/asia.csv", index_col=0) + self.descriptor = {'types': {'asia': 'disc', + 'tub': 'disc', + 'smoke': 'disc', + 
'lung': 'disc', + 'bronc': 'disc', + 'either': 'disc', + 'xray': 'disc', + 'dysp': 'disc'}, + 'signs': {}} + self.evo_builder = EvoStructureBuilder(data=self.data, + descriptor=self.descriptor, + regressor=None, + has_logit=True, + use_mixture=True) + # Replace this with your actual reference DAG + self.reference_dag = [ + ('asia', 'tub'), + ('tub', 'either'), + ('smoke', 'lung'), + ('smoke', 'bronc'), + ('lung', 'either'), + ('bronc', 'dysp'), + ('either', 'xray'), + ('either', 'dysp') + ] + + def test_build(self): + # placeholder kwargs + kwargs = {} + self.evo_builder.build( + data=self.data, + classifier=None, + regressor=None, + **kwargs) + + obtained_dag = self.evo_builder.skeleton['E'] + num_edges = len(obtained_dag) + self.assertGreaterEqual(num_edges, 1, msg="Obtained graph should have at least one edge.") + + dist = precision_recall(obtained_dag, self.reference_dag)['SHD'] + self.assertLess( + dist, + 15, + msg=f"Structural Hamming Distance should be less than 15, obtained SHD = {dist}") + + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_networks.py b/tests/test_networks.py index c41b20c..8eb367d 100644 --- a/tests/test_networks.py +++ b/tests/test_networks.py @@ -53,11 +53,17 @@ def prepare_bn_and_data(self): self.bn.add_nodes(info) - self.bn.add_edges(discretized_data, scoring_function=("BIC",), progress_bar=False) - - self.bn.set_regressor(regressors={'Depth': CatBoostRegressor(logging_level="Silent", allow_writing_files=False), - 'Gross': RandomForestRegressor(), - 'Porosity': DecisionTreeRegressor()}) + self.bn.add_edges( + discretized_data, scoring_function=( + "BIC",), progress_bar=False) + + self.bn.set_regressor( + regressors={ + 'Depth': CatBoostRegressor( + logging_level="Silent", + allow_writing_files=False), + 'Gross': RandomForestRegressor(), + 'Porosity': DecisionTreeRegressor()}) return hack_data @@ -186,7 +192,12 @@ def test_joblib_pathsave(self): hack_data = self.prepare_bn_and_data() 
self.bn.fit_parameters(hack_data) - self.assertGreater(self.bn.sample(100, progress_bar=False).size, 0, "Sampling is broken") + self.assertGreater( + self.bn.sample( + 100, + progress_bar=False).size, + 0, + "Sampling is broken") saveloc = self.bn.distributions["Gross"]['hybcprob']["['COMPRESSION']"]['regressor_obj'] diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 8ff618b..f437725 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -157,7 +157,8 @@ def test_predict(self): class TestConditionalGaussianNode(unittest.TestCase): def setUp(self): - self.node = conditional_gaussian_node.ConditionalGaussianNode(name="test") + self.node = conditional_gaussian_node.ConditionalGaussianNode( + name="test") self.data_dict = { "node0": np.random.normal(1, 4, 30), "node1": np.random.normal(2, .1, 30), @@ -176,9 +177,11 @@ def setUp(self): def fit_parameters(self, regressor=None): if regressor is not None: self.node.regressor = regressor - self.node.type = 'ConditionalGaussian' + f" ({type(regressor).__name__})" + self.node.type = 'ConditionalGaussian' + \ + f" ({type(regressor).__name__})" - node_without_parents = conditional_gaussian_node.ConditionalGaussianNode(name="foster-son") + node_without_parents = conditional_gaussian_node.ConditionalGaussianNode( + name="foster-son") node_without_parents.children = ["node6", "node5"] params_parents = self.node.fit_parameters( @@ -272,7 +275,8 @@ def test_predict(self): class TestConditionalMixtureGaussianNode(unittest.TestCase): def setUp(self): - self.node = conditional_mixture_gaussian_node.ConditionalMixtureGaussianNode(name="test") + self.node = conditional_mixture_gaussian_node.ConditionalMixtureGaussianNode( + name="test") self.data_dict = { "node0": np.random.normal(1, 4, 30), "node1": np.random.normal(2, .1, 30), diff --git a/tutorials/example_geo.ipynb b/tutorials/example_geo.ipynb index 8df1a21..408312e 100644 --- a/tutorials/example_geo.ipynb +++ b/tutorials/example_geo.ipynb @@ -33,18 +33,12 @@ 
"import bamt.preprocessors as pp\n", "\n", "import pandas as pd\n", - "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn import preprocessing\n", - "from sklearn.metrics import accuracy_score, mean_squared_error\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.neighbors import KNeighborsClassifier\n", - "from sklearn.tree import DecisionTreeClassifier\n", - "from bamt.Builders import StructureBuilder\n", + "from sklearn.metrics import mean_squared_error\n", "\n", "from pgmpy.estimators import K2Score\n", - "from gmr import GMM\n", "import seaborn as sns" ] }, diff --git a/tutorials/regressor_exp.ipynb b/tutorials/regressor_exp.ipynb index 479d53b..377ea9a 100644 --- a/tutorials/regressor_exp.ipynb +++ b/tutorials/regressor_exp.ipynb @@ -32,19 +32,12 @@ "import bamt.preprocessors as pp\n", "\n", "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", "\n", "from sklearn import preprocessing\n", - "from sklearn.metrics import accuracy_score, mean_squared_error\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.neighbors import KNeighborsClassifier\n", - "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.metrics import mean_squared_error\n", "from xgboost import XGBRegressor\n", - "from bamt.Builders import StructureBuilder\n", "\n", "from pgmpy.estimators import K2Score\n", - "from gmr import GMM\n", "import seaborn as sns" ] },