Skip to content

Commit

Permalink
Merge branch 'master' into get_dist
Browse files Browse the repository at this point in the history
  • Loading branch information
jrzkaminski authored Aug 15, 2023
2 parents a25b4ef + 2e1b1d8 commit aba2926
Show file tree
Hide file tree
Showing 82 changed files with 2,735 additions and 902 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ tutorials/Test.ipynb
tutorials/bamt
.DS_Store
/example_socio.ipynb
/temp.ipynb
15 changes: 7 additions & 8 deletions bamt/builders/builders_base.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import itertools
from typing import Dict, List, Optional, Tuple, Callable, TypedDict, Sequence, Union

from pandas import DataFrame

from bamt.log import logger_builder
from bamt.nodes.conditional_gaussian_node import ConditionalGaussianNode
from bamt.nodes.conditional_logit_node import ConditionalLogitNode
from bamt.nodes.conditional_mixture_gaussian_node import ConditionalMixtureGaussianNode
from bamt.nodes.discrete_node import DiscreteNode
from bamt.nodes.gaussian_node import GaussianNode
from bamt.nodes.conditional_logit_node import ConditionalLogitNode
from bamt.nodes.logit_node import LogitNode
from bamt.nodes.mixture_gaussian_node import MixtureGaussianNode
from bamt.nodes.conditional_mixture_gaussian_node import ConditionalMixtureGaussianNode
from bamt.nodes.conditional_gaussian_node import ConditionalGaussianNode

from bamt.log import logger_builder
from pandas import DataFrame
from bamt.utils import GraphUtils as gru

from typing import Dict, List, Optional, Tuple, Callable, TypedDict, Sequence, Union


class ParamDict(TypedDict, total=False):
init_edges: Optional[Sequence[str]]
Expand Down
293 changes: 293 additions & 0 deletions bamt/builders/composite_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
from datetime import timedelta
from typing import Dict, Optional, List, Tuple, Callable

from golem.core.adapter import DirectAdapter
from golem.core.dag.verification_rules import has_no_cycle, has_no_self_cycled_nodes
from golem.core.log import Log
from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer
from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum
from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum
from golem.core.optimisers.objective import Objective, ObjectiveEvaluate
from golem.core.optimisers.optimization_parameters import GraphRequirements
from golem.core.optimisers.optimizer import GraphGenerationParams
from pandas import DataFrame
from sklearn import preprocessing

import bamt.preprocessors as pp
from bamt.builders.builders_base import VerticesDefiner, EdgesDefiner
from bamt.log import logger_builder
from bamt.nodes.composite_continuous_node import CompositeContinuousNode
from bamt.nodes.composite_discrete_node import CompositeDiscreteNode
from bamt.nodes.discrete_node import DiscreteNode
from bamt.nodes.gaussian_node import GaussianNode
from bamt.utils import EvoUtils as evo
from bamt.utils.composite_utils import CompositeGeneticOperators
from bamt.utils.composite_utils.CompositeModel import CompositeModel, CompositeNode


class CompositeDefiner(VerticesDefiner, EdgesDefiner):
"""
Object that might take additional methods to decompose structure builder class
"""

def __init__(
self,
descriptor: Dict[str, Dict[str, str]],
regressor: Optional[object] = None,
):
super().__init__(descriptor, regressor)

# Notice that vertices are used only by Builders
self.vertices = []

# LEVEL 1: Define a general type of node: Discrete or Сontinuous
for vertex, type in self.descriptor["types"].items():
if type in ["disc_num", "disc"]:
node = CompositeDiscreteNode(name=vertex)
elif type == "cont":
node = CompositeContinuousNode(name=vertex, regressor=regressor)
else:
msg = f"""First stage of automatic vertex detection failed on {vertex} due TypeError ({type}).
Set vertex manually (by calling set_nodes()) or investigate the error."""
logger_builder.error(msg)
continue

self.vertices.append(node)


class CompositeStructureBuilder(CompositeDefiner):
"""
This class uses an evolutionary algorithm based on GOLEM to generate a Directed Acyclic Graph (DAG) that represents
the structure of a Composite Bayesian Network.
Attributes:
data (DataFrame): Input data used to build the structure.
descriptor (dict): Descriptor describing node types and signs.
"""

def __init__(
self,
data: DataFrame,
descriptor: Dict[str, Dict[str, str]],
regressor: Optional[object],
):
super(CompositeStructureBuilder, self).__init__(
descriptor=descriptor, regressor=regressor
)
self.data = data
self.parent_models_dict = {}
self.descriptor = descriptor
self.regressor = regressor
self.params = {
"init_edges": None,
"init_nodes": None,
"remove_init_edges": True,
"white_list": None,
"bl_add": None,
}
self.default_n_jobs = -1
self.default_pop_size = 15
self.default_crossover_prob = 0.9
self.default_mutation_prob = 0.8
self.default_max_arity = 100
self.default_max_depth = 100
self.default_timeout = 180
self.default_num_of_generations = 50
self.default_early_stopping_iterations = 50
self.objective_metric = CompositeGeneticOperators.composite_metric
self.default_crossovers = [
CrossoverTypesEnum.exchange_edges,
CrossoverTypesEnum.exchange_parents_one,
CrossoverTypesEnum.exchange_parents_both,
CompositeGeneticOperators.custom_crossover_all_model,
]
self.default_mutations = [
CompositeGeneticOperators.custom_mutation_add_structure,
CompositeGeneticOperators.custom_mutation_delete_structure,
CompositeGeneticOperators.custom_mutation_reverse_structure,
CompositeGeneticOperators.custom_mutation_add_model,
]
self.default_selection = [SelectionTypesEnum.tournament]
self.default_constraints = [
has_no_self_cycled_nodes,
has_no_cycle,
evo.has_no_duplicates,
]
self.verbose = True
self.logging_level = 50

def overwrite_vertex(
self,
regressor: Optional[Callable],
):
for node_instance in self.vertices:
node = node_instance
if (
len(node_instance.cont_parents + node_instance.disc_parents) < 1
and type(node_instance).__name__ == "CompositeContinuousNode"
):
node = GaussianNode(name=node_instance.name, regressor=regressor)
elif (
len(node_instance.cont_parents + node_instance.disc_parents) < 1
and type(node_instance).__name__ == "CompositeDiscreteNode"
):
node = DiscreteNode(name=node_instance.name)
else:
continue
id_node = self.skeleton["V"].index(node_instance)
node.disc_parents = node_instance.disc_parents
node.cont_parents = node_instance.cont_parents
node.children = node_instance.children
self.skeleton["V"][id_node] = node

def build(
self,
data: DataFrame,
classifier: Optional[object],
regressor: Optional[object],
**kwargs,
):
"""
Calls the search method to execute all the evolutionary computations.
Args:
data (DataFrame): The data from which to build the structure.
classifier (Optional[object]): A classification model for discrete nodes.
regressor (Optional[object]): A regression model for continuous nodes.
"""
best_graph_edge_list, parent_models = self.search(data, **kwargs)

# Convert the best graph to the format used by the Bayesian Network
self.skeleton["V"] = self.vertices
self.skeleton["E"] = best_graph_edge_list
self.parent_models_dict = parent_models

self.get_family()
self.overwrite_vertex(regressor=regressor)

def search(self, data: DataFrame, **kwargs) -> [List[Tuple[str, str]], Dict]:
"""
Executes all the evolutionary computations and returns the best graph's edge list.
Args:
data (DataFrame): The data from which to build the structure.
Returns:
best_graph_edge_list (List[Tuple[str, str]]): The edge list of the best graph found by the search.
"""
# Get the list of node names
vertices = list(data.columns)

encoder = preprocessing.LabelEncoder()
p = pp.Preprocessor([("encoder", encoder)])
preprocessed_data, _ = p.apply(data)

# Create the initial population
initial = kwargs.get(
"custom_initial_structure",
[
CompositeModel(
nodes=[
CompositeNode(
nodes_from=None,
content={
"name": vertex,
"type": p.nodes_types[vertex],
"parent_model": None,
},
)
for vertex in vertices
]
)
],
)

# Define the requirements for the evolutionary algorithm
requirements = GraphRequirements(
max_arity=kwargs.get("max_arity", self.default_max_arity),
max_depth=kwargs.get("max_depth", self.default_max_depth),
num_of_generations=kwargs.get(
"num_of_generations", self.default_num_of_generations
),
timeout=timedelta(minutes=kwargs.get("timeout", self.default_timeout)),
early_stopping_iterations=kwargs.get(
"early_stopping_iterations", self.default_early_stopping_iterations
),
n_jobs=kwargs.get("n_jobs", self.default_n_jobs),
)

# Set the parameters for the evolutionary algorithm
optimizer_parameters = GPAlgorithmParameters(
pop_size=kwargs.get("pop_size", self.default_pop_size),
crossover_prob=kwargs.get("crossover_prob", self.default_crossover_prob),
mutation_prob=kwargs.get("mutation_prob", self.default_mutation_prob),
genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
mutation_types=kwargs.get("custom_mutations", self.default_mutations),
crossover_types=kwargs.get("custom_crossovers", self.default_crossovers),
selection_types=kwargs.get("selection_type", self.default_selection),
)

# Set the adapter for the conversion between the graph and the data
# structures used by the optimizer
adapter = DirectAdapter(
base_graph_class=CompositeModel, base_node_class=CompositeNode
)

# Set the constraints for the graph

constraints = kwargs.get("custom_constraints", [])

constraints.extend(self.default_constraints)

if kwargs.get("blacklist", None) is not None:
constraints.append(evo.has_no_blacklist_edges)
if kwargs.get("whitelist", None) is not None:
constraints.append(evo.has_only_whitelist_edges)

graph_generation_params = GraphGenerationParams(
adapter=adapter, rules_for_constraint=constraints
)

# Define the objective function to optimize
objective = Objective(
{"custom": kwargs.get("custom_metric", self.objective_metric)}
)

# Initialize the optimizer
optimizer = EvoGraphOptimizer(
objective=objective,
initial_graphs=initial,
requirements=requirements,
graph_generation_params=graph_generation_params,
graph_optimizer_params=optimizer_parameters,
)

# Define the function to evaluate the objective function
objective_eval = ObjectiveEvaluate(objective, data=preprocessed_data)

if not kwargs.get("verbose", self.verbose):
Log().reset_logging_level(logging_level=50)

# Run the optimization
optimized_graph = optimizer.optimise(objective_eval)[0]

parent_models = self._get_parent_models(optimized_graph)

# Get the best graph
best_graph_edge_list = optimized_graph.operator.get_edges()
best_graph_edge_list = self._convert_to_strings(best_graph_edge_list)

return best_graph_edge_list, parent_models

@staticmethod
def _convert_to_strings(nested_list):
return [[str(item) for item in inner_list] for inner_list in nested_list]

@staticmethod
def _get_parent_models(graph):
parent_models = {}
for node in graph.nodes:
parent_models[node.content["name"]] = node.content["parent_model"]
return parent_models
Loading

0 comments on commit aba2926

Please sign in to comment.