Commit a9958e8
added large bn experiments
jrzkaminski committed Jul 4, 2024
1 parent a19c435 commit a9958e8
Showing 169 changed files with 20,848 additions and 0 deletions.
465 changes: 465 additions & 0 deletions paper_experiments/large_bayesian_networks_experiments/divided_bn.py

Large diffs are not rendered by default.

Two further changed files could not be rendered by the diff viewer.
@@ -0,0 +1 @@
from golem import *
Empty file.
Empty file.
@@ -0,0 +1,102 @@
import datetime
from collections import UserDict

from typing import Dict, Any

from golem.core.adapter.nx_adapter import BaseNetworkxAdapter
from golem.core.dag.verification_rules import DEFAULT_DAG_RULES
from golem.core.log import LoggerAdapter, default_log
from golem.core.optimisers.dynamic_graph_requirements import DynamicGraphRequirements
from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer
from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
from golem.core.optimisers.optimization_parameters import GraphRequirements
from golem.core.optimisers.optimizer import GraphGenerationParams
from golem.utilities.utilities import determine_n_jobs


class ApiParams(UserDict):
"""
    Class that distributes the parameters specified in the API between the following classes:
`GraphRequirements`, `GraphGenerationParams`, `GPAlgorithmParameters`.
"""
def __init__(self, input_params: Dict[str, Any], n_jobs: int = -1, timeout: float = 5):
self.log: LoggerAdapter = default_log(self)
self.n_jobs: int = determine_n_jobs(n_jobs)
self.timeout = timeout

self._input_params = input_params
self._input_params['timeout'] = timeout if isinstance(timeout, datetime.timedelta) else datetime.timedelta(minutes=timeout)
self._default_common_params = self.get_default_common_params()
super().__init__(self._input_params)

def get_default_common_params(self):
""" Common params that do not belong to any category
(from `GPAlgorithmParameters`, `GraphGenerationParams`, `GraphRequirements`). """
default_common_params = {
'optimizer': EvoGraphOptimizer,
'initial_graphs': list(),
'objective': None
}
self.log.info("EvoGraphOptimizer was used as default optimizer, "
"will be overwritten by specified one if there is any.")
return default_common_params

def get_default_graph_generation_params(self):
""" Default graph generations params to minimize the number of arguments that must be specified in API.
Need to be hardcoded like that since the list of input arguments is not the same as the class fields list. """
default_graph_generation_params = {
'adapter': BaseNetworkxAdapter(),
'rules_for_constraint': DEFAULT_DAG_RULES,
'advisor': None,
'node_factory': None,
'random_graph_factory': None,
'available_node_types': None,
'remote_evaluator': None
}
self.log.info("BaseNetworkxAdapter was used as default adapter, "
"will be overwritten by specified one if there is any.")
return default_graph_generation_params

    def get_gp_algorithm_parameters(self) -> GPAlgorithmParameters:
        default_gp_algorithm_params_dict = dict(vars(GPAlgorithmParameters()))
        for k in list(self._input_params):
            if k in default_gp_algorithm_params_dict:
                default_gp_algorithm_params_dict[k] = self._input_params.pop(k)
        return GPAlgorithmParameters(**default_gp_algorithm_params_dict)

    def get_graph_generation_parameters(self) -> GraphGenerationParams:
        default_graph_generation_params_dict = self.get_default_graph_generation_params()
        for k in list(self._input_params):
            if k in default_graph_generation_params_dict:
                default_graph_generation_params_dict[k] = self._input_params.pop(k)
        return GraphGenerationParams(**default_graph_generation_params_dict)

    def get_graph_requirements(self) -> GraphRequirements:
        default_graph_requirements_params_dict = dict(vars(GraphRequirements()))
        # check whether there are any custom domain-specific graph requirements params
        is_custom_graph_requirements_params = \
            any(k not in default_graph_requirements_params_dict for k in self._input_params)
        for k, v in self._input_params.items():
            # add all parameters except the common ones left unused after GPAlgorithmParameters
            # and GraphGenerationParams initialization, since they can be custom domain-specific params
            if k not in self._default_common_params:
                default_graph_requirements_params_dict[k] = v
        if is_custom_graph_requirements_params:
            return DynamicGraphRequirements(default_graph_requirements_params_dict)
        else:
            return GraphRequirements(**default_graph_requirements_params_dict)

def get_actual_common_params(self) -> Dict[str, Any]:
for k, v in self._input_params.items():
if k in self._default_common_params:
self._default_common_params[k] = v
return self._default_common_params
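
A minimal usage sketch (not part of the commit) may help here: one flat dict goes in, and each getter consumes the keys it recognises, so the call order below mirrors the one used by the GOLEM facade further down. `my_objective` and `my_graph` are hypothetical placeholders.

params = ApiParams(
    input_params={
        'objective': my_objective,        # common param (placeholder)
        'initial_graphs': [my_graph],     # common param (placeholder)
        'pop_size': 20,                   # consumed by GPAlgorithmParameters
        'max_depth': 5,                   # ends up in the graph requirements
    },
    timeout=2,
)
gp_params = params.get_gp_algorithm_parameters()        # pops 'pop_size'
generation_params = params.get_graph_generation_parameters()
requirements = params.get_graph_requirements()          # picks up 'max_depth'
common = params.get_actual_common_params()              # objective, initial_graphs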
@@ -0,0 +1,139 @@
import logging
from typing import Optional

from golem.api.api_utils.api_params import ApiParams
from golem.core.constants import DEFAULT_API_TIMEOUT_MINUTES
from golem.core.log import Log, default_log
from golem.utilities.utilities import set_random_seed


class GOLEM:
"""
Main class for GOLEM API.
Args:
:param timeout: timeout for optimization.
:param seed: value for a fixed random seed.
:param logging_level: logging levels are the same as in `logging <https://docs.python.org/3/library/logging.html>`_.
.. details:: Possible options:
- ``50`` -> critical
- ``40`` -> error
- ``30`` -> warning
- ``20`` -> info
- ``10`` -> debug
- ``0`` -> nonset
    :param n_jobs: number of jobs for parallelization (set to ``-1`` to use all CPUs). Defaults to ``-1``.
:param graph_requirements_class: class to specify custom graph requirements.
Must be inherited from GraphRequirements class.
    ``GPAlgorithmParameters`` parameters
    :param crossover_prob: crossover probability (chance that two individuals will be mated).
:param mutation_prob: mutation probability (chance that an individual will be mutated).
    :param variable_mutation_num: flag to apply mutation once or several times per individual in each iteration.
:param max_num_of_operator_attempts: max number of unsuccessful evo operator attempts before continuing.
    :param mutation_strength: strength of mutation in tree (used in certain mutation types).
    :param min_pop_size_with_elitism: minimal population size with which elitism is applicable.
    :param required_valid_ratio: ratio of valid individuals in the next population required to continue optimization.
Used in `ReproductionController` to compensate for invalid individuals. See the class for details.
:param adaptive_mutation_type: enables adaptive Mutation agent.
:param context_agent_type: enables graph encoding for Mutation agent.
Adaptive mutation agent uses specified algorithm. 'random' type is the default non-adaptive version.
Requires crossover_types to be CrossoverTypesEnum.none for correct adaptive learning,
so that fitness changes depend only on agent's actions (chosen mutations).
``MutationAgentTypeEnum.bandit`` uses Multi-Armed Bandit (MAB) learning algorithm.
``MutationAgentTypeEnum.contextual_bandit`` uses contextual MAB learning algorithm.
``MutationAgentTypeEnum.neural_bandit`` uses contextual MAB learning algorithm with Deep Neural encoding.
Parameter `context_agent_type` specifies implementation of graph/node encoder for adaptive
mutation agent. It is relevant for contextual and neural bandits.
:param decaying_factor: decaying factor for Multi-Armed Bandits for managing the profit from operators
The smaller the value of decaying_factor, the larger the influence for the best operator.
:param window_size: the size of sliding window for Multi-Armed Bandits to decrease variance.
The window size is measured by the number of individuals to consider.
    :param selection_types: sequence of selection operator types.
    :param crossover_types: sequence of crossover operator types.
    :param mutation_types: sequence of mutation operator types.
    :param elitism_type: type of elitism operator used in evolution.
:param regularization_type: type of regularization operator
Regularization attempts to cut off the subtrees of the graph. If the truncated graph
is not worse than the original, then it enters the new generation as a simpler solution.
Regularization is not used by default, it must be explicitly enabled.
:param genetic_scheme_type: type of genetic evolutionary scheme
The `generational` scheme is a standard scheme of the evolutionary algorithm.
It specifies that at each iteration the entire generation is updated.
        In the `steady_state` scheme, individuals from previous populations are mixed with the ones from the new population.
        UUIDs of individuals do not repeat within one population.
        The `parameter_free` scheme is the same as `steady_state` for now.
``GraphGenerationParams`` parameters
:param adapter: instance of domain graph adapter for adaptation
between domain and optimization graphs
:param rules_for_constraint: collection of constraints for graph verification
    :param advisor: instance providing task- and context-specific advice for graph changes.
:param node_factory: instance for generating new nodes in the process of graph search
:param remote_evaluator: instance of delegate evaluator for evaluation of graphs
``GraphRequirements`` parameters
:param start_depth: start value of adaptive tree depth
:param max_depth: max depth of the resulting graph
    :param min_arity: min number of parents for a node.
    :param max_arity: max number of parents for a node.
    Also, custom domain-specific parameters can be specified here. These parameters can then be used in
    ``DynamicGraphRequirements`` as fields.
"""
def __init__(self,
timeout: Optional[float] = DEFAULT_API_TIMEOUT_MINUTES,
seed: Optional[int] = None,
logging_level: int = logging.INFO,
n_jobs: int = -1,
**all_parameters):
set_random_seed(seed)
self.log = self._init_logger(logging_level)

self.api_params = ApiParams(input_params=all_parameters,
n_jobs=n_jobs,
timeout=timeout)
self.gp_algorithm_parameters = self.api_params.get_gp_algorithm_parameters()
self.graph_generation_parameters = self.api_params.get_graph_generation_parameters()
self.graph_requirements = self.api_params.get_graph_requirements()

def optimise(self, **custom_optimiser_parameters):
""" Method to start optimisation process.
`custom_optimiser_parameters` parameters can be specified additionally to use it directly in optimiser.
"""
common_params = self.api_params.get_actual_common_params()
optimizer_cls = common_params['optimizer']
objective = common_params['objective']
initial_graphs = common_params['initial_graphs']

self.optimiser = optimizer_cls(objective,
initial_graphs,
self.graph_requirements,
self.graph_generation_parameters,
self.gp_algorithm_parameters,
**custom_optimiser_parameters)

found_graphs = self.optimiser.optimise(objective)
return found_graphs

@staticmethod
def _init_logger(logging_level: int):
# reset logging level for Singleton
Log().reset_logging_level(logging_level)
return default_log(prefix='GOLEM logger')
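
For orientation, a hedged end-to-end sketch of driving this facade (not part of the commit): the toy metric and initial graph are illustrative assumptions; `Objective` comes from `golem.core.optimisers.objective`, and plain `networkx` graphs match the default `BaseNetworkxAdapter` set up by `ApiParams`.

import networkx as nx

from golem.core.optimisers.objective import Objective


def num_edges(graph) -> float:
    # toy quality metric to be minimised: prefer graphs with fewer edges
    return float(graph.number_of_edges())


model = GOLEM(timeout=1,                                     # minutes
              objective=Objective({'num_edges': num_edges}),
              initial_graphs=[nx.DiGraph([(0, 1), (1, 2)])],
              pop_size=10,                                   # routed to GPAlgorithmParameters
              max_depth=5)                                   # routed to graph requirements
found_graphs = model.optimise()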
Empty file.
@@ -0,0 +1,2 @@
from .adapter import BaseOptimizationAdapter, DirectAdapter, IdentityAdapter
from .adapt_registry import AdaptRegistry, register_native
@@ -0,0 +1,147 @@
from copy import copy
from functools import partial
from typing import Callable

from golem.utilities.singleton_meta import SingletonMeta


class AdaptRegistry(metaclass=SingletonMeta):
"""Registry of callables that require adaptation of argument/return values.
AdaptRegistry together with :py:class:`golem.core.adapter.adapter.BaseOptimizationAdapter`
enables automatic transformation between internal and domain graph representations.
**Short description of the use-case**
Operators & verification rules that operate on internal representation
of graphs must be marked as native with decorator
:py:func:`golem.core.adapter.adapt_registry.register_native`.
Usually this is the case when users of the framework provide custom
operators for internal optimization graphs. When custom operators
operate on domain graphs, nothing is required.
**Extended description**
    The optimiser operates on a generic graph representation.
    Because of this, any domain function requires adaptation
    of its graph arguments. The adapter can automatically adapt
    arguments to the generic form in such cases.
Important notions:
* 'Domain' functions operate with domain-specific graphs.
* 'Native' functions operate with generic graphs used by optimiser.
* 'External' functions are functions defined by users of optimiser.
Most notably, custom mutations and custom verifier rules.
* 'Internal' functions are those defined by graph optimiser.
Most notably, the default set of mutations and verifier rules.
All internal functions are native.
Adaptation registry usage and behavior:
* Domain functions are adapted by default.
* Native functions don't require adaptation of their arguments.
    * External functions are considered 'domain' functions by default.
      Hence, their arguments are adapted, unless users of the optimiser
      exclude them from the process of automatic adaptation,
      which can be done by registering them as 'native'.
AdaptRegistry can be safely used with multiprocessing
insofar as all relevant functions are registered as native
in the main process before child processes are started.
"""

_native_flag_attr_name_ = '_adapter_is_optimizer_native'

def __init__(self):
self._registered_native_callables = []

def register_native(self, fun: Callable) -> Callable:
"""Registers callable object as an internal function
that can work with internal graph representation.
Hence, it doesn't require adaptation when called by the optimiser.
        Implementation details: works by setting a special attribute on the object.
        This attribute is then checked by ``is_native``, which is used by adapters.
Args:
fun: function or callable to be registered as native
Returns:
Callable: same function with special private attribute set
"""
original_function = AdaptRegistry._get_underlying_func(fun)
setattr(original_function, AdaptRegistry._native_flag_attr_name_, True)
self._registered_native_callables.append(original_function)
return fun

def unregister_native(self, fun: Callable) -> Callable:
"""Unregisters callable object. See ``register_native``.
Args:
fun: function or callable to be unregistered as native
Returns:
Callable: same function with special private attribute unset
"""
original_function = AdaptRegistry._get_underlying_func(fun)
if hasattr(original_function, AdaptRegistry._native_flag_attr_name_):
delattr(original_function, AdaptRegistry._native_flag_attr_name_)
self._registered_native_callables.remove(original_function)
return fun

@staticmethod
def is_native(fun: Callable) -> bool:
"""Tests callable object for a presence of specific attribute
that tells that this function must not be restored with Adapter.
Args:
fun: tested Callable (function, method, functools.partial, or any callable object)
Returns:
bool: True if the callable was registered as native, False otherwise.
"""
original_function = AdaptRegistry._get_underlying_func(fun)
is_native = getattr(original_function, AdaptRegistry._native_flag_attr_name_, False)
return is_native

def clear_registered_callables(self):
# copy is to avoid removing elements from list while iterating
for f in copy(self._registered_native_callables):
self.unregister_native(f)

@staticmethod
def _get_underlying_func(obj: Callable) -> Callable:
"""Recursively unpacks 'partial' and 'method' objects to get underlying function.
Args:
obj: callable to try unpacking
Returns:
Callable: unpacked function that underlies the callable, or the unchanged object itself
"""
while True:
if isinstance(obj, partial): # if it is a 'partial'
obj = obj.func
elif hasattr(obj, '__func__'): # if it is a 'method'
obj = obj.__func__
else:
return obj # return the unpacked underlying function or the original object


def register_native(fun: Callable) -> Callable:
"""Out-of-class version of the ``register_native``
function that's intended to be used as a decorator.
Args:
fun: function or callable to be registered as native
Returns:
Callable: same function with special private attribute set
"""
return AdaptRegistry().register_native(fun)
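
A short decorator-usage sketch (not part of the commit): the mutation below and its signature are hypothetical and stand for any callable that operates directly on the optimiser's internal graphs.

from golem.core.adapter import register_native  # re-exported by the package __init__ above


@register_native
def my_internal_mutation(graph, *args, **kwargs):
    # hypothetical operator working on the internal graph representation;
    # marking it native tells the adapter not to translate its arguments
    return graph


assert AdaptRegistry().is_native(my_internal_mutation)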