diff --git a/alea/model.py b/alea/model.py
index 7faf939c..4638678d 100644
--- a/alea/model.py
+++ b/alea/model.py
@@ -610,6 +610,87 @@ def get_model_from_name(statistical_model: str):
         return statistical_model_class


+class CompoundStatisticalModel(StatisticalModel):
+    """Wrapper for creating a statistical model from a list of StatisticalModels
+    TODO: parameter overlap
+    TODO: likelihood name overlap
+
+
+    """
+
+    def __init__(
+        self,
+        model_list=[],
+        confidence_level: float = 0.9,
+        confidence_interval_kind: str = "central",  # one of central, upper, lower
+        confidence_interval_threshold: Optional[Callable[[float], float]] = None,
+        asymptotic_dof: Optional[int] = 1,
+    ):
+        """Store stat model list
+        TODO: should we offer init service here?
+        """
+        self.model_list = model_list
+        self._confidence_level = confidence_level
+        if confidence_interval_kind not in {"central", "upper", "lower"}:
+            raise ValueError("confidence_interval_kind must be one of central, upper, lower")
+        self._confidence_interval_kind = confidence_interval_kind
+        self.confidence_interval_threshold = confidence_interval_threshold
+        self.asymptotic_dof = asymptotic_dof
+
+        self.parameters = Parameters()
+        for m in self.model_list:
+            for k, par in m.parameters.parameters.items():
+                if k not in self.parameters.parameters:
+                    self.parameters.add_parameter(par)
+                else:
+                    assert self.parameters[k] == par
+
+    def _define_parameters(self, parameter_definition, nominal_values=None):
+        raise NotImplementedError("Is this needed for the compound?")
+
+    def generate_data(self, **kwargs):
+        ret = []
+        for m in self.model_list:
+            ret.append(m.generate_data(**kwargs))
+        return ret
+
+    def ll(self, **kwargs):
+        if not set(kwargs.keys()) <= set(self.parameters.names):
+            raise ValueError(
+                set(kwargs.keys()) - set(self.parameters.names), "are not parameters of the model!"
+            )
+        ret = 0
+        for m in self.model_list:
+            mkwargs = {k: i for k, i in kwargs.items() if k in m.parameters.parameters.keys()}
+            ret += m.ll(**mkwargs)
+        return ret
+
+    @property
+    def is_data_set(self):
+        return all([m.is_data_set for m in self.model_list])
+
+    @is_data_set.setter
+    def is_data_set(self, override):
+        raise NotImplementedError("You must set the data for this boolean to be true")
+
+    @property
+    def data(self):
+        ret = []
+        for m in self.model_list:
+            ret.append(m.data)
+        return ret
+
+    @data.setter
+    def data(self, datas):
+        """Need to determine _how many_ of the datasets go to each?
+
+        Perhaps just skip for now and let each likelihood term write its own data-set?
+
+        """
+        for m, data in zip(self.model_list, datas):
+            m.data = data
+
+
 class MinuitWrap:
     """Wrapper for functions to be called by Minuit.
     Initialized with a function f and a Parameters instance.
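The behaviour to note in CompoundStatisticalModel.ll above is the parameter routing: parameters shared by several sub-models (for example a common signal strength) are forwarded to each of them, model-specific parameters only reach the model that declares them, and the per-model log-likelihoods are summed. The standalone sketch below illustrates that routing with toy stand-in classes; ToySubModel and compound_ll are invented for illustration only and are not part of the alea API.

# Standalone illustration (not alea code): how a compound likelihood routes
# parameters to sub-models and sums their log-likelihoods.

class ToySubModel:
    """A stand-in for a statistical model that only knows some parameters."""

    def __init__(self, parameter_names, ll_func):
        self.parameter_names = set(parameter_names)
        self._ll_func = ll_func

    def ll(self, **kwargs):
        return self._ll_func(**kwargs)


def compound_ll(models, **kwargs):
    """Sum the log-likelihoods, passing each model only the parameters it declares."""
    total = 0.0
    for m in models:
        mkwargs = {k: v for k, v in kwargs.items() if k in m.parameter_names}
        total += m.ll(**mkwargs)
    return total


# "mu" is shared by both toy terms; each sigma is specific to one of them.
m_a = ToySubModel(["mu", "sigma_a"], lambda mu, sigma_a: -0.5 * (mu / sigma_a) ** 2)
m_b = ToySubModel(["mu", "sigma_b"], lambda mu, sigma_b: -0.5 * (mu / sigma_b) ** 2)
print(compound_ll([m_a, m_b], mu=1.0, sigma_a=2.0, sigma_b=4.0))  # -0.15625
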
diff --git a/alea/runner.py b/alea/runner.py
index 1aa4e994..8d30225b 100644
--- a/alea/runner.py
+++ b/alea/runner.py
@@ -1,15 +1,16 @@
 import time
 import inspect
 from copy import deepcopy
-from typing import Optional, Dict, Union
+from typing import Optional, Dict, Union, List
 from datetime import datetime
 import warnings
+from functools import singledispatchmethod

 from tqdm import tqdm
 import numpy as np
 from inference_interface import toydata_from_file, numpy_to_toyfile

-from alea.model import StatisticalModel
+from alea.model import StatisticalModel, CompoundStatisticalModel
 from alea.utils import load_yaml


@@ -71,7 +72,49 @@ class Runner:

     """

-    def __init__(
+    def _assign_inference_choices(
+        self,
+        poi: str = "mu",
+        hypotheses: list = ["free"],
+        n_mc: int = 1,
+        common_hypothesis: Optional[dict] = None,
+        generate_values: Optional[Dict[str, float]] = None,
+        nominal_values: Optional[dict] = None,
+        compute_confidence_interval: bool = False,
+        confidence_level: float = 0.9,
+        confidence_interval_kind: str = "central",
+        toydata_mode: str = "generate_and_store",
+        toydata_filename: str = "test_toydata_filename.ii.h5",
+        only_toydata: bool = False,
+        output_filename: str = "test_output_filename.ii.h5",
+        seed: Optional[int] = None,
+        metadata: Optional[dict] = None,
+    ):
+        """Assign the inference and running options shared by both initialisers."""
+        self.poi = poi
+        self.hypotheses = hypotheses if hypotheses else []
+        self.common_hypothesis = common_hypothesis if common_hypothesis else {}
+        self.generate_values = generate_values if generate_values else {}
+        self._compute_confidence_interval = compute_confidence_interval
+        self._n_mc = n_mc
+        self._toydata_filename = toydata_filename
+        self._toydata_mode = toydata_mode
+        self._output_filename = output_filename
+        self.only_toydata = only_toydata
+        self.seed = seed
+        self._metadata = metadata if metadata else {}
+
+        self._result_names, self._result_dtype = self._get_parameter_list()
+
+        self._hypotheses_values = self._get_hypotheses()
+
+    @singledispatchmethod  # type: ignore
+    # (mypy complains about unsupported decorator)
+    def __init__(self, statistical_model):
+        raise NotImplementedError("statistical_model must be a string or a list of strings")
+
+    @__init__.register
+    def single_init(
         self,
         statistical_model: str = "alea.examples.gaussian_model.GaussianModel",
         poi: str = "mu",
@@ -87,7 +130,7 @@ def __init__(
         compute_confidence_interval: bool = False,
         confidence_level: float = 0.9,
         confidence_interval_kind: str = "central",
-        toydata_mode: str = "generate_and_store",
+        toydata_mode: str = "generate",
         toydata_filename: str = "test_toydata_filename.ii.h5",
         only_toydata: bool = False,
         output_filename: str = "test_output_filename.ii.h5",
@@ -95,7 +138,8 @@ def __init__(
         metadata: Optional[dict] = None,
     ):
         """Initialize statistical model, parameters list, and generate values list."""
-        self.poi = poi
+
+        self.initialiser = self.single_init

         statistical_model_class = StatisticalModel.get_model_from_name(statistical_model)

@@ -113,6 +157,7 @@ def __init__(
                     "likelihood_config is duplicated, "
                     "because statistical_model_config is provided!"
                 )
+
             parameter_definition = model_config["parameter_definition"]
             likelihood_config = model_config["likelihood_config"]

@@ -134,21 +179,24 @@ def __init__(
             **statistical_model_args,
         )

-        self.hypotheses = hypotheses if hypotheses else []
-        self.common_hypothesis = common_hypothesis if common_hypothesis else {}
-        self.generate_values = generate_values if generate_values else {}
-        self._compute_confidence_interval = compute_confidence_interval
-        self._n_mc = n_mc
-        self._toydata_filename = toydata_filename
-        self._toydata_mode = toydata_mode
-        self._output_filename = output_filename
-        self.only_toydata = only_toydata
-        self.seed = seed
-        self._metadata = metadata if metadata else {}
-
-        self._result_names, self._result_dtype = self._get_parameter_list()
-
-        self._hypotheses_values = self._get_hypotheses()
+        # assign a range of inference and running parameters:
+        self._assign_inference_choices(
+            poi=poi,
+            hypotheses=hypotheses,
+            n_mc=n_mc,
+            common_hypothesis=common_hypothesis,
+            generate_values=generate_values,
+            nominal_values=nominal_values,
+            compute_confidence_interval=compute_confidence_interval,
+            confidence_level=confidence_level,
+            confidence_interval_kind=confidence_interval_kind,
+            toydata_mode=toydata_mode,
+            toydata_filename=toydata_filename,
+            only_toydata=only_toydata,
+            output_filename=output_filename,
+            seed=seed,
+            metadata=metadata,
+        )

         # find confidence_interval_thresholds function for the hypotheses
         from alea.submitters.local import NeymanConstructor
@@ -164,6 +212,153 @@ def __init__(
            statistical_model_args.get("asymptotic_dof", 1),
        )

+    @__init__.register
+    def multiple_init(
+        self,
+        statistical_models: list,
+        poi: str = "mu",
+        statistical_model_configs: Optional[list] = None,
+        likelihood_configs: Optional[list] = None,
+        statistical_models_args: Optional[list] = None,
+        compound_model_args: Optional[dict] = None,
+        parameter_definitions: Optional[list] = None,
+        hypotheses: list = ["free"],
+        n_mc: int = 1,
+        common_hypothesis: Optional[dict] = None,
+        generate_values: Optional[Dict[str, float]] = None,
+        nominal_values: Optional[dict] = None,
+        compute_confidence_interval: bool = False,
+        confidence_level: float = 0.9,
+        confidence_interval_kind: str = "central",
+        toydata_mode: str = "generate_and_store",
+        toydata_filename: str = "test_toydata_filename.ii.h5",
+        only_toydata: bool = False,
+        output_filename: str = "test_output_filename.ii.h5",
+        seed: Optional[int] = None,
+        metadata: Optional[dict] = None,
+    ):
+        """Initialise a runner with several statistical models, combined into one
+        CompoundStatisticalModel.
+
+        Args:
+            statistical_models (list of strings): a list of statistical model class names
+            statistical_model_configs (list of strings): a list of names of the configs,
+                one for each statistical model
+            likelihood_configs (list of dicts): a list of likelihood configs, one per model
+                (used if statistical_model_configs is not specified)
+            statistical_models_args (list of dicts): a list of extra keyword arguments,
+                one per model (used if statistical_model_configs is not specified)
+            compound_model_args (dict): arguments passed to the compound model --
+                in particular, any Neyman threshold
+        """
+        self.initialiser = self.multiple_init
+
+        statistical_model_classes = [
+            StatisticalModel.get_model_from_name(sm) for sm in statistical_models
+        ]
+        N_models = len(statistical_model_classes)
+
+        # load a range of statistical_model_configs:
+
+        if statistical_model_configs is not None:
+            model_configs = [load_yaml(c) for c in statistical_model_configs]
+            if parameter_definitions is not None:
+                raise ValueError(
+                    "parameter_definitions is duplicated, "
+                    "because statistical_model_configs is provided!"
+                )
+            if likelihood_configs is not None:
+                raise ValueError(
+                    "likelihood_configs is duplicated, "
+                    "because statistical_model_configs is provided!"
+                )
+            parameter_definitions = [mc["parameter_definition"] for mc in model_configs]
+            likelihood_configs = [mc["likelihood_config"] for mc in model_configs]
+
+        # fill in per-model defaults where nothing was provided
+        if statistical_models_args is None:
+            statistical_models_args = [{} for _ in range(N_models)]
+        if compound_model_args is None:
+            compound_model_args = {}
+        if likelihood_configs is None:
+            likelihood_configs = [{} for _ in range(N_models)]
+        if parameter_definitions is None:
+            parameter_definitions = [{} for _ in range(N_models)]
+        # nominal_values is a keyword argument
+        self.nominal_values = nominal_values if nominal_values else {}
+        # initialize nominal_values only once, in the compound model
+        compound_model_args["nominal_values"] = self.nominal_values
+        # likelihood_config is a keyword argument, because not all statistical models need it
+        for sma, lc in zip(statistical_models_args, likelihood_configs):
+            sma["likelihood_config"] = lc
+
+        if (
+            (N_models != len(likelihood_configs))
+            or (N_models != len(parameter_definitions))
+            or (N_models != len(statistical_models_args))
+        ):
+            raise ValueError(
+                "The lists of model classes, likelihood configs, "
+                "parameter definitions and statistical model args must have the same length "
+                "({:d}, {:d}, {:d}, {:d}, respectively)".format(
+                    N_models,
+                    len(likelihood_configs),
+                    len(parameter_definitions),
+                    len(statistical_models_args),
+                )
+            )
+        models: List[StatisticalModel] = []
+        for statistical_model_class, parameter_definition, statistical_model_args in zip(
+            statistical_model_classes, parameter_definitions, statistical_models_args
+        ):
+            # initialize statistical models
+            models.append(
+                statistical_model_class(
+                    parameter_definition=parameter_definition,
+                    confidence_level=confidence_level,
+                    confidence_interval_kind=confidence_interval_kind,
+                    **statistical_model_args,
+                )
+            )
+        self.model = CompoundStatisticalModel(models)
+
+        # assign a range of inference and running parameters:
+        self._assign_inference_choices(
+            poi=poi,
+            hypotheses=hypotheses,
+            n_mc=n_mc,
+            common_hypothesis=common_hypothesis,
+            generate_values=generate_values,
+            nominal_values=nominal_values,
+            compute_confidence_interval=compute_confidence_interval,
+            confidence_level=confidence_level,
+            confidence_interval_kind=confidence_interval_kind,
+            toydata_mode=toydata_mode,
+            toydata_filename=toydata_filename,
+            only_toydata=only_toydata,
+            output_filename=output_filename,
+            seed=seed,
+            metadata=metadata,
+        )
+        # find confidence_interval_thresholds function for the hypotheses
+
+        from alea.submitters.local import NeymanConstructor
+
+        self.confidence_interval_thresholds = NeymanConstructor.get_confidence_interval_thresholds(
+            self.poi,
+            self._hypotheses_values,
+            compound_model_args.get("limit_threshold", None),
+            nominal_values,
+            confidence_interval_kind,
+            confidence_level,
+            compound_model_args.get("limit_threshold_interpolation", False),
+            compound_model_args.get("asymptotic_dof", 1),
+        )
+
     def pre_process_poi(self, value, attribute_name):
         """Pre-process of poi_expectation for some attributes of runner."""
         if not all([isinstance(v, (float, int)) for v in value.values()]):
@@ -210,10 +405,21 @@ def hypotheses(self, values: list) -> None:
         self._hypotheses = values

     @staticmethod
-    def runner_arguments():
-        """Get runner arguments and annotations."""
+    def runner_arguments(model_type: str):
+        """Get runner arguments and annotations.
+
+        Args:
+            model_type (str): either "single" or "combined"
+
+        """
         # find run toyMC default args and annotations:
         # reference: https://docs.python.org/3/library/inspect.html#inspect.getfullargspec
+        if model_type == "single":
+            initialiser = Runner.single_init
+        elif model_type == "combined":
+            initialiser = Runner.multiple_init
+        else:
+            raise ValueError("model_type must be one of single, combined")
         (
             args,
             varargs,
@@ -222,7 +428,7 @@ def runner_arguments():
             kwonlyargs,
             kwonlydefaults,
             annotations,
-        ) = inspect.getfullargspec(Runner.__init__)
+        ) = inspect.getfullargspec(initialiser)
         # skip the first one because it is self(Runner itself)
         default_args = dict(zip(args[1:], defaults))
         return args, default_args, annotations
diff --git a/alea/submitter.py b/alea/submitter.py
index 6580fc56..8b68f4bb 100644
--- a/alea/submitter.py
+++ b/alea/submitter.py
@@ -96,6 +96,15 @@ def __init__(
                 "and appended alea/bin or .local/bin(pip install direction) to your $PATH."
             )

+        # a list literal passed as a string selects the combined (multi-model) runner
+        try:
+            if type(eval(statistical_model)) == list:
+                self.model_type = "combined"
+                statistical_model = eval(statistical_model)
+            else:
+                self.model_type = "single"
+        except NameError:
+            self.model_type = "single"
+
         self.statistical_model = statistical_model
         self.statistical_model_config = statistical_model_config
         self.poi = poi
@@ -244,7 +253,7 @@ def str_to_arg(value: str, annotation):

     def merged_arguments_generator(self):
         """Generate the merged arguments for Runner from to_zip, to_vary and in_common."""
-        _, default_args, _ = Runner.runner_arguments()
+        _, default_args, _ = Runner.runner_arguments(self.model_type)

         to_zip = self.computation_dict.get("to_zip", {})
         to_vary = self.computation_dict.get("to_vary", {})
@@ -296,7 +305,7 @@ def merged_arguments_generator(self):
             # update template_path and limit_threshold in statistical_model_args if needed
             self.update_statistical_model_args(runner_args, self.template_path)
             # check if all arguments are supported
-            self.check_redunant_arguments(runner_args, self.allowed_special_args)
+            self.check_redunant_arguments(runner_args, self.model_type, self.allowed_special_args)

             yield runner_args

@@ -333,7 +342,7 @@ def computation_tickets_generator(self):

         """

-        _, _, annotations = Runner.runner_arguments()
+        _, _, annotations = Runner.runner_arguments(self.model_type)

         for runner_args in self.merged_arguments_generator():
             for i_batch in range(runner_args.get("n_batch", 1)):
@@ -529,8 +538,18 @@ def update_statistical_model_args(
         )

     @staticmethod
-    def check_redunant_arguments(runner_args, allowed_special_args: List[str] = []):
-        signatures = inspect.signature(Runner.__init__)
+    def check_redunant_arguments(runner_args, model_type, allowed_special_args: List[str] = []):
+        """Check that runner_args only contains supported arguments.
+
+        Args:
+            runner_args: the arguments intended for the Runner
+            model_type (str): either "single" or "combined"; decides which Runner
+                initialiser to check, since the two signatures differ
+        """
+        if model_type == "single":
+            signatures = inspect.signature(Runner.single_init)
+        elif model_type == "combined":
+            signatures = inspect.signature(Runner.multiple_init)
+        else:
+            raise ValueError("model_type must be one of 'single' and 'combined'")
         args = list(signatures.parameters.keys())[1:] + ["n_batch"] + allowed_special_args
         intended_args = set(runner_args.keys())
         allowed_args = set(args)
@@ -564,7 +583,17 @@ def runner_kwargs_from_script(sys_argv: Optional[List[str]] = None):
         the arguments of Runner.__init__.

         """
-        signatures = inspect.signature(Runner.__init__)
+        if sys_argv is None:
+            signatures = inspect.signature(Runner.single_init)
+        elif ("--statistical_model" in sys_argv) and ("--statistical_models" in sys_argv):
+            raise ValueError(
+                "you must provide either statistical_model or statistical_models, not both"
+            )
+        elif "--statistical_model" in sys_argv:
+            signatures = inspect.signature(Runner.single_init)
+        elif "--statistical_models" in sys_argv:
+            signatures = inspect.signature(Runner.multiple_init)
+        else:
+            raise ValueError("you must provide either statistical_model or statistical_models")
+
         args = list(signatures.parameters.keys())[1:]

         parser = ArgumentParser(description="Command line running of alea-run_toymc")
diff --git a/alea/submitters/local.py b/alea/submitters/local.py
index b72e5184..d2e59c2f 100644
--- a/alea/submitters/local.py
+++ b/alea/submitters/local.py
@@ -36,7 +36,7 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

     @staticmethod
-    def initialized_runner(script: str, pop_limit_threshold: bool = False):
+    def initialize_runner(script: str, pop_limit_threshold: bool = False):
         """Initialize a Runner from a script.

         Args:
@@ -49,7 +49,13 @@ def initialized_runner(script: str, pop_limit_threshold: bool = False):
         kwargs = Submitter.runner_kwargs_from_script(shlex.split(script)[2:])
         if pop_limit_threshold:
             kwargs["statistical_model_args"].pop("limit_threshold", None)
-        runner = Runner(**kwargs)
+        if "statistical_model" in kwargs and "statistical_models" in kwargs:
+            raise ValueError(
+                "you must provide either statistical_model or statistical_models, not both"
+            )
+        statistical_model = kwargs.pop("statistical_model", None)
+        statistical_model = kwargs.pop("statistical_models", statistical_model)
+        runner = Runner(statistical_model, **kwargs)
         return runner

     def submit(self):
@@ -61,7 +67,7 @@ def submit(self):
         for _, (script, _) in enumerate(self.combined_tickets_generator()):
             if self.debug:
                 print(script)
-            runner = self.initialized_runner(script)
+            runner = self.initialize_runner(script)
             # print all parameters
             print("\n\n" + f"{' PARAMETERS ':#^80}")
             print(runner.model.parameters)
diff --git a/bin/alea-run_toymc b/bin/alea-run_toymc
index 25416eb9..1ef4c9c9 100755
--- a/bin/alea-run_toymc
+++ b/bin/alea-run_toymc
@@ -5,7 +5,8 @@ from alea.submitter import Submitter


 def main():
     kwargs = Submitter.runner_kwargs_from_script()
-    runner = Runner(**kwargs)
+    statistical_model = kwargs.pop("statistical_model")
+    runner = Runner(statistical_model, **kwargs)
     runner.run()

diff --git a/tests/test_runner.py b/tests/test_runner.py
index 5570e46c..e4f70224 100644
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@@ -30,7 +30,7 @@ def setUp(cls):
     def set_gaussian_runner(self, toydata_mode="generate_and_store"):
         """Set a new runner instance with GaussianModel."""
         self.runner = Runner(
-            statistical_model="alea.examples.gaussian_model.GaussianModel",
+            "alea.examples.gaussian_model.GaussianModel",
             poi="mu",
             hypotheses=["free", "zero", "true"],
             n_mc=self.n_mc,
@@ -47,7 +47,7 @@ def set_blueice_runner(self, toydata_mode="generate_and_store"):
         """Set a new runner instance with BlueiceExtendedModel."""
         parameter_zvc = self.running_config["computation_options"]["discovery_power"]
         self.runner = Runner(
-            statistical_model=self.running_config["statistical_model"],
+            self.running_config["statistical_model"],
             poi=self.running_config["poi"],
             hypotheses=parameter_zvc["in_common"]["hypotheses"],
             n_mc=self.n_mc,
@@ -92,7 +92,7 @@ def test_init_signatures(self):
             kwonlyargs,
             kwonlydefaults,
             annotations,
-        ) = inspect.getfullargspec(Runner.__init__)
+        ) = inspect.getfullargspec(Runner.single_init)
         if (len(annotations) != len(args[1:])) or (len(defaults) != len(args[1:])):
             raise ValueError(
                 "The number of annotations and defaults of Runner.__init__ must be the same!"
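The Runner changes above hinge on dispatching __init__ with functools.singledispatchmethod: a Runner built from a single model-name string selects single_init, while a list of model names selects multiple_init, purely from the type of the first positional argument. The standalone sketch below shows how that dispatch pattern behaves; the Loader class and its attribute are invented for illustration and are not part of alea.

from functools import singledispatchmethod


class Loader:
    """Toy class showing __init__ dispatch on the type of the first argument."""

    @singledispatchmethod
    def __init__(self, source):
        raise NotImplementedError("source must be a string or a list of strings")

    @__init__.register
    def _single_init(self, source: str):
        # one name: wrap it in a one-element list
        self.names = [source]

    @__init__.register
    def _multiple_init(self, source: list):
        # several names: keep them all
        self.names = list(source)


print(Loader("model_a").names)               # ['model_a']
print(Loader(["model_a", "model_b"]).names)  # ['model_a', 'model_b']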