From 0e41484520977c871d883b0663627ec2de19591a Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Thu, 5 Jan 2023 12:09:06 -0600 Subject: [PATCH 01/31] Add support for sigma=0 in normal distributions --- ConfigSpace/hyperparameters.pyx | 117 +++++++++++++++++--------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/ConfigSpace/hyperparameters.pyx b/ConfigSpace/hyperparameters.pyx index 59093120..e5b3d5f5 100644 --- a/ConfigSpace/hyperparameters.pyx +++ b/ConfigSpace/hyperparameters.pyx @@ -141,7 +141,7 @@ cdef class Hyperparameter(object): def has_neighbors(self): raise NotImplementedError() - def get_neighbors(self, value, rs, number, transform = False): + def get_neighbors(self, value, rs, number, transform=False): raise NotImplementedError() def get_num_neighbors(self, value): @@ -297,7 +297,7 @@ cdef class Constant(Hyperparameter): def has_neighbors(self) -> bool: return False - def get_num_neighbors(self, value = None) -> int: + def get_num_neighbors(self, value=None) -> int: return 0 def get_neighbors(self, value: Any, rs: np.random.RandomState, number: int, @@ -368,7 +368,7 @@ cdef class NumericalHyperparameter(Hyperparameter): def has_neighbors(self) -> bool: return True - def get_num_neighbors(self, value = None) -> float: + def get_num_neighbors(self, value=None) -> float: return np.inf @@ -522,7 +522,6 @@ cdef class FloatHyperparameter(NumericalHyperparameter): raise NotImplementedError() - cdef class IntegerHyperparameter(NumericalHyperparameter): def __init__(self, name: str, default_value: int, meta: Optional[Dict] = None) -> None: super(IntegerHyperparameter, self).__init__(name, default_value, meta) @@ -538,7 +537,7 @@ cdef class IntegerHyperparameter(NumericalHyperparameter): def check_int(self, parameter: int, name: str) -> int: if abs(int(parameter) - parameter) > 0.00000001 and \ - type(parameter) is not int: + type(parameter) is not int: raise ValueError("For the Integer parameter %s, the value must be " "an Integer, 
too. Right now it is a %s with value" " %s." % (name, type(parameter), str(parameter))) @@ -895,7 +894,8 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): self.normalized_default_value = self._inverse_transform(self.default_value) if (lower is not None) ^ (upper is not None): - raise ValueError("Only one bound was provided when both lower and upper bounds must be provided.") + raise ValueError( + "Only one bound was provided when both lower and upper bounds must be provided.") if lower is not None and upper is not None: self.lower = float(lower) @@ -903,12 +903,12 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): if self.lower >= self.upper: raise ValueError("Upper bound %f must be larger than lower bound " - "%f for hyperparameter %s" % - (self.upper, self.lower, name)) + "%f for hyperparameter %s" % + (self.upper, self.lower, name)) elif log and self.lower <= 0: raise ValueError("Negative lower bound (%f) for log-scale " - "hyperparameter %s is forbidden." % - (self.lower, name)) + "hyperparameter %s is forbidden." 
% + (self.lower, name)) self.default_value = self.check_default(default_value) @@ -942,9 +942,11 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): repr_str = io.StringIO() if self.lower is None or self.upper is None: - repr_str.write("%s, Type: NormalFloat, Mu: %s Sigma: %s, Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.default_value))) + repr_str.write("%s, Type: NormalFloat, Mu: %s Sigma: %s, Default: %s" % + (self.name, repr(self.mu), repr(self.sigma), repr(self.default_value))) else: - repr_str.write("%s, Type: NormalFloat, Mu: %s Sigma: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.lower), repr(self.upper), repr(self.default_value))) + repr_str.write("%s, Type: NormalFloat, Mu: %s Sigma: %s, Range: [%s, %s], Default: %s" % ( + self.name, repr(self.mu), repr(self.sigma), repr(self.lower), repr(self.upper), repr(self.default_value))) if self.log: repr_str.write(", on log-scale") @@ -1031,8 +1033,8 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): lower = None upper = None else: - lower=np.ceil(self.lower) - upper=np.floor(self.upper) + lower = np.ceil(self.lower) + upper = np.floor(self.upper) return NormalIntegerHyperparameter(self.name, int(np.rint(self.mu)), self.sigma, lower=lower, upper=upper, @@ -1048,13 +1050,14 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): def _sample(self, rs: np.random.RandomState, size: Optional[int] = None ) -> Union[np.ndarray, float]: - if self.lower == None: + sigma = self.sigma + if sigma == 0: + return self.mu + elif self.lower == None: mu = self.mu - sigma = self.sigma return rs.normal(mu, sigma, size=size) else: mu = self.mu - sigma = self.sigma lower = self._lower upper = self._upper a = (lower - mu) / sigma @@ -1095,7 +1098,7 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): new_value = rs.normal(value, self.sigma) if self.lower is not None and self.upper is not None: - new_value = min(max(new_value, 
self.lower), self.upper) + new_value = min(max(new_value, self.lower), self.upper) neighbors.append(new_value) return neighbors @@ -1129,7 +1132,9 @@ cdef class NormalFloatHyperparameter(FloatHyperparameter): """ mu = self.mu sigma = self.sigma - if self.lower == None: + if sigma == 0: + return np.float64(vector == mu) + elif self.lower == None: return norm(loc=mu, scale=sigma).pdf(vector) else: mu = self.mu @@ -1223,7 +1228,8 @@ cdef class BetaFloatHyperparameter(UniformFloatHyperparameter): def __repr__(self) -> str: repr_str = io.StringIO() - repr_str.write("%s, Type: BetaFloat, Alpha: %s Beta: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.alpha), repr(self.beta), repr(self.lower), repr(self.upper), repr(self.default_value))) + repr_str.write("%s, Type: BetaFloat, Alpha: %s Beta: %s, Range: [%s, %s], Default: %s" % ( + self.name, repr(self.alpha), repr(self.beta), repr(self.lower), repr(self.upper), repr(self.default_value))) if self.log: repr_str.write(", on log-scale") @@ -1314,15 +1320,14 @@ cdef class BetaFloatHyperparameter(UniformFloatHyperparameter): upper = int(np.floor(self.upper)) default_value = int(np.rint(self.default_value)) return BetaIntegerHyperparameter(self.name, lower=lower, upper=upper, alpha=self.alpha, beta=self.beta, - default_value=int(np.rint(self.default_value)), - q=q_int, log=self.log) + default_value=int(np.rint(self.default_value)), + q=q_int, log=self.log) def is_legal(self, value: Union[float]) -> bool: if isinstance(value, (float, int)): return self.upper >= value >= self.lower return False - cpdef bint is_legal_vector(self, DTYPE_t value): return self._upper >= value >= self._lower @@ -1356,7 +1361,7 @@ cdef class BetaFloatHyperparameter(UniformFloatHyperparameter): alpha = self.alpha beta = self.beta return spbeta(alpha, beta, loc=lb, scale=ub-lb).pdf(vector) \ - * (ub-lb) / (self._upper - self._lower) + * (ub-lb) / (self._upper - self._lower) def get_max_density(self) -> float: if (self.alpha > 1) or (self.beta > 
1): @@ -1537,7 +1542,7 @@ cdef class UniformIntegerHyperparameter(IntegerHyperparameter): else: return False - def get_num_neighbors(self, value = None) -> int: + def get_num_neighbors(self, value=None) -> int: return self.upper - self.lower def get_neighbors( @@ -1663,8 +1668,7 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): cdef public nfhp cdef normalization_constant - - def __init__(self, name: str, mu: int, sigma: Union[int, float], + def __init__(self, name: str, mu: Union[int, float], sigma: Union[int, float], default_value: Union[int, None] = None, q: Union[None, int] = None, log: bool = False, lower: Optional[int] = None, @@ -1710,6 +1714,8 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): self.mu = mu self.sigma = sigma + if self.sigma == 0: + assert int(self.mu) == self.mu if default_value is not None: default_value = self.check_int(default_value, self.name) @@ -1727,19 +1733,20 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): self.log = bool(log) if (lower is not None) ^ (upper is not None): - raise ValueError("Only one bound was provided when both lower and upper bounds must be provided.") + raise ValueError( + "Only one bound was provided when both lower and upper bounds must be provided.") if lower is not None and upper is not None: self.upper = self.check_int(upper, "upper") self.lower = self.check_int(lower, "lower") if self.lower >= self.upper: raise ValueError("Upper bound %d must be larger than lower bound " - "%d for hyperparameter %s" % - (self.lower, self.upper, name)) + "%d for hyperparameter %s" % + (self.lower, self.upper, name)) elif log and self.lower <= 0: raise ValueError("Negative lower bound (%d) for log-scale " - "hyperparameter %s is forbidden." % - (self.lower, name)) + "hyperparameter %s is forbidden." 
% + (self.lower, name)) self.lower = lower self.upper = upper @@ -1765,9 +1772,11 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): repr_str = io.StringIO() if self.lower is None or self.upper is None: - repr_str.write("%s, Type: NormalInteger, Mu: %s Sigma: %s, Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.default_value))) + repr_str.write("%s, Type: NormalInteger, Mu: %s Sigma: %s, Default: %s" % + (self.name, repr(self.mu), repr(self.sigma), repr(self.default_value))) else: - repr_str.write("%s, Type: NormalInteger, Mu: %s Sigma: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.mu), repr(self.sigma), repr(self.lower), repr(self.upper), repr(self.default_value))) + repr_str.write("%s, Type: NormalInteger, Mu: %s Sigma: %s, Range: [%s, %s], Default: %s" % ( + self.name, repr(self.mu), repr(self.sigma), repr(self.lower), repr(self.upper), repr(self.default_value))) if self.log: repr_str.write(", on log-scale") @@ -1844,7 +1853,7 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): if self.log: return self._transform_scalar(self.mu) else: - return self.mu + return int(np.round(self.mu)) elif self.is_legal(default_value): return default_value @@ -1911,7 +1920,7 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): def _compute_normalization(self): if self.lower is None: warnings.warn('Cannot normalize the pdf exactly for a NormalIntegerHyperparameter' - f' {self.name} without bounds. Skipping normalization for that hyperparameter.') + f' {self.name} without bounds. 
Skipping normalization for that hyperparameter.') return 1 else: @@ -1964,7 +1973,6 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): cdef public bfhp cdef normalization_constant - def __init__(self, name: str, alpha: Union[int, float], beta: Union[int, float], lower: Union[int, float], upper: Union[int, float], @@ -2022,13 +2030,13 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): else: q = self.q self.bfhp = BetaFloatHyperparameter(self.name, - self.alpha, - self.beta, - log=self.log, - q=q, - lower=self.lower, - upper=self.upper, - default_value=self.default_value) + self.alpha, + self.beta, + log=self.log, + q=q, + lower=self.lower, + upper=self.upper, + default_value=self.default_value) self.default_value = self.check_default(default_value) self.normalized_default_value = self._inverse_transform(self.default_value) @@ -2036,7 +2044,8 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): def __repr__(self) -> str: repr_str = io.StringIO() - repr_str.write("%s, Type: BetaInteger, Alpha: %s Beta: %s, Range: [%s, %s], Default: %s" % (self.name, repr(self.alpha), repr(self.beta), repr(self.lower), repr(self.upper), repr(self.default_value))) + repr_str.write("%s, Type: BetaInteger, Alpha: %s Beta: %s, Range: [%s, %s], Default: %s" % ( + self.name, repr(self.alpha), repr(self.beta), repr(self.lower), repr(self.upper), repr(self.default_value))) if self.log: repr_str.write(", on log-scale") @@ -2094,7 +2103,6 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): default_value=self.default_value, q=self.q, log=self.log, meta=self.meta) - def check_default(self, default_value: Union[int, float, None]) -> int: if default_value is None: # Here, we just let the BetaFloat take care of the default value @@ -2294,7 +2302,7 @@ cdef class CategoricalHyperparameter(Hyperparameter): ordered_probabilities_other is None and len(np.unique(list(ordered_probabilities_self.values()))) == 1 ) - ) + ) ) def 
__hash__(self): @@ -2419,7 +2427,7 @@ cdef class CategoricalHyperparameter(Hyperparameter): def has_neighbors(self) -> bool: return len(self.choices) > 1 - def get_num_neighbors(self, value = None) -> int: + def get_num_neighbors(self, value=None) -> int: return len(self.choices) - 1 def get_neighbors(self, value: int, rs: np.random.RandomState, @@ -2629,11 +2637,11 @@ cdef class OrdinalHyperparameter(Hyperparameter): def __copy__(self): return OrdinalHyperparameter( - name=self.name, - sequence=copy.deepcopy(self.sequence), - default_value=self.default_value, - meta=self.meta - ) + name=self.name, + sequence=copy.deepcopy(self.sequence), + default_value=self.default_value, + meta=self.meta + ) cpdef int compare(self, value: Union[int, float, str], value2: Union[int, float, str]): if self.value_dict[value] < self.value_dict[value2]: @@ -2853,7 +2861,8 @@ cdef class OrdinalHyperparameter(Hyperparameter): Probability density values of the input vector """ if not np.all(np.isin(vector, self.sequence)): - raise ValueError(f'Some element in the vector {vector} is not in the sequence {self.sequence}.') + raise ValueError( + f'Some element in the vector {vector} is not in the sequence {self.sequence}.') return np.ones_like(vector, dtype=np.float64) / self.num_elements def get_max_density(self) -> float: From e07f87b3cf92594ee47e7295232b2ce2b01f4205 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 9 Jan 2023 13:28:02 -0600 Subject: [PATCH 02/31] Update docs to clarify float now allowed --- ConfigSpace/hyperparameters.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/hyperparameters.pyx b/ConfigSpace/hyperparameters.pyx index e5b3d5f5..fbdc16b8 100644 --- a/ConfigSpace/hyperparameters.pyx +++ b/ConfigSpace/hyperparameters.pyx @@ -1689,7 +1689,7 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): ---------- name : str Name of the hyperparameter with which it can be accessed - mu : int + mu : int, float Mean of the 
distribution, from which hyperparameter is sampled sigma : int, float Standard deviation of the distribution, from which From d6a172a97f6bbcca66ceb7e137e7cb43afb23b51 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 9 Jan 2023 14:12:42 -0600 Subject: [PATCH 03/31] Base normalized_default_value on the original unrounded default value and round the result to the nearest integer --- ConfigSpace/hyperparameters.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/hyperparameters.pyx b/ConfigSpace/hyperparameters.pyx index fbdc16b8..830627e8 100644 --- a/ConfigSpace/hyperparameters.pyx +++ b/ConfigSpace/hyperparameters.pyx @@ -1760,7 +1760,7 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): default_value=default_value) self.default_value = self.check_default(default_value) - self.normalized_default_value = self._inverse_transform(self.default_value) + self.normalized_default_value = int(np.round(self._inverse_transform(default_value))) if (self.lower is None) or (self.upper is None): # Since a bound is missing, the pdf cannot be normalized. 
Working with the unnormalized variant) From c89bc7af56055aff980b36c0f060dc3c369b955c Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 9 Jan 2023 14:13:20 -0600 Subject: [PATCH 04/31] Update NormalIntegerHyperparameter tests --- test/test_hyperparameters.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_hyperparameters.py b/test/test_hyperparameters.py index ca8186f9..32ea6df5 100644 --- a/test/test_hyperparameters.py +++ b/test/test_hyperparameters.py @@ -1193,8 +1193,9 @@ def test_normalint(self): f1 = NormalIntegerHyperparameter("param", 0.5, 5.5) f1_ = NormalIntegerHyperparameter("param", 0.5, 5.5) self.assertEqual(f1, f1_) + default = np.int32(np.round(0.5)) self.assertEqual( - "param, Type: NormalInteger, Mu: 0.5 Sigma: 5.5, Default: 0.5", + f"param, Type: NormalInteger, Mu: 0.5 Sigma: 5.5, Default: {default}", str(f1)) # Test attributes are accessible @@ -1203,8 +1204,8 @@ def test_normalint(self): self.assertEqual(f1.sigma, 5.5) self.assertEqual(f1.q, None) self.assertEqual(f1.log, False) - self.assertAlmostEqual(f1.default_value, 0.5) - self.assertAlmostEqual(f1.normalized_default_value, 0.5) + self.assertAlmostEqual(f1.default_value, default) + self.assertAlmostEqual(f1.normalized_default_value, 0.0) with pytest.warns(UserWarning, match="Setting quantization < 1 for Integer " "Hyperparameter 'param' has no effect"): From 2ab524f89a54b071aad9d2cb64dc18562c799340 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 9 Jan 2023 15:05:17 -0600 Subject: [PATCH 05/31] Don't convert normalized_default_value to int' --- ConfigSpace/hyperparameters.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/hyperparameters.pyx b/ConfigSpace/hyperparameters.pyx index 830627e8..fbdc16b8 100644 --- a/ConfigSpace/hyperparameters.pyx +++ b/ConfigSpace/hyperparameters.pyx @@ -1760,7 +1760,7 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): default_value=default_value) 
self.default_value = self.check_default(default_value) - self.normalized_default_value = int(np.round(self._inverse_transform(default_value))) + self.normalized_default_value = self._inverse_transform(self.default_value) if (self.lower is None) or (self.upper is None): # Since a bound is missing, the pdf cannot be normalized. Working with the unnormalized variant) From df5640ea03834f5cc827b9b61b01832e904c1e6f Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 7 Nov 2022 20:25:10 -0600 Subject: [PATCH 06/31] Add ForbiddenCallableRelation --- ConfigSpace/forbidden.pyx | 62 ++++++++++++++++++++++++++++++++++++++- test/test_forbidden.py | 7 +++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index ec1e75f5..d69f1a46 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -33,13 +33,14 @@ import numpy as np import io from ConfigSpace.hyperparameters import Hyperparameter from ConfigSpace.hyperparameters cimport Hyperparameter -from typing import List, Dict, Any, Union +from typing import List, Dict, Any, Union, Callable from ConfigSpace.forbidden cimport AbstractForbiddenComponent from libc.stdlib cimport malloc, free cimport numpy as np +ForbiddenCallable = Callable[[Hyperparameter, Hyperparameter], bool] cdef class AbstractForbiddenComponent(object): @@ -571,6 +572,65 @@ cdef class ForbiddenRelation(AbstractForbiddenComponent): pass +cdef class ForbiddenCallableRelation(ForbiddenRelation): + """A ForbiddenCallable relation between two hyperparameters. + + The ForbiddenCallable uses two hyperparameters as input to a + specified callable, which returns True if the relationship + between the two hyperparameters is forbidden. 
+ + >>> from ConfigSpace import ConfigurationSpace, ForbiddenLessThanRelation + >>> + >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) + >>> + >>> forbidden_clause = ForbiddenFunctionRelation(cs['a'], cs['b']) + >>> cs.add_forbidden_clause(forbidden_clause) + Forbidden: f(a,b) == True + + Parameters + ---------- + left : :ref:`Hyperparameters` + first argument of lambda expression + + right : :ref:`Hyperparameters` + second argument of lambda expression + + f : A callable that relates the two hyperparameters + """ + cdef public f + + def __init__(self, left: Hyperparameter, right : Hyperparameter, + f: ForbiddenCallable): + if not isinstance(f, Callable): # Can't use ForbiddenCallable here apparently + raise TypeError("Argument 'f' is not of type %s." % Callable) + + super().__init__(left, right) + self.f = f + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, self.__class__): + return False + return super().__eq__(other) and self.f == other.f + + def __copy__(self): + return self.__class__( + a=copy.copy(self.left), + b=copy.copy(self.right), + f=copy.copy(self.f) + ) + + def __repr__(self): + from dill.source import getsource + f_source = getsource(self.f) + return f"Forbidden:\n{f_source}\nArguments: {self.left.name}, {self.right.name}" + + cdef int _is_forbidden(self, left, right) except -1: + return self.f(left, right) + + cdef int _is_forbidden_vector(self, DTYPE_t left, DTYPE_t right) except -1: + return self.f(left, right) + + cdef class ForbiddenLessThanRelation(ForbiddenRelation): """A ForbiddenLessThan relation between two hyperparameters. 
diff --git a/test/test_forbidden.py b/test/test_forbidden.py index 826fc646..f01c4d95 100644 --- a/test/test_forbidden.py +++ b/test/test_forbidden.py @@ -43,6 +43,7 @@ ForbiddenEqualsRelation, ForbiddenLessThanRelation, ForbiddenGreaterThanRelation, + ForbiddenCallableRelation, ) from ConfigSpace import OrdinalHyperparameter @@ -297,3 +298,9 @@ def test_relation(self): {'water_temperature': 'hot', 'water_temperature2': 'cold'}, True) ) + + forb = ForbiddenCallableRelation(hp1, hp2, lambda x, y: len(x) <= len(y)) + self.assertFalse(forb.is_forbidden( + {'water_temperature': 'boiling', 'water_temperature2': 'cold'}, + True) + ) From 132c9c2204e58515c846df3923b1c7f5de8c4934 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 7 Nov 2022 20:30:47 -0600 Subject: [PATCH 07/31] Flake8 fixes --- ConfigSpace/forbidden.pyx | 43 ++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index d69f1a46..cc7a49fe 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -12,8 +12,9 @@ # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the nor the -# names of itConfigurationSpaces contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. +# names of itConfigurationSpaces contributors may be used to endorse or +# promote products derived from this software without specific prior +# written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED @@ -33,7 +34,7 @@ import numpy as np import io from ConfigSpace.hyperparameters import Hyperparameter from ConfigSpace.hyperparameters cimport Hyperparameter -from typing import List, Dict, Any, Union, Callable +from typing import Dict, Any, Union, Callable from ConfigSpace.forbidden cimport AbstractForbiddenComponent @@ -93,7 +94,8 @@ cdef class AbstractForbiddenComponent(object): def is_forbidden_vector(self, instantiated_hyperparameters, strict): return bool(self.c_is_forbidden_vector(instantiated_hyperparameters, strict)) - cdef int c_is_forbidden_vector(self, np.ndarray instantiated_hyperparameters, int strict): + cdef int c_is_forbidden_vector(self, np.ndarray instantiated_hyperparameters, + int strict): pass @@ -346,7 +348,6 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): return all([self.components[i] == other.components[i] for i in range(self.n_components)]) - cpdef set_vector_idx(self, hyperparameter_to_idx): for component in self.components: component.set_vector_idx(hyperparameter_to_idx) @@ -374,8 +375,8 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): else: return False - cdef int* arrptr - arrptr = malloc(sizeof(int) * self.n_components) + cdef int * arrptr + arrptr = malloc(sizeof(int) * self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes @@ -395,8 +396,8 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): cdef int rval cdef AbstractForbiddenComponent component - cdef int* arrptr - arrptr = malloc(sizeof(int) * self.n_components) + cdef int * arrptr + arrptr = malloc(sizeof(int) * self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes. 
Check only as many forbidden clauses as the actual @@ -412,7 +413,7 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): free(arrptr) return rval - cdef int _is_forbidden(self, int I, int* evaluations): + cdef int _is_forbidden(self, int I, int * evaluations): pass @@ -434,7 +435,8 @@ cdef class ForbiddenAndConjunction(AbstractForbiddenConjunction): >>> forbidden_clause_a = ForbiddenEqualsClause(cs["a"], 2) >>> forbidden_clause_b = ForbiddenInClause(cs["b"], [2]) >>> - >>> forbidden_clause = ForbiddenAndConjunction(forbidden_clause_a, forbidden_clause_b) + >>> forbidden_clause = ForbiddenAndConjunction( + ... forbidden_clause_a, forbidden_clause_b) >>> >>> cs.add_forbidden_clause(forbidden_clause) (Forbidden: a == 2 && Forbidden: b in {2}) @@ -455,7 +457,7 @@ cdef class ForbiddenAndConjunction(AbstractForbiddenConjunction): retval.write(")") return retval.getvalue() - cdef int _is_forbidden(self, int I, int* evaluations): + cdef int _is_forbidden(self, int I, int * evaluations): # Return False if one of the components evaluates to False for i in range(I): @@ -491,7 +493,7 @@ cdef class ForbiddenRelation(AbstractForbiddenComponent): cdef public right cdef public int[2] vector_ids - def __init__(self, left: Hyperparameter, right : Hyperparameter): + def __init__(self, left: Hyperparameter, right: Hyperparameter): if not isinstance(left, Hyperparameter): raise TypeError("Argument 'left' is not of type %s." 
% Hyperparameter) if not isinstance(right, Hyperparameter): @@ -516,7 +518,8 @@ cdef class ForbiddenRelation(AbstractForbiddenComponent): return (self,) cpdef set_vector_idx(self, hyperparameter_to_idx): - self.vector_ids = (hyperparameter_to_idx[self.left.name], hyperparameter_to_idx[self.right.name]) + self.vector_ids = (hyperparameter_to_idx[self.left.name], + hyperparameter_to_idx[self.right.name]) cpdef is_forbidden(self, instantiated_hyperparameters, strict): left = instantiated_hyperparameters.get(self.left.name) @@ -565,8 +568,10 @@ cdef class ForbiddenRelation(AbstractForbiddenComponent): else: return False - # Relation is always evaluated against actual value and not vector representation - return self._is_forbidden(self.left._transform(left), self.right._transform(right)) + # Relation is always evaluated against actual value and + # not vector representation + return self._is_forbidden(self.left._transform(left), + self.right._transform(right)) cdef int _is_forbidden_vector(self, DTYPE_t left, DTYPE_t right) except -1: pass @@ -599,9 +604,9 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): """ cdef public f - def __init__(self, left: Hyperparameter, right : Hyperparameter, - f: ForbiddenCallable): - if not isinstance(f, Callable): # Can't use ForbiddenCallable here apparently + def __init__(self, left: Hyperparameter, right: Hyperparameter, + f: ForbiddenCallable): + if not isinstance(f, Callable): # Can't use ForbiddenCallable here apparently raise TypeError("Argument 'f' is not of type %s." 
% Callable) super().__init__(left, right) From 8d90a9bee81afcbce2ea47339b837bdf1317eb7f Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 7 Nov 2022 20:36:16 -0600 Subject: [PATCH 08/31] Remove unnecessary spaces added by autopep8 --- ConfigSpace/forbidden.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index cc7a49fe..1230e3bf 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -375,8 +375,8 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): else: return False - cdef int * arrptr - arrptr = malloc(sizeof(int) * self.n_components) + cdef int* arrptr + arrptr = malloc(sizeof(int)* self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes @@ -396,8 +396,8 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): cdef int rval cdef AbstractForbiddenComponent component - cdef int * arrptr - arrptr = malloc(sizeof(int) * self.n_components) + cdef int* arrptr + arrptr = malloc(sizeof(int)* self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes. 
Check only as many forbidden clauses as the actual @@ -413,7 +413,7 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): free(arrptr) return rval - cdef int _is_forbidden(self, int I, int * evaluations): + cdef int _is_forbidden(self, int I, int* evaluations): pass @@ -457,7 +457,7 @@ cdef class ForbiddenAndConjunction(AbstractForbiddenConjunction): retval.write(")") return retval.getvalue() - cdef int _is_forbidden(self, int I, int * evaluations): + cdef int _is_forbidden(self, int I, int* evaluations): # Return False if one of the components evaluates to False for i in range(I): From 71650c47c151d7ff47b7eec7e1ef29097e72e765 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 7 Nov 2022 20:38:38 -0600 Subject: [PATCH 09/31] Remove more autopep8 spaces --- ConfigSpace/forbidden.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 1230e3bf..02b8c540 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -376,7 +376,7 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): return False cdef int* arrptr - arrptr = malloc(sizeof(int)* self.n_components) + arrptr = malloc(sizeof(int)* self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes @@ -397,7 +397,7 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): cdef AbstractForbiddenComponent component cdef int* arrptr - arrptr = malloc(sizeof(int)* self.n_components) + arrptr = malloc(sizeof(int)* self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes. 
Check only as many forbidden clauses as the actual From 1178b1ef36bdc8e70e26f5be000995111dd6bb5f Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 14 Nov 2022 17:36:25 -0600 Subject: [PATCH 10/31] Add ForbiddenCallableRelation to __init__.py --- ConfigSpace/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ConfigSpace/__init__.py b/ConfigSpace/__init__.py index 525b50ba..6e845e89 100644 --- a/ConfigSpace/__init__.py +++ b/ConfigSpace/__init__.py @@ -43,7 +43,8 @@ ForbiddenEqualsRelation, ForbiddenGreaterThanRelation, ForbiddenInClause, - ForbiddenLessThanRelation) + ForbiddenLessThanRelation, + ForbiddenCallableRelation) from ConfigSpace.hyperparameters import (BetaFloatHyperparameter, BetaIntegerHyperparameter, CategoricalHyperparameter, Constant, From dfd6d31df622c9f51a429c39f5303be10d57f183 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 14 Nov 2022 19:49:37 -0600 Subject: [PATCH 11/31] Remove newline characters from ForbiddenCallableRelation repr --- ConfigSpace/forbidden.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 02b8c540..1140279b 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -627,7 +627,7 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): def __repr__(self): from dill.source import getsource f_source = getsource(self.f) - return f"Forbidden:\n{f_source}\nArguments: {self.left.name}, {self.right.name}" + return f"Forbidden: {f_source} | Arguments: {self.left.name}, {self.right.name}" cdef int _is_forbidden(self, left, right) except -1: return self.f(left, right) From 78360ab372cda87029dc9a96afb5c17c52493400 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Tue, 15 Nov 2022 01:25:25 -0600 Subject: [PATCH 12/31] Keep spaces after some asterisks --- ConfigSpace/forbidden.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ConfigSpace/forbidden.pyx 
b/ConfigSpace/forbidden.pyx index 1140279b..cfb0914c 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -376,7 +376,7 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): return False cdef int* arrptr - arrptr = malloc(sizeof(int)* self.n_components) + arrptr = malloc(sizeof(int) * self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes @@ -397,7 +397,7 @@ cdef class AbstractForbiddenConjunction(AbstractForbiddenComponent): cdef AbstractForbiddenComponent component cdef int* arrptr - arrptr = malloc(sizeof(int)* self.n_components) + arrptr = malloc(sizeof(int) * self.n_components) # Finally, call is_forbidden for all direct descendents and combine the # outcomes. Check only as many forbidden clauses as the actual From 2fb245f265bab459ebf9f24dace9fce5d0ff890f Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Tue, 15 Nov 2022 01:40:52 -0600 Subject: [PATCH 13/31] Use inspect instead of dill --- ConfigSpace/forbidden.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index cfb0914c..d29caeb9 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -625,7 +625,7 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): ) def __repr__(self): - from dill.source import getsource + from inspect import getsource f_source = getsource(self.f) return f"Forbidden: {f_source} | Arguments: {self.left.name}, {self.right.name}" From 305f52dd8d5179a5e340fe618bd4daad5ce330e4 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Tue, 29 Nov 2022 19:24:31 -0600 Subject: [PATCH 14/31] Fix example comment --- ConfigSpace/forbidden.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index d29caeb9..3d185065 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -588,7 +588,7 @@ cdef class 
ForbiddenCallableRelation(ForbiddenRelation): >>> >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) >>> - >>> forbidden_clause = ForbiddenFunctionRelation(cs['a'], cs['b']) + >>> forbidden_clause = ForbiddenCallableRelation(cs['a'], cs['b']) >>> cs.add_forbidden_clause(forbidden_clause) Forbidden: f(a,b) == True From 350f79a2f08013e639bf8b21dcf82744b731f7bb Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Mon, 5 Dec 2022 15:55:23 -0600 Subject: [PATCH 15/31] Update documentation. Works with any callable. --- ConfigSpace/forbidden.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 3d185065..3ccd7b1d 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -595,10 +595,10 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): Parameters ---------- left : :ref:`Hyperparameters` - first argument of lambda expression + first argument of callable right : :ref:`Hyperparameters` - second argument of lambda expression + second argument of callable f : A callable that relates the two hyperparameters """ From 1ffed8c9012aa3965e87a59992a2ac881d532051 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Thu, 15 Dec 2022 19:39:27 -0600 Subject: [PATCH 16/31] Address comments --- ConfigSpace/forbidden.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 3ccd7b1d..551028b9 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -584,11 +584,11 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): specified callable, which returns True if the relationship between the two hyperparameters is forbidden. 
- >>> from ConfigSpace import ConfigurationSpace, ForbiddenLessThanRelation + >>> from ConfigSpace import ConfigurationSpace, ForbiddenCallableRelation >>> >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) >>> - >>> forbidden_clause = ForbiddenCallableRelation(cs['a'], cs['b']) + >>> forbidden_clause = ForbiddenCallableRelation(lambda a, b: a + b > 10, cs['a'], cs['b']) >>> cs.add_forbidden_clause(forbidden_clause) Forbidden: f(a,b) == True @@ -600,7 +600,8 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): right : :ref:`Hyperparameters` second argument of callable - f : A callable that relates the two hyperparameters + f : Callable + callable that relates the two hyperparameters """ cdef public f @@ -625,9 +626,8 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): ) def __repr__(self): - from inspect import getsource - f_source = getsource(self.f) - return f"Forbidden: {f_source} | Arguments: {self.left.name}, {self.right.name}" + f_repr = self.f.__qualname__ + return f"Forbidden: {f_repr} | Arguments: {self.left.name}, {self.right.name}" cdef int _is_forbidden(self, left, right) except -1: return self.f(left, right) From 0a6d1d3e87398957b39e513e6bc40de198fda12b Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Thu, 15 Dec 2022 23:39:03 -0600 Subject: [PATCH 17/31] Add support for pickling callables in json serialization, json serialization bug fixes, another fix for float mu in NormalInteger space --- ConfigSpace/forbidden.pyx | 9 ++++-- ConfigSpace/read_and_write/json.py | 46 +++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 551028b9..80d9baff 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -12,7 +12,7 @@ # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# * Neither the name of the nor the -# names of itConfigurationSpaces contributors may be used to endorse or +# names of ConfigurationSpace contributors may be used to endorse or # promote products derived from this software without specific prior # written permission. # @@ -626,7 +626,12 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): ) def __repr__(self): - f_repr = self.f.__qualname__ + if hasattr(self.f, "__qualname__"): + f_repr = self.f.__qualname__ + elif hasattr(self.f, "__class__"): + f_repr = self.__class__.__qualname__ + else: + raise ValueError("Could not find a qualname for the callable") return f"Forbidden: {f_repr} | Arguments: {self.left.name}, {self.right.name}" cdef int _is_forbidden(self, left, right) except -1: diff --git a/ConfigSpace/read_and_write/json.py b/ConfigSpace/read_and_write/json.py index b79f26a7..4deb01fd 100644 --- a/ConfigSpace/read_and_write/json.py +++ b/ConfigSpace/read_and_write/json.py @@ -38,6 +38,7 @@ ForbiddenLessThanRelation, ForbiddenEqualsRelation, ForbiddenGreaterThanRelation, + ForbiddenCallableRelation, ) @@ -254,7 +255,7 @@ def _build_less_than_condition(condition: LessThanCondition) -> Dict: ################################################################################ # Builder for forbidden -def _build_forbidden(clause) -> Dict: +def _build_forbidden(clause, pickle_callables=False) -> Dict: if isinstance(clause, ForbiddenEqualsClause): return _build_forbidden_equals_clause(clause) elif isinstance(clause, ForbiddenInClause): @@ -262,7 +263,7 @@ def _build_forbidden(clause) -> Dict: elif isinstance(clause, ForbiddenAndConjunction): return _build_forbidden_and_conjunction(clause) elif isinstance(clause, ForbiddenRelation): - return _build_forbidden_relation(clause) + return _build_forbidden_relation(clause, pickle_callables=pickle_callables) else: raise TypeError(clause) @@ -294,26 +295,38 @@ def _build_forbidden_and_conjunction(clause: ForbiddenAndConjunction) -> Dict: } -def 
_build_forbidden_relation(clause: ForbiddenRelation) -> Dict: +def _build_forbidden_relation(clause: ForbiddenRelation, pickle_callables=False) -> Dict: + d = { + 'left': clause.left.name, + 'right': clause.right.name, + 'type': 'RELATION' + } + if isinstance(clause, ForbiddenLessThanRelation): lambda_ = 'LESS' elif isinstance(clause, ForbiddenEqualsRelation): lambda_ = 'EQUALS' elif isinstance(clause, ForbiddenGreaterThanRelation): lambda_ = 'GREATER' + elif isinstance(clause, ForbiddenCallableRelation): + if pickle_callables: + lambda_ = 'CALLABLE' + from pickle import dumps + from base64 import b64encode + # pickle the callable, encode the bytes in b64, and convert it to an ASCII string + d['callable'] = b64encode(dumps(clause.f)).decode('ASCII') + else: + raise ValueError( + "Cannot serialize a ForbiddenCallableRelation if pickle_callables is False") else: raise ValueError("Unknown relation '%s'" % type(clause)) - return { - 'left': clause.left.name, - 'right': clause.right.name, - 'type': 'RELATION', - 'lambda': lambda_ - } + d['lambda'] = lambda_ + return d ################################################################################ -def write(configuration_space, indent=2): +def write(configuration_space, indent=2, pickle_callables=False): """ Create a string representation of a :class:`~ConfigSpace.configuration_space.ConfigurationSpace` in json format. 
@@ -386,7 +399,7 @@ def write(configuration_space, indent=2): conditions.append(_build_condition(condition)) for forbidden_clause in configuration_space.get_forbiddens(): - forbiddens.append(_build_forbidden(forbidden_clause)) + forbiddens.append(_build_forbidden(forbidden_clause, pickle_callables=pickle_callables)) rval = {} if configuration_space.name is not None: @@ -496,8 +509,8 @@ def _construct_hyperparameter(hyperparameter: Dict) -> Hyperparameter: return NormalIntegerHyperparameter( name=name, log=hyperparameter['log'], - lower=hyperparameter['lower'], - upper=hyperparameter['upper'], + mu=hyperparameter['mu'], + sigma=hyperparameter['sigma'], default_value=hyperparameter['default'], ) elif hp_type == 'categorical': @@ -625,7 +638,7 @@ def _construct_forbidden( elif forbidden_type == 'AND': return _construct_forbidden_and(clause, cs) elif forbidden_type == 'RELATION': - return _construct_forbidden_equals(clause, cs) + return _construct_forbidden_relation(clause, cs) else: return ValueError(forbidden_type) @@ -671,5 +684,10 @@ def _construct_forbidden_relation( return ForbiddenEqualsRelation(left, right) elif clause['lambda'] == "GREATER": return ForbiddenGreaterThanRelation(left, right) + elif clause['lambda'] == "CALLABLE": + from pickle import loads + from base64 import b64decode + f = loads(b64decode(clause["callable"].encode('ASCII'))) + return ForbiddenCallableRelation(left, right, f) else: raise ValueError("Unknown relation '%s'" % clause['lambda']) From ed781afbbd369103494c8be5b8cd0dc62516302d Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Thu, 15 Dec 2022 23:42:48 -0600 Subject: [PATCH 18/31] Fix argument order in example --- ConfigSpace/forbidden.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 80d9baff..0792053f 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -588,7 +588,7 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): 
>>> >>> cs = ConfigurationSpace({"a": [1, 2, 3], "b": [2, 5, 6]}) >>> - >>> forbidden_clause = ForbiddenCallableRelation(lambda a, b: a + b > 10, cs['a'], cs['b']) + >>> forbidden_clause = ForbiddenCallableRelation(cs['a'], cs['b'], lambda a, b: a + b > 10) >>> cs.add_forbidden_clause(forbidden_clause) Forbidden: f(a,b) == True From 9078b81f1a7cfa2683c0fd93c65180c12cca5365 Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Fri, 16 Dec 2022 00:59:32 -0600 Subject: [PATCH 19/31] Add serialization tests for ForbiddenCallableRelation --- ConfigSpace/forbidden.pyx | 5 +++++ test/read_and_write/test_json.py | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 0792053f..7e62f646 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -583,6 +583,11 @@ cdef class ForbiddenCallableRelation(ForbiddenRelation): The ForbiddenCallable uses two hyperparameters as input to a specified callable, which returns True if the relationship between the two hyperparameters is forbidden. + + A ForbiddenCallableRelation may not be serializable. + :func:`ConfigSpace.read_and_write.write` will attempt to pickle and base64 encode + the callable with pickle_callables=True. However, the unpicklability + of the callable cannot be assured. 
>>> from ConfigSpace import ConfigurationSpace, ForbiddenCallableRelation >>> diff --git a/test/read_and_write/test_json.py b/test/read_and_write/test_json.py index d170880e..ef273302 100644 --- a/test/read_and_write/test_json.py +++ b/test/read_and_write/test_json.py @@ -1,7 +1,8 @@ import os import unittest +from dataclasses import dataclass -from ConfigSpace.forbidden import ForbiddenLessThanRelation +from ConfigSpace.forbidden import ForbiddenLessThanRelation, ForbiddenCallableRelation from ConfigSpace.read_and_write.json import read, write from ConfigSpace.read_and_write.pcs import read as read_pcs from ConfigSpace.read_and_write.pcs_new import read as read_pcs_new @@ -12,6 +13,14 @@ ) +@dataclass +class ProductGreaterThan: + limit: int + + def __call__(self, a, b): + return a*b > self.limit + + class TestJson(unittest.TestCase): def test_serialize_forbidden_in_clause(self): @@ -27,6 +36,10 @@ def test_serialize_forbidden_relation(self): cs.add_forbidden_clause(ForbiddenLessThanRelation(a, b)) write(cs) + cs.add_forbidden_clause(ForbiddenCallableRelation(a, b, ProductGreaterThan(3))) + self.assertRaises(ValueError, write, cs) + write(cs, pickle_callables=True) + def test_configspace_with_probabilities(self): cs = ConfigurationSpace() cs.add_hyperparameter( From 001729b17d47b568ae41cb20d95f07838e7cac5a Mon Sep 17 00:00:00 2001 From: Nicholas Christensen Date: Fri, 16 Dec 2022 01:25:24 -0600 Subject: [PATCH 20/31] Document pickle_callables flag --- ConfigSpace/read_and_write/json.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ConfigSpace/read_and_write/json.py b/ConfigSpace/read_and_write/json.py index 4deb01fd..9c2b7ce4 100644 --- a/ConfigSpace/read_and_write/json.py +++ b/ConfigSpace/read_and_write/json.py @@ -348,6 +348,12 @@ def write(configuration_space, indent=2, pickle_callables=False): a configuration space, which should be written to file. 
indent : int number of whitespaces to use as indent + pickle_callables : bool + whether the writer should attempt to pickle and encode ForbiddenCallableRelations + and similar objects in the configuration space. Defaults to False. + If False, the writer will raise an exception if it encounters a callable + based configuration space object. Unpicklability cannot be assured. Setting + this to True may compromise portability. Returns ------- From 8a409c6ed4c09136d0256f0c83be5b580e83ea17 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Wed, 11 Jan 2023 13:11:51 +0100 Subject: [PATCH 21/31] fix: Memory leak (#282) * test: Add reproducing test * fix: Make sampling neighbors from uniform Int stable * fix: Memory leak with UniformIntegerHyperparameter When querying a large range for a UniformIntegerHyperparameter with a small std.deviation and log scale, this could cause an infinite loop as the reachable neighbors would be quickly exhausted, yet rejection sampling will continue sampling until some arbitrary termination criterion. Why this was causing a memory leak, I'm not entirely sure. The solution now is that if we have seen a sampled value before, we simply take the one "next to it". * fix: Memory issues with Normal and Beta dists Replaced usages of arange with a chunked version to prevent memory blowup. However this is still incredibly slow and needs a more refined solution as a huge amount of values are required to be computed for what can possibly be analytically derived. 
* chore: Update flake8 * fix: flake8 version compatible with Python 3.7 * fix: Name generators properly * fix: Test numbers * doc: typo fixes * perf: Generate all possible neighbors at once * test: Add test for center_range and arange_chunked * perf: Call transform on np vector from rvs * perf: Use numpy `.astype(int)` instead of `int` * doc: Document how to get flamegraphs for optimizing * fix: Allow for negatives in arange_chunked again * fix: Change build back to raw Extensions * build: Properly set compiler_directives * ci: Update makefile with helpful commands * ci: Fix docs to install build * perf: cython optimizations * perf: Fix possible memory leak with UniformIntegerHyperparam * fix: Duplicates as `list` instead of set * fix: Convert to `long long` vector * perf: Revert clip to truncnorm This truncnorm has some slight overhead due to however scipy generates its truncnorm distribution, however this overhead is considered worth it for the sake of readability and understanding * test: Test values not match implementation * Intermediate commit * INtermediate commit 2 * Update neighborhood generation for UniformIntegerHyperparameter * Update tests * Make the benchmark sampling script more robust * Revert small change in util function * Improve readability Co-authored-by: Matthias Feurer --- .github/workflows/docs.yml | 1 + .github/workflows/pytest.yml | 6 +- .github/workflows/release.yml | 6 +- .pre-commit-config.yaml | 4 +- ConfigSpace/__init__.py | 4 +- ConfigSpace/c_util.pyx | 2 - ConfigSpace/conditions.pxd | 2 - ConfigSpace/conditions.pyx | 2 - ConfigSpace/configuration_space.pyx | 2 - ConfigSpace/forbidden.pxd | 2 - ConfigSpace/forbidden.pyx | 2 - ConfigSpace/functional.py | 81 ++++++++ ConfigSpace/hyperparameters.pxd | 1 - ConfigSpace/hyperparameters.pyx | 311 ++++++++++++++++++---------- ConfigSpace/util.pyx | 22 +- Makefile | 15 +- scripts/benchmark_sampling.py | 4 +- setup.py | 38 +++- test/test_functional.py | 36 ++++ test/test_util.py | 11 +- 20 
files changed, 398 insertions(+), 154 deletions(-) create mode 100644 ConfigSpace/functional.py create mode 100644 test/test_functional.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4ed7856d..da6e6d3c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -36,6 +36,7 @@ jobs: - name: Install dependencies run: | + pip install build pip install ".[dev]" - name: Make docs diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 18e51f3f..0b334b08 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -159,9 +159,9 @@ jobs: - name: Create sdist id: sdist run: | - python -m pip install --upgrade pip - python setup.py sdist - echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> $GITHUB_ENV + python -m pip install --upgrade pip build + python -m build --sdist + echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> "$GITHUB_ENV" - name: Install ${{ env.package-name }} run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e34ab6c6..f7c839e8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -184,9 +184,9 @@ jobs: - name: Build source distribution run: | - python -m pip install --upgrade pip - python setup.py sdist - echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> $GITHUB_ENV + python -m pip install --upgrade pip build + python build --sdist + echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> "$GITHUB_ENV" - name: Twine check ${{ env.package-name }} run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d1d9a5e..73c362f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,8 +7,8 @@ repos: name: mypy ConfigSpace files: ConfigSpace - - repo: https://gitlab.com/pycqa/flake8 - rev: 4.0.1 + - repo: https://github.com/pycqa/flake8 + rev: 5.0.4 hooks: - id: flake8 
name: flake8 ConfigSpace diff --git a/ConfigSpace/__init__.py b/ConfigSpace/__init__.py index 6e845e89..bd4ed9f4 100644 --- a/ConfigSpace/__init__.py +++ b/ConfigSpace/__init__.py @@ -29,8 +29,6 @@ from ConfigSpace.__version__ import __version__ from ConfigSpace.__authors__ import __authors__ -import ConfigSpace.api.distributions as distributions -import ConfigSpace.api.types as types from ConfigSpace.api import (Beta, Categorical, Distribution, Float, Integer, Normal, Uniform) from ConfigSpace.conditions import (AndConjunction, EqualsCondition, @@ -54,6 +52,8 @@ UniformFloatHyperparameter, UniformIntegerHyperparameter, UnParametrizedHyperparameter) +import ConfigSpace.api.distributions as distributions +import ConfigSpace.api.types as types __all__ = [ "__authors__", diff --git a/ConfigSpace/c_util.pyx b/ConfigSpace/c_util.pyx index 6177ddd4..8b6b2169 100644 --- a/ConfigSpace/c_util.pyx +++ b/ConfigSpace/c_util.pyx @@ -1,5 +1,3 @@ -# cython: language_level=3 - from collections import deque import numpy as np diff --git a/ConfigSpace/conditions.pxd b/ConfigSpace/conditions.pxd index 50f79c6d..8c7af891 100644 --- a/ConfigSpace/conditions.pxd +++ b/ConfigSpace/conditions.pxd @@ -1,5 +1,3 @@ -# cython: language_level=3 - import numpy as np cimport numpy as np diff --git a/ConfigSpace/conditions.pyx b/ConfigSpace/conditions.pyx index 08f7add2..6d07d87e 100644 --- a/ConfigSpace/conditions.pyx +++ b/ConfigSpace/conditions.pyx @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# cython: language_level=3 - import io from functools import reduce from abc import ABCMeta, abstractmethod diff --git a/ConfigSpace/configuration_space.pyx b/ConfigSpace/configuration_space.pyx index fcfc62d0..7b1295bf 100644 --- a/ConfigSpace/configuration_space.pyx +++ b/ConfigSpace/configuration_space.pyx @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# cython: language_level=3 - import collections.abc from collections import defaultdict, deque, OrderedDict import copy diff --git a/ConfigSpace/forbidden.pxd b/ConfigSpace/forbidden.pxd index aa7abeb1..b381a19b 100644 --- a/ConfigSpace/forbidden.pxd +++ b/ConfigSpace/forbidden.pxd @@ -1,5 +1,3 @@ -# cython: language_level=3 - import numpy as np cimport numpy as np diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 7e62f646..ef6b0a52 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -27,8 +27,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# cython: language_level=3 - import copy import numpy as np import io diff --git a/ConfigSpace/functional.py b/ConfigSpace/functional.py new file mode 100644 index 00000000..395f0c33 --- /dev/null +++ b/ConfigSpace/functional.py @@ -0,0 +1,81 @@ +from typing import Iterator + +from more_itertools import roundrobin +import numpy as np + + +def center_range( + center: int, + low: int, + high: int, + step: int = 1, +) -> Iterator[int]: + """Get a range centered around a value. 
+ + >>> list(center_range(5, 0, 10)) + [4, 6, 3, 7, 2, 8, 1, 9, 0, 10] + + Parameters + ---------- + center: int + The center of the range + + low: int + The low end of the range + + high: int + The high end of the range + + step: int = 1 + The step size + + Returns + ------- + Iterator[int] + """ + assert low <= center <= high + above_center = range(center + step, high + 1, step) + below_center = range(center - step, low - 1, -step) + yield from roundrobin(below_center, above_center) + + +def arange_chunked( + start: int, + stop: int, + step: int = 1, + *, + chunk_size: int, +) -> Iterator[np.ndarray]: + """Get np.arange in a chunked fashion. + + >>> list(arange_chunked(0, 10, 3)) + [array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([9])] + + Parameters + ---------- + start: int + The start of the range + + stop: int + The stop of the range + + chunk_size: int + The size of the chunks + + step: int = 1 + The step size + + Returns + ------- + Iterator[np.ndarray] + """ + assert step > 0 + assert chunk_size > 0 + assert start < stop + n_items = int(np.ceil((stop - start) / step)) + n_chunks = int(np.ceil(n_items / chunk_size)) + + for chunk in range(0, n_chunks): + chunk_start = start + (chunk * chunk_size) + chunk_stop = min(chunk_start + chunk_size, stop) + yield np.arange(chunk_start, chunk_stop, step) diff --git a/ConfigSpace/hyperparameters.pxd b/ConfigSpace/hyperparameters.pxd index ca18809f..f7ff4a9f 100644 --- a/ConfigSpace/hyperparameters.pxd +++ b/ConfigSpace/hyperparameters.pxd @@ -1,4 +1,3 @@ -# cython: language_level=3 from typing import Union import numpy as np cimport numpy as np diff --git a/ConfigSpace/hyperparameters.pyx b/ConfigSpace/hyperparameters.pyx index fbdc16b8..d7aef39d 100644 --- a/ConfigSpace/hyperparameters.pyx +++ b/ConfigSpace/hyperparameters.pyx @@ -27,16 +27,33 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import copy import io -# cython: language_level=3 import math import warnings from collections import OrderedDict, Counter +from itertools import count +from more_itertools import roundrobin, duplicates_everseen from typing import List, Any, Dict, Union, Set, Tuple, Optional, Sequence -import numpy as np from scipy.stats import truncnorm, beta as spbeta, norm +import numpy as np + +# It's necessary to call "import_array" if you use any part of the +# numpy PyArray_* API. From Cython 3, accessing attributes like +# ".shape" on a typed Numpy array use this API. Therefore we recommend +# always calling "import_array" whenever you "cimport numpy" cimport numpy as np +np.import_array() + +from ConfigSpace.functional import center_range, arange_chunked +# OPTIM: Some operations generate an arange which could blowup memory if +# done over the entire space of integers (int32/64). +# To combat this, `arange_chunked` is used in scenarios where reducion +# operations over all the elments could be done in partial steps independantly. +# For example, a sum over the pdf values could be done in chunks. +# This may add some small overhead for smaller ranges but is unlikely to +# be noticable. 
+ARANGE_CHUNKSIZE = 10_000_000 cdef class Hyperparameter(object): @@ -1542,89 +1559,135 @@ cdef class UniformIntegerHyperparameter(IntegerHyperparameter): else: return False - def get_num_neighbors(self, value=None) -> int: - return self.upper - self.lower + def get_num_neighbors(self, value = None) -> int: + # If there is a value in the range, then that value is not a neighbor of itself + # so we need to remove one + if value is not None and self.lower <= value <= self.upper: + return self.upper - self.lower - 1 + else: + return self.upper - self.lower def get_neighbors( self, - value: Union[int, float], + value: float, rs: np.random.RandomState, number: int = 4, transform: bool = False, std: float = 0.2, ) -> List[int]: - cdef int n_requested = number - cdef int idx = 0 - cdef int i = 0 - neighbors = [] # type: List[int] - cdef int sampled_neighbors = 0 - _neighbors_as_int = set() # type: Set[int] - cdef long long int_value = self._transform(value) - cdef long long new_int_value = 0 - cdef float new_value = 0.0 - cdef np.ndarray samples - cdef double[:] samples_view - - if self.upper - self.lower <= n_requested: - transformed_value = self._transform(value) - for n in range(self.lower, self.upper + 1): - if n != int_value: - if transform: - neighbors.append(n) - else: - n = self._inverse_transform(n) - neighbors.append(n) + """Get the neighbors of a value + + NOTE + ---- + **This assumes the value is in the unit-hypercube [0, 1]** + + Parameters + ---------- + value: float + The value to get neighbors around. This assume the ``value`` has been + converted to the [0, 1] range which can be done with ``_inverse_transform``. + + rs: RandomState + The random state to use + + number: int = 4 + How many neighbors to get + + transform: bool = False + Whether to transform this value from the unit cube, back to the + hyperparameter's specified range of values. + + std: float = 0.2 + The std. dev. to use in the [0, 1] hypercube space while sampling + for neighbors. 
+ + Returns + ------- + List[int] + Some ``number`` of neighbors centered around ``value``. + """ + assert 0 <= value <= 1, ( + "For get neighbors of UniformIntegerHyperparameter, the value" + " if assumed to be in the unit-hypercube [0, 1]. If this was not" + " the behaviour assumed, please raise a ticket on github." + ) + assert number < 1000000, ( + "Can only generate less than 1 million neighbors." + ) + # Convert python values to cython ones + cdef long long center = self._transform(value) + cdef long long lower = self.lower + cdef long long upper = self.upper + cdef unsigned int n_requested = number + cdef unsigned long long n_neighbors = upper - lower - 1 + cdef long long stepsize = self.q if self.q is not None else 1 + + neighbors = [] + + cdef long long v # A value that's possible to return + if n_neighbors < n_requested: + + for v in range(lower, center): + neighbors.append(v) + for v in range(center + 1, upper): + neighbors.append(v) + + if transform: + return neighbors + else: + return self._inverse_transform(np.asarray(neighbors)).tolist() + + # A truncated normal between 0 and 1, centered on the value with a scale of std. 
+ # This will be sampled from and converted to the corresponding int value + # However, this is too slow - we use the "poor man's truncnorm below" + # cdef np.ndarray float_indices = truncnorm.rvs( + # a=(0 - value) / std, + # b=(1 - value) / std, + # loc=value, + # scale=std, + # size=number, + # random_state=rs + # ) + # We sample five times as many values as needed and weed them out below + # (perform rejection sampling and make sure we don't sample any neighbor twice) + # This increases our chances of not having to fill the neighbors list by calling + # `center_range` + # Five is an arbitrary number and can probably be tuned to reduce overhead + cdef np.ndarray float_indices = rs.normal(value, std, size=number * 5) + cdef np.ndarray mask = (float_indices >= 0) & (float_indices <= 1) + float_indices = float_indices[mask] + + cdef np.ndarray possible_neighbors_as_array = self._transform_vector(float_indices).astype(np.longlong) + cdef long long [:] possible_neighbors = possible_neighbors_as_array + + cdef unsigned int n_neighbors_generated = 0 + cdef unsigned int n_candidates = len(float_indices) + cdef unsigned int candidate_index = 0 + cdef set seen = {center} + while n_neighbors_generated < n_requested and candidate_index < n_candidates: + v = possible_neighbors[candidate_index] + if v not in seen: + seen.add(v) + n_neighbors_generated += 1 + candidate_index += 1 + + if n_neighbors_generated < n_requested: + numbers_around = center_range(center, lower, upper, stepsize) + + while n_neighbors_generated < n_requested: + v = next(numbers_around) + if v not in seen: + seen.add(v) + n_neighbors_generated += 1 + + seen.remove(center) + neighbors = list(seen) + if transform: + return neighbors else: - samples = rs.normal(loc=value, scale=std, size=250) - samples_view = samples - - while sampled_neighbors < n_requested: - - while True: - new_value = samples_view[idx] - idx += 1 - i += 1 - if idx >= 250: - samples = rs.normal(loc=value, scale=std, size=250) - 
samples_view = samples - idx = 0 - if new_value < 0 or new_value > 1: - continue - new_int_value = self._transform(new_value) - if int_value == new_int_value: - continue - elif i >= 200: - # Fallback to uniform sampling if generating samples correctly - # takes too long - values_to_sample = [j for j in range(self.lower, self.upper + 1) - if j != int_value] - samples = rs.choice( - values_to_sample, - size=n_requested, - replace=False, - ) - for sample in samples: - if transform: - neighbors.append(sample) - else: - sample = self._inverse_transform(sample) - neighbors.append(sample) - break - elif new_int_value in _neighbors_as_int: - continue - elif int_value != new_int_value: - break - - _neighbors_as_int.add(new_int_value) - sampled_neighbors += 1 - if transform: - neighbors.append(new_int_value) - else: - new_value = self._inverse_transform(new_int_value) - neighbors.append(new_value) + return self._inverse_transform(np.array(neighbors)).tolist() - return neighbors def _pdf(self, vector: np.ndarray) -> np.ndarray: """ @@ -1891,31 +1954,65 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): rs: np.random.RandomState, number: int = 4, transform: bool = False, - ) -> List[Union[np.ndarray, float, int]]: - neighbors = [] # type: List[Union[np.ndarray, float, int]] - while len(neighbors) < number: - rejected = True - iteration = 0 - while rejected: - iteration += 1 - new_value = rs.normal(value, self.sigma) - int_value = self._transform(value) - new_int_value = self._transform(new_value) - - if self.lower is not None and self.upper is not None: - int_value = min(max(int_value, self.lower), self.upper) - new_int_value = min(max(new_int_value, self.lower), self.upper) - - if int_value != new_int_value: - rejected = False - elif iteration > 100000: - raise ValueError('Probably caught in an infinite loop.') + ) -> List[int]: + stepsize = self.q if self.q is not None else 1 + bounded = self.lower is not None + mu = self.mu + sigma = self.sigma + + neighbors: 
set[int] = set() + center = self._transform(value) + + if bounded: + float_indices = norm.rvs( + loc=mu, + scale=sigma, + size=number, + random_state=rs, + ) + else: + float_indices = truncnorm( + a = (self.lower - mu) / sigma, + b = (self.upper - mu) / sigma, + loc=center, + scale=sigma, + size=number, + random_state=rs, + ) + + possible_neighbors = self._transform_vector(float_indices).astype(np.longlong) + + for possible_neighbor in possible_neighbors: + # If we already happen to have this neighbor, pick the closest + # number around it that is not already included + if possible_neighbor in neighbors or possible_neighbor == center: + + if bounded: + numbers_around = center_range(possible_neighbor, self.lower, self.upper, stepsize) + else: + decrement_count = count(possible_neighbor - stepsize, step=-stepsize) + increment_count = count(possible_neighbor + stepsize, step=stepsize) + numbers_around = roundrobin(decrement_count, increment_count) + + valid_numbers_around = ( + n for n in numbers_around + if (n not in neighbors and n != center) + ) + possible_neighbor = next(valid_numbers_around, None) + + if possible_neighbor is None: + raise ValueError( + f"Found no more eligble neighbors for value {center}" + f"\nfound {neighbors}" + ) + + # We now have a valid sample, add it to the list of neighbors + neighbors.add(possible_neighbor) if transform: - neighbors.append(self._transform(new_value)) + return [self._transform(neighbor) for neighbor in neighbors] else: - neighbors.append(new_value) - return neighbors + return list(neighbors) def _compute_normalization(self): if self.lower is None: @@ -1924,9 +2021,8 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): return 1 else: - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.nfhp.pdf(all_integer_values) - return np.sum(all_probabilities) + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + return sum(self.nfhp.pdf(chunk).sum() for 
chunk in chunks) def _pdf(self, vector: np.ndarray) -> np.ndarray: """ @@ -1952,9 +2048,9 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): return self.nfhp._pdf(vector) / self.normalization_constant def get_max_density(self): - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.nfhp.pdf(all_integer_values) - return np.max(all_probabilities) / self.normalization_constant + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + maximum = max(self.nfhp.pdf(chunk).max() for chunk in chunks) + return maximum / self.normalization_constant def get_size(self) -> float: if self.lower is None: @@ -2128,9 +2224,8 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): return value def _compute_normalization(self): - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.bfhp.pdf(all_integer_values) - return np.sum(all_probabilities) + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + return sum(self.bfhp.pdf(chunk).sum() for chunk in chunks) def _pdf(self, vector: np.ndarray) -> np.ndarray: """ @@ -2156,9 +2251,9 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): return self.bfhp._pdf(vector) / self.normalization_constant def get_max_density(self): - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.bfhp.pdf(all_integer_values) - return np.max(all_probabilities) / self.normalization_constant + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + maximum = max(self.bfhp.pdf(chunk).max() for chunk in chunks) + return maximum / self.normalization_constant cdef class CategoricalHyperparameter(Hyperparameter): diff --git a/ConfigSpace/util.pyx b/ConfigSpace/util.pyx index 5cae4801..f56d401f 100644 --- a/ConfigSpace/util.pyx +++ b/ConfigSpace/util.pyx @@ -132,16 +132,22 @@ def get_one_exchange_neighbourhood( 
list(configuration.configuration_space._hyperparameters.keys()) ) hyperparameters_list_length = len(hyperparameters_list) - hyperparameters_used = [hp.name - for hp in configuration.configuration_space.get_hyperparameters() - if hp.get_num_neighbors(configuration.get(hp.name)) == 0 and - configuration.get(hp.name)is not None] + hyperparameters_used = [ + hp.name + for hp in configuration.configuration_space.get_hyperparameters() + if ( + hp.get_num_neighbors(configuration.get(hp.name)) == 0 + and configuration.get(hp.name)is not None + ) + ] number_of_usable_hyperparameters = sum(np.isfinite(configuration.get_array())) n_neighbors_per_hp = { - hp.name: num_neighbors if - isinstance(hp, NumericalHyperparameter) and hp.get_num_neighbors( - configuration.get(hp.name))> num_neighbors - else hp.get_num_neighbors(configuration.get(hp.name)) + hp.name: num_neighbors + if ( + isinstance(hp, NumericalHyperparameter) + and hp.get_num_neighbors(configuration.get(hp.name))> num_neighbors + ) else + hp.get_num_neighbors(configuration.get(hp.name)) for hp in configuration.configuration_space.get_hyperparameters() } diff --git a/Makefile b/Makefile index bde61c2b..fc2d76b8 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,19 @@ DIST := "${DIR}/dist"" DOCDIR := "${DIR}/docs" BUILD := "${DIR}/build" INDEX_HTML := "file://${DOCDIR}/build/html/index.html" +NUMPY_INCLUDE := $(shell python -c 'import numpy; print(numpy.get_include())') + +# https://stackoverflow.com/questions/40750596/how-do-i-escape-bracket-in-makefile +CP := ) + +benchmark: + python scripts/benchmark_sampling.py + +cython-annotate: + C_INCLUDE_PATH=$(NUMPY_INCLUDE) cython -3 --directive boundscheck=False,wraparound=False --annotate ConfigSpace/*.pyx + +cython-html: cython-annotate + python -c "import webbrowser; from pathlib import Path; [webbrowser.open(f'file://{path}') for path in Path('ConfigSpace').absolute().glob('*.html')]" install-dev: $(PIP) install -e ".[dev]" @@ -45,7 +58,7 @@ clean-docs: clean: 
clean-build clean-docs build: - python setup.py develop + python -m build # Running build before making docs is needed all be it very slow. # Without doing a full build, the doctests seem to use docstrings from the last compiled build diff --git a/scripts/benchmark_sampling.py b/scripts/benchmark_sampling.py index 8869e3f4..cbf97d3c 100644 --- a/scripts/benchmark_sampling.py +++ b/scripts/benchmark_sampling.py @@ -31,7 +31,7 @@ def run_test(configuration_space_path): validation_times = [] # Sample a little bit - for i in range(10): + for i in range(20): cs.seed(i) start_time = time.time() configurations = cs.sample_configuration(size=n_configs) @@ -40,7 +40,7 @@ def run_test(configuration_space_path): for j, c in enumerate(configurations): - if i == 0: + if i > 10: neighborhood = ConfigSpace.util.get_one_exchange_neighbourhood( c, seed=i * j, num_neighbors=4) diff --git a/setup.py b/setup.py index 15773e0b..d15074c6 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ from setuptools import Extension, find_packages, setup from setuptools.command.build_ext import build_ext +from Cython.Build import cythonize # must go after setuptools # Helper functions @@ -59,8 +60,7 @@ def finalize_options(self): AUTHOR_EMAIL = "feurerm@informatik.uni-freiburg.de" TEST_SUITE = "pytest" -SETUP_REQS = ["numpy", "cython"] -INSTALL_REQS = ["numpy", "cython", "pyparsing", "scipy", "typing_extensions"] +INSTALL_REQS = ["numpy", "pyparsing", "scipy", "typing_extensions", "more_itertools"] MIN_PYTHON_VERSION = ">=3.7" CLASSIFIERS = [ "Programming Language :: Python :: 3.7", @@ -85,8 +85,36 @@ def finalize_options(self): COMPILER_DIRECTIVES = { "boundscheck": False, "wraparound": False, + "language_level": "3", } + +""" +# Profiling +Set the below flag to True to enable profiling of the code. This will cause some minor performance +overhead so it should only be used for debugging purposes. 
+ +Use [`py-spy`](https://github.com/benfred/py-spy) with [speedscope.app](https://www.speedscope.app/) +```bash +pip install py-spy +py-spy record --rate 800 --format speedscope --subprocesses --native -o profile.svg -- python