diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..09ce2199 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +*.DS_Store +*.pyc +*.egg* +venv* +dsr/dsr/summary* +*log_* +.gitignore +.ipynb_checkpoints +~$* +*.vscode/ +dsr/build +dsr/dsr/cyfunc* +**/log/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..92216caa --- /dev/null +++ b/LICENSE @@ -0,0 +1,30 @@ +BSD 3-Clause License + +Copyright (c) 2018, Lawrence Livermore National Security, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..3737d5a8 --- /dev/null +++ b/NOTICE @@ -0,0 +1,21 @@ +This work was produced under the auspices of the U.S. Department of +Energy by Lawrence Livermore National Laboratory under Contract +DE-AC52-07NA27344. + +This work was prepared as an account of work sponsored by an agency of +the United States Government. Neither the United States Government nor +Lawrence Livermore National Security, LLC, nor any of their employees +makes any warranty, expressed or implied, or assumes any legal liability +or responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed, or represents that +its use would not infringe privately owned rights. + +Reference herein to any specific commercial product, process, or service +by trade name, trademark, manufacturer, or otherwise does not necessarily +constitute or imply its endorsement, recommendation, or favoring by the +United States Government or Lawrence Livermore National Security, LLC. + +The views and opinions of authors expressed herein do not necessarily +state or reflect those of the United States Government or Lawrence +Livermore National Security, LLC, and shall not be used for advertising +or product endorsement purposes. diff --git a/README.md b/README.md new file mode 100644 index 00000000..61248c9b --- /dev/null +++ b/README.md @@ -0,0 +1,134 @@ +# Deep symbolic regression + +Deep symbolic regression (DSR) is a deep learning algorithm for symbolic regression--the task of recovering tractable mathematical expressions from an input dataset. The package `dsr` contains the code for DSR, including a single-point, parallelized launch script (`dsr/run.py`), baseline genetic programming-based symbolic regression algorithm, and an sklearn-like interface for use with your own data. 
+ +This code supports the ICLR 2021 paper [Deep symbolic regression: Recovering mathematical expressions from data via risk-seeking policy gradients](https://openreview.net/forum?id=m5Qsh0kBQG). + +# Installation + +Installation is straightforward in a Python 3 virtual environment using Pip. From the repository root: + +``` +python3 -m venv venv3 # Create a Python 3 virtual environment +source venv3/bin/activate # Activate the virtual environment +pip install -r requirements.txt # Install Python dependencies +export CFLAGS="-I $(python -c "import numpy; print(numpy.get_include())") $CFLAGS" # Needed on Mac to prevent fatal error: 'numpy/arrayobject.h' file not found +pip install -e ./dsr # Install DSR package +``` + +To perform experiments involving the GP baseline, you will need the additional package `deap`. + +# Example usage + +To try out DSR, use the following command from the repository root: + +``` +python -m dsr.run ./dsr/dsr/config.json --b=Nguyen-6 +``` + +This should solve in around 50 training steps (~30 seconds on a laptop). + +# Getting started + +## Configuring runs + +DSR uses JSON files to configure training. + +Top-level key "task" specifies details of the benchmark expression for DSR or GP. See docs in `regression.py` for details. + +Top-level key "training" specifies the training hyperparameters for DSR. See docs in `train.py` for details. + +Top-level key "controller" specifies the RNN controller hyperparameters for DSR. See docs in `controller.py` for details. + +Top-level key "gp" specifies the hyperparameters for GP if using the GP baseline. See docs for `dsr.baselines.gpsr.GP` for details. + +## Launching runs + +After configuring a run, launching it is simple: + +``` +python -m dsr.run [PATH_TO_CONFIG] [--OPTIONS] +``` + +## Sklearn interface + +DSR also provides an [sklearn-like regressor interface](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html). 
Example usage: + +``` +from dsr import DeepSymbolicRegressor +import numpy as np + +# Generate some data +np.random.seed(0) +X = np.random.random((10, 2)) +y = np.sin(X[:,0]) + X[:,1] ** 2 + +# Create the model +model = DeepSymbolicRegressor("config.json") + +# Fit the model +model.fit(X, y) # Should solve in ~10 seconds + +# View the best expression +print(model.program_.pretty()) + +# Make predictions +model.predict(2 * X) +``` + +## Using an external dataset + +To use your own dataset, simply provide the path to the `"dataset"` key in the config, and give your task an arbitrary name. + +``` +"task": { + "task_type": "regression", + "name": "my_task", + "dataset": "./path/to/my_dataset.csv", + ... +} +``` + +Then run DSR: + +``` +python -m dsr.run path/to/config.json +``` + +Note the `--b` flag matches the name of the CSV file (without the `.csv` extension). + +## Command-line examples + +Show command-line help and quit + +``` +python -m dsr.run --help +``` + +Train 2 independent runs of DSR on the Nguyen-1 benchmark using 2 cores + +``` +python -m dsr.run config.json --b=Nguyen-1 --mc=2 --num_cores=2 +``` + +Train DSR on all 12 Nguyen benchmarks using 12 cores + +``` +python -m dsr.run config.json --b=Nguyen --num_cores=12 +``` + +Train 2 independent runs of GP on Nguyen-1 + +``` +python -m dsr.run config.json --method=gp --b=Nguyen-1 --mc=2 --num_cores=2 +``` + +Train DSR on Nguyen-1 and Nguyen-4 + +``` +python -m dsr.run config.json --b=Nguyen-1 --b=Nguyen-4 +``` + +# Release + +LLNL-CODE-647188 diff --git a/dsr/dsr/__init__.py b/dsr/dsr/__init__.py new file mode 100644 index 00000000..b18aa77a --- /dev/null +++ b/dsr/dsr/__init__.py @@ -0,0 +1,3 @@ +from dsr.core import DeepSymbolicOptimizer +from dsr.task.regression.sklearn import DeepSymbolicRegressor + diff --git a/dsr/dsr/baselines/__init__.py b/dsr/dsr/baselines/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dsr/dsr/baselines/constraints.py b/dsr/dsr/baselines/constraints.py new file mode 100644 index 
# Constraint predicates for GP individuals (dsr/baselines/constraints.py).
# Each predicate takes an individual -- an iterable of nodes exposing a
# ``name`` attribute, in the order the tree is stored -- and returns True
# when the individual VIOLATES the constraint.
#
# UNARY_TOKENS and BINARY_TOKENS are provided by the file-level import
# ``from dsr.functions import UNARY_TOKENS, BINARY_TOKENS``.

TRIG_TOKENS = ["sin", "cos", "tan", "csc", "sec", "cot"]

# Define inverse tokens
INVERSE_TOKENS = {
    "exp": "log",
    "neg": "neg",
    "inv": "inv",
    "sqrt": "n2",
}

# Add inverse trig functions
INVERSE_TOKENS.update({t: "arc" + t for t in TRIG_TOKENS})

# Add reverse (the comprehension is fully built before the update runs, so
# the dict is not mutated while being iterated)
INVERSE_TOKENS.update({v: k for k, v in INVERSE_TOKENS.items()})

DEBUG = False


def check_inv(ind):
    """Returns True if two sequential tokens are inverse unary operators."""

    names = [node.name for node in ind]
    for name, next_name in zip(names, names[1:]):
        if name in INVERSE_TOKENS and next_name == INVERSE_TOKENS[name]:
            if DEBUG:
                print("Constrained inverse:", ind)
            return True
    return False


def check_const(ind):
    """Returns True if children of a parent are all const tokens.

    A unary operator is flagged when the token right after it is "const".
    A binary operator is flagged when the two tokens right after it are both
    "const": if the first child is a terminal, the second child immediately
    follows it in the token sequence.
    """

    names = [node.name for node in ind]
    for i, name in enumerate(names):
        # Slices (rather than direct indexing) keep a truncated sequence from
        # raising IndexError; a too-short slice simply compares unequal.
        if name in UNARY_TOKENS and names[i + 1:i + 2] == ["const"]:
            if DEBUG:
                print("Constrained const (unary)", ind)
            return True
        # Both children must be constants. The previous code tested
        # names[i+1] twice, flagging e.g. add(const, x1) as well.
        if name in BINARY_TOKENS and names[i + 1:i + 3] == ["const", "const"]:
            if DEBUG:
                print("Constrained const (binary)", ind)
            return True
    return False


def check_trig(ind):
    """Returns True if a descendant of a trig operator is another trig
    operator."""

    names = [node.name for node in ind]
    trig_descendant = False  # True when current node is a descendant of a trig operator
    trig_dangling = None  # Number of unselected nodes in trig subtree
    for name in names:
        if name in TRIG_TOKENS:
            if trig_descendant:
                if DEBUG:
                    print("Constrained trig:", ind)
                return True
            # Entering a trig subtree: its single child is still open
            trig_descendant = True
            trig_dangling = 1
        elif trig_descendant:
            if name in BINARY_TOKENS:
                # Fills one open slot, opens two
                trig_dangling += 1
            elif name not in UNARY_TOKENS:
                # Anything that is neither binary nor unary closes a slot
                trig_dangling -= 1
            # Unary tokens fill one slot and open one: no net change
            if trig_dangling == 0:
                trig_descendant = False
    return False


def make_check_min_len(min_length):
    """Creates closure for minimum length constraint"""

    def check_min_len(ind):
        """Returns True if individual is less than minimum length"""

        if len(ind) < min_length:
            if DEBUG:
                print("Constrained min len: {} (length {})".format(ind, len(ind)))
            return True

        return False

    return check_min_len


def make_check_max_len(max_length):
    """Creates closure for maximum length constraint"""

    def check_max_len(ind):
        """Returns True if individual is greater than maximum length"""

        if len(ind) > max_length:
            if DEBUG:
                print("Constrained max len: {} (length {})".format(ind, len(ind)))
            return True

        return False

    return check_max_len


def make_check_num_const(max_const):
    """Creates closure for maximum number of constants constraint"""

    def check_num_const(ind):
        """Returns True if individual has more than max_const const tokens"""

        num_const = sum(1 for t in ind if t.name == "const")
        if num_const > max_const:
            if DEBUG:
                print("Constrained max const: {} ({} consts)".format(ind, num_const))
            return True

        return False

    return check_num_const
class GP():
    """Genetic-programming based symbolic regression class.

    Builds a DEAP primitive set, toolbox and constraint decorators from a
    dataset, then evolves expression trees with ``algorithms.eaSimple``.

    Parameters
    ----------
    dataset : object
        Must expose ``X_train``, ``y_train``, ``X_test``, ``y_test``,
        ``y_test_noiseless``, ``n_input_var`` and ``function_set``. Columns
        of ``X_train`` / ``X_test`` are the input variables.

    metric : str
        Name of the training fitness, see ``make_fitness``.

    population_size : int
        Number of individuals in the population.

    generations : int or None
        Number of generations. If None, it is derived from ``n_samples``.

    n_samples : int or None
        Total number of individuals to evaluate; only used when
        ``generations`` is None (``generations = n_samples // population_size``
        for positive values).

    tournament_size : int
        Tournament size for selection.

    p_crossover, p_mutate : float
        Crossover and mutation probabilities passed to the algorithm.

    const_range : sequence of two floats
        Currently unused; kept for configuration compatibility.

    const_optimizer : str
        Name passed to ``make_const_optimizer``.

    const_params : dict or None
        Keyword arguments for the constant optimizer.

    seed : int
        Seed for Python's ``random`` module, applied in ``train``.

    early_stopping : bool
        If True, stop doing real work once a hall-of-fame member succeeds.

    threshold : float
        Noiseless test NMSE below which an individual counts as a success.

    verbose : bool
        Print progress and the final hall of fame.

    protected : bool
        Use the "protected_" version of an operator when one exists.

    pareto_front : bool
        Track a Pareto front over (error, complexity) instead of a single
        best individual.

    constrain_const, constrain_trig, constrain_inv, constrain_min_len,
    constrain_max_len, constrain_num_const : bool
        Toggle the individual constraints applied to crossover/mutation.

    min_length, max_length, max_const : int
        Parameters of the length and number-of-constants constraints.
    """

    def __init__(self, dataset, metric="nmse", population_size=1000,
                 generations=1000, n_samples=None, tournament_size=3,
                 p_crossover=0.5, p_mutate=0.1,
                 const_range=(-1, 1), const_optimizer="scipy",
                 const_params=None, seed=0, early_stopping=False,
                 threshold=1e-12, verbose=True, protected=True,
                 pareto_front=False,
                 # Constraint hyperparameters
                 constrain_const=True,
                 constrain_trig=True,
                 constrain_inv=True,
                 constrain_min_len=True,
                 constrain_max_len=True,
                 constrain_num_const=True,
                 min_length=4,
                 max_length=30,
                 max_const=3):

        self.dataset = dataset
        self.fitted = False

        assert n_samples is None or generations is None, "At least one of 'n_samples' or 'generations' must be None."
        if generations is None:
            if n_samples is None:
                # Previously surfaced as an opaque TypeError from None / int
                raise ValueError("One of 'n_samples' or 'generations' must be specified.")
            generations = int(n_samples / population_size)

        # Set hyperparameters
        self.population_size = population_size
        self.generations = generations
        self.tournament_size = tournament_size
        self.p_mutate = p_mutate
        self.p_crossover = p_crossover
        self.seed = seed
        self.early_stopping = early_stopping
        self.threshold = threshold
        self.verbose = verbose
        self.pareto_front = pareto_front

        # Fitness function used during training
        # Includes closure for fitness function metric and training data
        fitness = partial(self.make_fitness(metric), y=dataset.y_train, var_y=np.var(dataset.y_train)) # Function of y_hat
        self.fitness = partial(self.compute_fitness, optimize=True, fitness=fitness, X=dataset.X_train.T) # Function of individual

        # Test NMSE, used as final performance metric
        # Includes closure for test data
        nmse_test = partial(self.make_fitness("nmse"), y=dataset.y_test, var_y=np.var(dataset.y_test)) # Function of y_hat
        self.nmse_test = partial(self.compute_fitness, optimize=False, fitness=nmse_test, X=dataset.X_test.T) # Function of individual

        # Noiseless test NMSE, only used to determine success for final performance
        # Includes closure for noiseless test data
        nmse_test_noiseless = partial(self.make_fitness("nmse"), y=dataset.y_test_noiseless, var_y=np.var(dataset.y_test_noiseless)) # Function of y_hat
        self.nmse_test_noiseless = partial(self.compute_fitness, optimize=False, fitness=nmse_test_noiseless, X=dataset.X_test.T) # Function of individual
        self.success = lambda ind : self.nmse_test_noiseless(ind)[0] < self.threshold # Function of individual

        # Create the primitive set
        pset = gp.PrimitiveSet("MAIN", dataset.X_train.shape[1])

        # Add input variables
        rename_kwargs = {"ARG{}".format(i) : "x{}".format(i + 1) for i in range(dataset.n_input_var)}
        pset.renameArguments(**rename_kwargs)

        # Add primitives
        for op_name in dataset.function_set:
            if op_name == "const":
                continue
            assert op_name in function_map, "Operation {} not recognized.".format(op_name)

            # Prepend available protected operators with "protected_"
            if protected and not op_name.startswith("protected_"):
                protected_op_name = "protected_{}".format(op_name)
                if protected_op_name in function_map:
                    op_name = protected_op_name

            op = function_map[op_name]
            pset.addPrimitive(op.function, op.arity, name=op.name)

        # Add constant. It is a placeholder terminal (value 1.0) whose value
        # is set by the constant optimizer in compute_fitness.
        const = "const" in dataset.function_set
        if const:
            const_params = const_params if const_params is not None else {}
            self.const_opt = make_const_optimizer(const_optimizer, **const_params)
            pset.addTerminal(1.0, name="const")

        # Create custom fitness and individual classes
        if self.pareto_front:
            # Fitness is compared lexicographically, so second dimension
            # (complexity) is only used in selection if first dimension (error)
            # is the same.
            creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
        else:
            creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

        # Define the evolutionary operators
        self.toolbox = base.Toolbox()
        self.toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
        self.toolbox.register("individual", tools.initIterate, creator.Individual, self.toolbox.expr)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
        self.toolbox.register("compile", gp.compile, pset=pset)
        self.toolbox.register("evaluate", self.fitness)
        self.toolbox.register("select", tools.selTournament, tournsize=tournament_size)
        self.toolbox.register("mate", gp.cxOnePoint)
        self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
        self.toolbox.register('mutate', gp.mutUniform, expr=self.toolbox.expr_mut, pset=pset)

        # Define constraints, each defined by a func : gp.Individual -> bool.
        # We decorate mutation/crossover operators with constrain, which
        # replaces a child with a random parent if func(ind) is True.
        constrain = partial(gp.staticLimit, max_value=0) # Constraint decorator
        funcs = []
        if constrain_min_len:
            funcs.append(constraints.make_check_min_len(min_length)) # Minimum length
        if constrain_max_len:
            funcs.append(constraints.make_check_max_len(max_length)) # Maximum length
        if constrain_inv:
            funcs.append(constraints.check_inv) # Subsequent inverse unary operators
        if constrain_trig:
            funcs.append(constraints.check_trig) # Nested trig operators
        if constrain_const and const:
            funcs.append(constraints.check_const) # All children are constants
        if constrain_num_const and const:
            funcs.append(constraints.make_check_num_const(max_const)) # Number of constants
        for func in funcs:
            for variation in ["mate", "mutate"]:
                self.toolbox.decorate(variation, constrain(func))

        # Create the training function
        self.algorithm = algorithms.eaSimple


    def compute_fitness(self, individual, fitness, X, optimize=False):
        """Compute the given fitness function on an individual using X.

        Parameters
        ----------
        individual : gp.PrimitiveTree
            Individual to evaluate. When optimize=True its "const" terminals
            are replaced in place by optimized values.

        fitness : callable
            Called as ``fitness(y_hat=...)``; returns the error to minimize.

        X : sequence of arrays
            One array per input variable; unpacked into the compiled function.

        optimize : bool
            Whether to optimize constants (and honor early stopping).

        Returns
        -------
        tuple
            ``(error,)``, or ``(error, complexity)`` if using a Pareto front.
            The error is ``np.inf`` if the output is not finite.
        """

        const_idxs = []
        if optimize:
            # Check if best individual (or any individual in Pareto front) has success=True
            # (i.e. NMSE below threshold on noiseless test set). If so, skip the
            # evaluation. The sentinel has one entry per objective so that it
            # matches the length of the fitness weights.
            if self.early_stopping and any(self.success(ind) for ind in self.hof):
                return (999,) * (2 if self.pareto_front else 1)

            # Retrieve symbolic constants
            const_idxs = [i for i, node in enumerate(individual) if node.name == "const"]

        if const_idxs:

            # Objective function for evaluating constants
            def obj(consts):
                for i, const in zip(const_idxs, consts):
                    individual[i] = gp.Terminal(const, False, object)
                    individual[i].name = "const" # For good measure
                f = self.toolbox.compile(expr=individual)
                y_hat = f(*X)
                y = self.dataset.y_train
                if np.isfinite(y_hat).all():
                    # Squash error to prevent consts from becoming inf
                    return -1/(1 + np.mean((y - y_hat)**2))
                else:
                    return 0

            # Do the optimization and set the optimized constants
            x0 = np.ones(len(const_idxs))
            optimized_consts = self.const_opt(obj, x0)
            for i, const in zip(const_idxs, optimized_consts):
                individual[i] = gp.Terminal(const, False, object)
                individual[i].name = "const" # This is necessary to ensure the constant is re-optimized in the next generation

        # Execute the individual
        f = self.toolbox.compile(expr=individual)
        with np.errstate(all="ignore"):
            y_hat = f(*X)

        # Check for validity
        if np.isfinite(y_hat).all():
            fitness = (fitness(y_hat=y_hat),)
        else:
            fitness = (np.inf,)

        # Compute complexity (only if using Pareto front)
        if self.pareto_front:
            # Tokens missing from function_map count as complexity 1
            complexity = sum(function_map[prim.name].complexity
                             if prim.name in function_map
                             else 1 for prim in individual)
            fitness += (complexity,)

        return fitness


    def train(self):
        """Train the GP.

        Returns
        -------
        ind_best : individual
            First hall-of-fame member with success=True, otherwise the first
            hall-of-fame member.

        logbook
            Second value returned by the evolutionary algorithm.

        Raises
        ------
        RuntimeError
            If this instance has already been trained.
        """

        if self.fitted:
            raise RuntimeError("This GP has already been fitted!")

        random.seed(self.seed)

        pop = self.toolbox.population(n=self.population_size)
        if self.pareto_front:
            self.hof = tools.ParetoFront()
        else:
            self.hof = tools.HallOfFame(maxsize=1)

        stats_fit = tools.Statistics(lambda p : p.fitness.values[0])
        stats_fit.register("avg", np.mean)
        stats_fit.register("min", np.min)
        stats_size = tools.Statistics(len)
        stats_size.register("avg", np.mean)
        mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)

        pop, logbook = self.algorithm(population=pop,
                                      toolbox=self.toolbox,
                                      cxpb=self.p_crossover,
                                      mutpb=self.p_mutate,
                                      ngen=self.generations,
                                      stats=mstats,
                                      halloffame=self.hof,
                                      verbose=self.verbose)

        self.fitted = True

        # Delete custom classes
        del creator.FitnessMin
        del creator.Individual
        if "const" in dir(gp):
            del gp.const

        # The best individual is the first one in self.hof with success=True,
        # otherwise the highest reward. This mimics DSR's train.py.
        ind_best = next((ind for ind in self.hof if self.success(ind)), None)
        if ind_best is None:
            ind_best = self.hof[0] # first element in self.hof is the fittest

        if self.verbose:
            print("Printing {}:".format("Pareto front" if self.pareto_front else "hall of fame"))
            print("Fitness | Individual")
            for ind in self.hof:
                print(ind.fitness, [token.name for token in ind])

        return ind_best, logbook


    def make_fitness(self, metric):
        """Generates a fitness function by name.

        Supported names: "mse", "rmse", "nmse", "nrmse", "cinv_nmse" and
        "cinv_nrmse". The returned function has signature
        ``(y, y_hat, var_y)``; lower is better.

        Raises
        ------
        ValueError
            If the metric name is not recognized.
        """

        if metric == "mse":
            fitness = lambda y, y_hat, var_y : np.mean((y - y_hat)**2)

        elif metric == "rmse":
            fitness = lambda y, y_hat, var_y : np.sqrt(np.mean((y - y_hat)**2))

        elif metric == "nmse":
            fitness = lambda y, y_hat, var_y : np.mean((y - y_hat)**2 / var_y)

        elif metric == "nrmse":
            fitness = lambda y, y_hat, var_y : np.sqrt(np.mean((y - y_hat)**2 / var_y))

        # Complementary inverse NMSE
        elif metric == "cinv_nmse":
            fitness = lambda y, y_hat, var_y : 1 - 1/(1 + np.mean((y - y_hat)**2 / var_y))

        # Complementary inverse NRMSE
        elif metric == "cinv_nrmse":
            fitness = lambda y, y_hat, var_y : 1 - 1/(1 + np.sqrt(np.mean((y - y_hat)**2 / var_y)))

        else:
            raise ValueError("Metric not recognized.")

        return fitness
"task": { + "task_type" : "regression", + "name" : "Nguyen-1", + "function_set": null, + "dataset" : { + "name" : null, + "noise": null, + "dataset_size_multiplier": 1.0 + }, + "metric" : "inv_nrmse", + "metric_params" : [1.0], + "threshold" : 1e-12, + "protected" : false, + "reward_noise" : 0.0 + }, + "prior": { + "length" : {"min_" : 4, "max_" : 30}, + "repeat" : {"tokens" : "const", "max_" : 3}, + "inverse" : {}, + "trig" : {}, + "const" : {} + }, + "training": { + "logdir": "./log", + "n_epochs": null, + "n_samples": 2000000, + "batch_size": 1000, + "complexity": "length", + "complexity_weight": 0.0, + "const_optimizer": "scipy", + "const_params": {}, + "alpha": 0.5, + "epsilon": 0.05, + "verbose": true, + "baseline": "R_e", + "b_jumpstart": false, + "n_cores_batch": 1, + "summary": false, + "debug": 0, + "output_file": null, + "save_all_r": false, + "early_stopping": true, + "pareto_front": false, + "hof": 100 + }, + "controller": { + "cell": "lstm", + "num_layers": 1, + "num_units": 32, + "initializer": "zeros", + "embedding": false, + "embedding_size": 8, + "optimizer": "adam", + "learning_rate": 0.0005, + "observe_action": false, + "observe_parent": true, + "observe_sibling": true, + "entropy_weight": 0.005, + "ppo": false, + "ppo_clip_ratio": 0.2, + "ppo_n_iters": 10, + "ppo_n_mb": 4, + "pqt": false, + "pqt_k": 10, + "pqt_batch_size": 1, + "pqt_weight": 200.0, + "pqt_use_pg": false, + "max_length": 30 + }, + "gp": { + "population_size": 1000, + "generations": null, + "n_samples" : 2000000, + "tournament_size": 2, + "metric": "nmse", + "const_range": [ + -1.0, + 1.0 + ], + "p_crossover": 0.95, + "p_mutate": 0.03, + "seed": 0, + "early_stopping": true, + "pareto_front": false, + "threshold": 1e-12, + "verbose": false, + "protected": true, + "constrain_const": true, + "constrain_trig": true, + "constrain_inv": true, + "constrain_min_len": true, + "constrain_max_len": true, + "constrain_num_const": true, + "min_length": 4, + "max_length": 30, + "max_const" : 3 
def make_const_optimizer(name, **kwargs):
    """Returns a ConstOptimizer given a name and keyword arguments.

    Parameters
    ----------
    name : str or None
        One of None, "dummy" or "scipy". None is equivalent to "dummy".

    **kwargs
        Stored by the optimizer; ScipyMinimize forwards them to
        scipy.optimize.minimize.

    Raises
    ------
    KeyError
        If name is not a recognized optimizer.
    """

    const_optimizers = {
        None : Dummy,
        "dummy" : Dummy,
        "scipy" : ScipyMinimize,
    }

    try:
        optimizer_cls = const_optimizers[name]
    except KeyError:
        # Same exception type as the plain dict lookup, with a usable message
        raise KeyError("Constant optimizer {!r} not recognized. Available: {}".format(
            name, list(const_optimizers))) from None

    return optimizer_cls(**kwargs)


class ConstOptimizer(object):
    """Base class for constant optimizer"""

    def __init__(self, **kwargs):
        self.kwargs = kwargs


    def __call__(self, f, x0):
        """
        Optimizes an objective function from an initial guess.

        The objective function is the negative of the base reward (reward
        without penalty) used for training. Optimization excludes any penalties
        because they are constant w.r.t. the constants being optimized.

        Parameters
        ----------
        f : function mapping np.ndarray to float
            Objective function (negative base reward).

        x0 : np.ndarray
            Initial guess for constant placeholders.

        Returns
        -------
        x : np.ndarray
            Vector of optimized constants.
        """
        raise NotImplementedError


class Dummy(ConstOptimizer):
    """Dummy class that selects the initial guess for each constant"""

    def __call__(self, f, x0):
        # No optimization: the initial guess is returned as-is
        return x0


class ScipyMinimize(ConstOptimizer):
    """SciPy's non-linear optimizer"""

    def __call__(self, f, x0):
        # Silence divide warnings raised while evaluating candidate constants
        with np.errstate(divide='ignore'):
            opt_result = minimize(f, x0, **self.kwargs)
        return opt_result['x']
+ + Specifically, the RNN outputs a distribution over pre-order traversals of + symbolic expression trees. It is trained using REINFORCE with baseline. + + Parameters + ---------- + sess : tf.Session + TensorFlow Session object. + + prior : dsr.prior.JointPrior + JointPrior object used to adjust probabilities during sampling. + + summary : bool + Write tensorboard summaries? + + debug : int + Debug level, also used in learn(). 0: No debug. 1: Print shapes and + number of parameters for each variable. + + cell : str + Recurrent cell to use. Supports 'lstm' and 'gru'. + + num_layers : int + Number of RNN layers. + + num_units : int or list of ints + Number of RNN cell units in each of the RNN's layers. If int, the value + is repeated for each layer. + + initializer : str + Initializer for the recurrent cell. Supports 'zeros' and 'var_scale'. + + embedding : bool + Embed each observation? + + embedding_size : int + Size of embedding for each observation if embedding=True. + + optimizer : str + Optimizer to use. Supports 'adam', 'rmsprop', and 'sgd'. + + learning_rate : float + Learning rate for optimizer. + + observe_action : bool + Observe previous action token? + + observe_parent : bool + Observe parent token? + + observe_sibling : bool + Observe sibling token? + + entropy_weight : float + Coefficient for entropy bonus. + + ppo : bool + Use proximal policy optimization (instead of vanilla policy gradient)? + + ppo_clip_ratio : float + Clip ratio to use for PPO. + + ppo_n_iters : int + Number of optimization iterations for PPO. + + ppo_n_mb : int + Number of minibatches per optimization iteration for PPO. + + pqt : bool + Train with priority queue training (PQT)? + + pqt_k : int + Size of priority queue. + + pqt_batch_size : int + Size of batch to sample (with replacement) from priority queue. + + pqt_weight : float + Coefficient for PQT loss function. + + pqt_use_pg : bool + Use policy gradient loss when using PQT? 
+ + max_length : int or None + Maximum sequence length. This will be overridden if a LengthConstraint + with a maximum length is part of the prior. + + """ + + def __init__(self, sess, prior, debug=0, summary=True, + # RNN cell hyperparameters + cell='lstm', + num_layers=1, + num_units=32, + initializer='zeros', + # Embedding hyperparameters + embedding=False, + embedding_size=4, + # Optimizer hyperparameters + optimizer='adam', + learning_rate=0.001, + # Observation space hyperparameters + observe_action=True, + observe_parent=True, + observe_sibling=True, + # Loss hyperparameters + entropy_weight=0.0, + # PPO hyperparameters + ppo=False, + ppo_clip_ratio=0.2, + ppo_n_iters=10, + ppo_n_mb=4, + # PQT hyperparameters + pqt=False, + pqt_k=10, + pqt_batch_size=1, + pqt_weight=200.0, + pqt_use_pg=False, + # Other hyperparameters + max_length=None): + + self.sess = sess + self.prior = prior + self.summary = summary + self.rng = np.random.RandomState(0) # Used for PPO minibatch sampling + + lib = Program.library + + # Find max_length from the LengthConstraint prior, if it exists + prior_max_length = None + for single_prior in self.prior.priors: + if isinstance(single_prior, LengthConstraint): + if single_prior.max is not None: + prior_max_length = single_prior.max + self.max_length = prior_max_length + break + if prior_max_length is None: + assert max_length is not None, "max_length must be specified if "\ + "there is no LengthConstraint." + self.max_length = max_length + print("WARNING: Maximum length not constrained. 
Sequences will " + "stop at {} and complete by repeating the first input " + "variable.".format(self.max_length)) + elif max_length is not None and max_length != self.max_length: + print("WARNING: max_length ({}) will be overridden by value from " + "LengthConstraint ({}).".format(max_length, self.max_length)) + max_length = self.max_length + + # Hyperparameters + self.observe_parent = observe_parent + self.observe_sibling = observe_sibling + self.entropy_weight = entropy_weight + self.ppo = ppo + self.ppo_n_iters = ppo_n_iters + self.ppo_n_mb = ppo_n_mb + self.pqt = pqt + self.pqt_k = pqt_k + self.pqt_batch_size = pqt_batch_size + + n_choices = lib.L + + # Placeholders, computed after instantiating expressions + self.batch_size = tf.placeholder(dtype=tf.int32, shape=(), name="batch_size") + self.baseline = tf.placeholder(dtype=tf.float32, shape=(), name="baseline") + + # Parameter assertions/warnings + assert observe_action + observe_parent + observe_sibling > 0, "Must include at least one observation." 
+ + self.compute_parents_siblings = any([self.observe_parent, + self.observe_sibling, + self.prior.requires_parents_siblings]) + + # Build controller RNN + with tf.name_scope("controller"): + + def make_initializer(name): + if name == "zeros": + return tf.zeros_initializer() + if name == "var_scale": + return tf.contrib.layers.variance_scaling_initializer( + factor=0.5, mode='FAN_AVG', uniform=True, seed=0) + raise ValueError("Did not recognize initializer '{}'".format(name)) + + def make_cell(name, num_units, initializer): + if name == 'lstm': + return tf.nn.rnn_cell.LSTMCell(num_units, initializer=initializer) + if name == 'gru': + return tf.nn.rnn_cell.GRUCell(num_units, kernel_initializer=initializer, bias_initializer=initializer) + raise ValueError("Did not recognize cell type '{}'".format(name)) + + # Create recurrent cell + if isinstance(num_units, int): + num_units = [num_units] * num_layers + initializer = make_initializer(initializer) + cell = tf.contrib.rnn.MultiRNNCell( + [make_cell(cell, n, initializer=initializer) for n in num_units]) + cell = LinearWrapper(cell=cell, output_size=n_choices) + + # Define input dimensions + n_action_inputs = n_choices + 1 # lib tokens + empty token + n_parent_inputs = n_choices + 1 - len(lib.terminal_tokens) # Parent sub-lib tokens + empty token + n_sibling_inputs = n_choices + 1 # lib tokens + empty tokens + + # Create embeddings + if embedding: + with tf.variable_scope("embeddings", + initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0, seed=0)): + if observe_action: + action_embeddings = tf.get_variable("action_embeddings", [n_action_inputs, embedding_size], trainable=True) + if observe_parent: + parent_embeddings = tf.get_variable("parent_embeddings", [n_parent_inputs, embedding_size], trainable=True) + if observe_sibling: + sibling_embeddings = tf.get_variable("sibling_embeddings", [n_sibling_inputs, embedding_size], trainable=True) + + # First observation is all empty tokens + initial_obs = tuple() + 
for n in [n_action_inputs, n_parent_inputs, n_sibling_inputs]: + obs = tf.constant(n - 1, dtype=np.int32) + obs = tf.broadcast_to(obs, [self.batch_size]) + initial_obs += (obs,) + + # Get initial prior + initial_prior = self.prior.initial_prior() + initial_prior = tf.constant(initial_prior, dtype=tf.float32) + prior_dims = tf.stack([self.batch_size, n_choices]) + initial_prior = tf.broadcast_to(initial_prior, prior_dims) + # arities = np.array([Program.arities[i] for i in range(n_choices)]) + # prior = np.zeros(n_choices, dtype=np.float32) + # if self.min_length is not None and self.min_length > 1: + # prior[arities == 0] = -np.inf + # prior = tf.constant(prior, dtype=tf.float32) + # prior_dims = tf.stack([self.batch_size, n_choices]) + # prior = tf.broadcast_to(prior, prior_dims) + # initial_prior = prior + + + # Returns concatenated one-hot or embeddings from observation tokens + # Used for both raw_rnn and dynamic_rnn + def get_input(obs): + action, parent, sibling = obs + observations = [] + if observe_action: + if embedding: + obs = tf.nn.embedding_lookup(action_embeddings, action) + else: + obs = tf.one_hot(action, depth=n_action_inputs) + observations.append(obs) + if observe_parent: + if embedding: + obs = tf.nn.embedding_lookup(parent_embeddings, parent) + else: + obs = tf.one_hot(parent, depth=n_parent_inputs) + observations.append(obs) + if observe_sibling: + if embedding: + obs = tf.nn.embedding_lookup(sibling_embeddings, sibling) + else: + obs = tf.one_hot(sibling, depth=n_sibling_inputs) + observations.append(obs) + input_ = tf.concat(observations, -1) + return input_ + + + # Applies constraints + def get_action_parent_sibling_prior_dangling(actions, dangling): + n = actions.shape[0] # Batch size + i = actions.shape[1] - 1 # Current index + action = actions[:, -1] # Current action + + # Depending on the constraints, may need to compute parents and siblings + if self.compute_parents_siblings: + parent, sibling = parents_siblings(actions, 
arities=lib.arities, parent_adjust=lib.parent_adjust) + else: + parent = np.zeros(n, dtype=np.int32) + sibling = np.zeros(n, dtype=np.int32) + + # Update dangling with (arity - 1) for each element in action + dangling += lib.arities[action] - 1 + + prior = self.prior(actions, parent, sibling, dangling) + + return action, parent, sibling, prior, dangling + + + # Given the actions chosen so far, return the observation, the prior, and the updated dangling + # Uses py_func to retrieve action/parent/sibling/dangling + def get_next_obs_prior_dangling(actions_ta, dangling): + + # Get current action batch + actions = tf.transpose(actions_ta.stack()) # Shape: (?, time) + + # Compute parent, sibling, prior, and dangling + action, parent, sibling, prior, dangling = tf.py_func(func=get_action_parent_sibling_prior_dangling, + inp=[actions, dangling], + Tout=[tf.int32, tf.int32, tf.int32, tf.float32, tf.int32]) + + # Observe previous action, parent, and/or sibling + obs = (action, parent, sibling) + + # Set the shapes for returned Tensors + action.set_shape([None]) + parent.set_shape([None]) + sibling.set_shape([None]) + prior.set_shape([None, lib.L]) + dangling.set_shape([None]) + + return obs, prior, dangling + + + # Define loop function to be used by tf.nn.raw_rnn. 
+ initial_cell_input = get_input(initial_obs) + def loop_fn(time, cell_output, cell_state, loop_state): + + if cell_output is None: # time == 0 + finished = tf.zeros(shape=[self.batch_size], dtype=tf.bool) + obs = initial_obs + next_input = get_input(obs) + next_cell_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) # 2-tuple, each shape (?, num_units) + emit_output = None + actions_ta = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=False) # Read twice + obs_tas = (tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True), # Action inputs + tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True), # Parent inputs + tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True)) # Sibling inputs + priors_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True, clear_after_read=True) + prior = initial_prior + lengths = tf.ones(shape=[self.batch_size], dtype=tf.int32) + dangling = tf.ones(shape=[self.batch_size], dtype=tf.int32) + next_loop_state = ( + actions_ta, + obs_tas, + priors_ta, + obs, + prior, + dangling, + lengths, # Unused until implementing variable length + finished) + else: + actions_ta, obs_tas, priors_ta, obs, prior, dangling, lengths, finished = loop_state + logits = cell_output + prior + next_cell_state = cell_state + emit_output = logits + action = tf.multinomial(logits=logits, num_samples=1, output_dtype=tf.int32, seed=1)[:, 0] + # When implementing variable length: + # action = tf.where( + # tf.logical_not(finished), + # tf.multinomial(logits=logits, num_samples=1, output_dtype=tf.int32)[:, 0], + # tf.zeros(shape=[self.batch_size], dtype=tf.int32)) + next_actions_ta = actions_ta.write(time - 1, action) # Write chosen actions + next_obs, next_prior, next_dangling = get_next_obs_prior_dangling(next_actions_ta, dangling) + next_input = get_input(next_obs) + next_obs_tas = ( # Write OLD observation + obs_tas[0].write(time - 1, 
obs[0]), # Action inputs + obs_tas[1].write(time - 1, obs[1]), # Parent inputs + obs_tas[2].write(time - 1, obs[2])) # Sibling inputs + next_priors_ta = priors_ta.write(time - 1, prior) # Write OLD prior + finished = next_finished = tf.logical_or( + finished, + time >= max_length) + # When implementing variable length: + # finished = next_finished = tf.logical_or(tf.logical_or( + # finished, # Already finished + # next_dangling == 0), # Currently, this will be 0 not just the first time, but also at max_length + # time >= max_length) + next_lengths = tf.where( + finished, # Ever finished + lengths, + tf.tile(tf.expand_dims(time + 1, 0), [self.batch_size])) + next_loop_state = (next_actions_ta, + next_obs_tas, + next_priors_ta, + next_obs, + next_prior, + next_dangling, + next_lengths, + next_finished) + + return (finished, next_input, next_cell_state, emit_output, next_loop_state) + + # Returns RNN emit outputs TensorArray (i.e. logits), final cell state, and final loop state + with tf.variable_scope('policy'): + _, _, loop_state = tf.nn.raw_rnn(cell=cell, loop_fn=loop_fn) + actions_ta, obs_tas, priors_ta, _, _, _, _, _ = loop_state + + self.actions = tf.transpose(actions_ta.stack(), perm=[1, 0]) # (?, max_length) + self.obs = [tf.transpose(obs_ta.stack(), perm=[1, 0]) for obs_ta in obs_tas] # [(?, max_length)] * 3 + self.priors = tf.transpose(priors_ta.stack(), perm=[1, 0, 2]) # (?, max_length, n_choices) + + + # Generates dictionary containing placeholders needed for a batch of sequences + def make_batch_ph(name): + with tf.name_scope(name): + batch_ph = { + "actions" : tf.placeholder(tf.int32, [None, max_length]), + "obs" : (tf.placeholder(tf.int32, [None, max_length]), + tf.placeholder(tf.int32, [None, max_length]), + tf.placeholder(tf.int32, [None, max_length])), + "priors" : tf.placeholder(tf.float32, [None, max_length, n_choices]), + "lengths" : tf.placeholder(tf.int32, [None,]), + "rewards" : tf.placeholder(tf.float32, [None], name="r") + } + batch_ph = 
Batch(**batch_ph) + + return batch_ph + + def safe_cross_entropy(p, logq, axis=-1): + safe_logq = tf.where(tf.equal(p, 0.), tf.ones_like(logq), logq) + return - tf.reduce_sum(p * safe_logq, axis) + + # Generates tensor for neglogp of a given batch + def make_neglogp_and_entropy(B): + with tf.variable_scope('policy', reuse=True): + logits, _ = tf.nn.dynamic_rnn(cell=cell, + inputs=get_input(B.obs), + sequence_length=B.lengths, # Backpropagates only through sequence length + dtype=tf.float32) + logits += B.priors + probs = tf.nn.softmax(logits) + logprobs = tf.nn.log_softmax(logits) + + # Generate mask from sequence lengths + # NOTE: Using this mask for neglogp and entropy actually does NOT + # affect training because gradients are zero outside the lengths. + # However, the mask makes tensorflow summaries accurate. + mask = tf.sequence_mask(B.lengths, maxlen=max_length, dtype=tf.float32) + + # Negative log probabilities of sequences + actions_one_hot = tf.one_hot(B.actions, depth=n_choices, axis=-1, dtype=tf.float32) + neglogp_per_step = safe_cross_entropy(actions_one_hot, logprobs, axis=2) # Sum over action dim + neglogp = tf.reduce_sum(neglogp_per_step * mask, axis=1) # Sum over time dim + + # NOTE 1: The above implementation is the same as the one below: + # neglogp_per_step = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=actions) + # neglogp = tf.reduce_sum(neglogp_per_step, axis=1) # Sum over time + # NOTE 2: The above implementation is also the same as the one below, with a few caveats: + # Exactly equivalent when removing priors. + # Equivalent up to precision when including clipped prior. + # Crashes when prior is not clipped due to multiplying zero by -inf. 
+ # neglogp_per_step = -tf.nn.log_softmax(logits + tf.clip_by_value(priors, -2.4e38, 0)) * actions_one_hot + # neglogp_per_step = tf.reduce_sum(neglogp_per_step, axis=2) + # neglogp = tf.reduce_sum(neglogp_per_step, axis=1) # Sum over time + + entropy_per_step = safe_cross_entropy(probs, logprobs, axis=2) # Sum over action dim -> (batch_size, max_length) + entropy = tf.reduce_sum(entropy_per_step * mask, axis=1) # Sum over time dim -> (batch_size, ) + + return neglogp, entropy + + + # On policy batch + self.sampled_batch_ph = make_batch_ph("sampled_batch") + + # Memory batch + self.memory_batch_ph = make_batch_ph("memory_batch") + memory_neglogp, _ = make_neglogp_and_entropy(self.memory_batch_ph) + self.memory_probs = tf.exp(-memory_neglogp) + self.memory_logps = -memory_neglogp + + # PQT batch + if pqt: + self.pqt_batch_ph = make_batch_ph("pqt_batch") + + # Setup losses + with tf.name_scope("losses"): + + neglogp, entropy = make_neglogp_and_entropy(self.sampled_batch_ph) + r = self.sampled_batch_ph.rewards + + # Entropy loss + entropy_loss = -self.entropy_weight * tf.reduce_mean(entropy, name="entropy_loss") + loss = entropy_loss + + # PPO loss + if ppo: + assert not pqt, "PPO is not compatible with PQT" + + self.old_neglogp_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="old_neglogp") + ratio = tf.exp(self.old_neglogp_ph - neglogp) + clipped_ratio = tf.clip_by_value(ratio, 1. - ppo_clip_ratio, 1. + ppo_clip_ratio) + ppo_loss = -tf.reduce_mean(tf.minimum(ratio * (r - self.baseline), clipped_ratio * (r - self.baseline))) + loss += ppo_loss + + # Define PPO diagnostics + clipped = tf.logical_or(ratio < (1. - ppo_clip_ratio), ratio > 1. 
+ ppo_clip_ratio) + self.clip_fraction = tf.reduce_mean(tf.cast(clipped, tf.float32)) + self.sample_kl = tf.reduce_mean(neglogp - self.old_neglogp_ph) + + # Policy gradient loss + else: + if not pqt or (pqt and pqt_use_pg): + pg_loss = tf.reduce_mean((r - self.baseline) * neglogp, name="pg_loss") + loss += pg_loss + + # Priority queue training loss + if pqt: + pqt_neglogp, _ = make_neglogp_and_entropy(self.pqt_batch_ph) + pqt_loss = pqt_weight * tf.reduce_mean(pqt_neglogp, name="pqt_loss") + loss += pqt_loss + + self.loss = loss + + def make_optimizer(name, learning_rate): + if name == "adam": + return tf.train.AdamOptimizer(learning_rate=learning_rate) + if name == "rmsprop": + return tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.99) + if name == "sgd": + return tf.train.GradientDescentOptimizer(learning_rate=learning_rate) + raise ValueError("Did not recognize optimizer '{}'".format(name)) + + # Create training op + optimizer = make_optimizer(name=optimizer, learning_rate=learning_rate) + with tf.name_scope("train"): + self.grads_and_vars = optimizer.compute_gradients(self.loss) + self.train_op = optimizer.apply_gradients(self.grads_and_vars) + # The two lines above are equivalent to: + # self.train_op = optimizer.minimize(self.loss) + with tf.name_scope("grad_norm"): + self.grads, _ = list(zip(*self.grads_and_vars)) + self.norms = tf.global_norm(self.grads) + + if debug >= 1: + total_parameters = 0 + print("") + for variable in tf.trainable_variables(): + shape = variable.get_shape() + n_parameters = np.product(shape) + total_parameters += n_parameters + print("Variable: ", variable.name) + print(" Shape: ", shape) + print(" Parameters:", n_parameters) + print("Total parameters:", total_parameters) + + # Create summaries + with tf.name_scope("summary"): + if self.summary: + if ppo: + tf.summary.scalar("ppo_loss", ppo_loss) + else: + if not pqt or (pqt and pqt_use_pg): + tf.summary.scalar("pg_loss", pg_loss) + if pqt: + 
def sample(self, n):
    """Sample a batch of n expressions.

    Parameters
    ----------
    n : int
        Value fed to the ``batch_size`` placeholder.

    Returns
    -------
    actions, obs, priors
        The fetched values of ``self.actions``, ``self.obs`` and
        ``self.priors`` for the sampled batch.
    """

    feed_dict = {self.batch_size: n}
    actions, obs, priors = self.sess.run(
        [self.actions, self.obs, self.priors], feed_dict=feed_dict)
    return actions, obs, priors


def compute_probs(self, memory_batch, log=False):
    """Compute the probabilities of a Batch.

    Parameters
    ----------
    memory_batch : Batch
        Batch fed to the memory-batch placeholders.

    log : bool
        If True, fetch log-probabilities instead of probabilities.
    """

    fetch = self.memory_logps if log else self.memory_probs
    feed_dict = {self.memory_batch_ph: memory_batch}
    return self.sess.run([fetch], feed_dict=feed_dict)[0]


def train_step(self, b, sampled_batch, pqt_batch):
    """Computes loss, trains model, and returns summaries.

    Parameters
    ----------
    b : float
        Baseline value fed to ``self.baseline``.

    sampled_batch : Batch
        On-policy batch fed to ``self.sampled_batch_ph``.

    pqt_batch : Batch or None
        Priority-queue batch; only fed when ``self.pqt`` is set.

    Returns
    -------
    summaries
        Result of running ``self.summaries`` if ``self.summary`` is set,
        otherwise None.
    """

    def take(value, idx):
        # Select samples `idx` from an array, or recursively from a (named)
        # tuple of arrays such as a Batch, preserving the container type.
        if isinstance(value, tuple):
            parts = [take(item, idx) for item in value]
            if hasattr(value, "_fields"):
                return type(value)(*parts)
            return tuple(parts)
        return np.asarray(value)[idx]

    feed_dict = {
        self.baseline: b,
        self.sampled_batch_ph: sampled_batch,
    }

    if self.pqt:
        feed_dict[self.pqt_batch_ph] = pqt_batch

    if self.ppo:
        # Compute old_neglogp to be used for training.
        # NOTE(review): this needs `self.neglogp` to hold the neglogp tensor
        # of the sampled batch; confirm the constructor stores it.
        old_neglogp = self.sess.run(self.neglogp, feed_dict=feed_dict)
        feed_dict[self.old_neglogp_ph] = old_neglogp

        # Perform multiple epochs of minibatch training. The number of samples
        # comes from the batch itself (previously an undefined name `r`).
        indices = np.arange(len(sampled_batch.rewards))
        for _ in range(self.ppo_n_iters):
            self.rng.shuffle(indices)
            for mb in np.array_split(indices, self.ppo_n_mb):
                if len(mb) == 0:
                    # More minibatches requested than samples available
                    continue
                # Identity comparison: the keys are graph tensors, so avoid
                # relying on their `==`.
                mb_feed_dict = {
                    k: take(v, mb) for k, v in feed_dict.items()
                    if k is not self.baseline and k is not self.batch_size
                }
                mb_feed_dict[self.baseline] = b
                mb_feed_dict[self.batch_size] = len(mb)
                self.sess.run([self.train_op], feed_dict=mb_feed_dict)

    else:
        self.sess.run([self.train_op], feed_dict=feed_dict)

    # Return summaries
    if self.summary:
        return self.sess.run(self.summaries, feed_dict=feed_dict)
    return None
class DeepSymbolicOptimizer():
    """
    Deep symbolic optimization model. Includes model hyperparameters and
    training configuration.

    Parameters
    ----------
    config : dict or str
        Config dictionary or path to JSON. See dsr/dsr/config.json for template.

    Attributes
    ----------
    config : dict
        Configuration parameters for training.

    Methods
    -------
    train
        Builds and trains the model according to config.
    """

    def __init__(self, config=None):
        self.update_config(config)
        self.sess = None
        self.pool = None

    def _release_resources(self):
        """Close the worker pool and session left over from a previous setup."""

        pool = getattr(self, "pool", None)
        if pool is not None:
            pool.close()
            pool.join()
        sess = getattr(self, "sess", None)
        if sess is not None:
            sess.close()
        self.pool = None
        self.sess = None

    def setup(self, seed=0):
        """Build a fresh graph, session, worker pool, prior and controller."""

        # Calling setup() again (e.g. train() twice) used to leak the previous
        # Pool's worker processes and the previous Session. The old session
        # must also be closed before the default graph is reset.
        self._release_resources()

        # Clear the cache, reset the compute graph, and set the seed
        Program.clear_cache()
        tf.reset_default_graph()
        self.seed(seed) # Must be called _after_ resetting graph

        self.pool = self.make_pool()
        self.sess = tf.Session()
        self.prior = self.make_prior()
        self.controller = self.make_controller()

    def train(self, seed=0):
        """Set up the model with the given seed, train it, and return the
        result of ``learn``."""

        # Setup the model
        self.setup(seed)

        # Train the model
        result = learn(self.sess,
                       self.controller,
                       self.pool,
                       **self.config_training)
        return result

    def update_config(self, config):
        """Load the configuration and split it into per-component sections.

        Parameters
        ----------
        config : dict, str or None
            Config dictionary, path to a JSON file, or None for an empty
            configuration. Missing sections default to empty dicts.
        """

        if config is None:
            config = {}
        elif isinstance(config, str):
            with open(config, 'rb') as f:
                config = json.load(f)

        self.config = defaultdict(dict, config)
        self.config_task = self.config["task"]
        self.config_prior = self.config["prior"]
        self.config_training = self.config["training"]
        self.config_controller = self.config["controller"]

    def seed(self, seed_=0):
        """Set the tensorflow seed, which will be offset by a checksum on the
        task name to ensure seeds differ across different tasks."""

        task_name = self.config_task.get("name", "")
        seed_ += zlib.adler32(task_name.encode("utf-8"))
        tf.set_random_seed(seed_)

        return seed_

    def make_prior(self):
        """Create the prior from the Program library and the prior config."""

        prior = make_prior(Program.library, self.config_prior)
        return prior

    def make_controller(self):
        """Create the Controller from the session, prior and controller config."""

        controller = Controller(self.sess,
                                self.prior,
                                **self.config_controller)
        return controller

    def make_pool(self):
        """Create the worker pool (if configured) and set the Task.

        Returns
        -------
        pool : multiprocessing.Pool or None
            A pool of ``n_cores_batch`` workers when that training option is
            greater than 1, otherwise None.
        """

        # Create the pool and set the Task for each worker
        pool = None
        n_cores_batch = self.config_training.get("n_cores_batch")
        if n_cores_batch is not None and n_cores_batch > 1:
            pool = Pool(n_cores_batch,
                        initializer=set_task,
                        initargs=(self.config_task,))

        # Set the Task for the parent process
        set_task(self.config_task)

        return pool

    def save(self, save_path):
        """Save the session variables to save_path."""

        saver = tf.train.Saver()
        saver.save(self.sess, save_path)

    def load(self, load_path):
        """Restore session variables from load_path, setting up the model
        first if no session exists yet."""

        if self.sess is None:
            self.setup()
        saver = tf.train.Saver()
        saver.restore(self.sess, load_path)
@cython.boundscheck(False)  # turn off bounds-checking for entire function
@cython.wraparound(False)   # turn off negative index wrapping for entire function
def execute(np.ndarray X, int len_traversal, list traversal, int[:] is_input_var):

    """Executes the program according to X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    len_traversal : int
        Number of entries of traversal to process.

    traversal : list
        Program tokens in the order they are processed. Entries flagged as
        input variables are read through their ``input_var`` column index;
        all other entries are called and must expose ``arity``.

    is_input_var : int[:]
        Non-zero at position i when traversal[i] is an input variable.

    Returns
    -------
    y_hats : array-like, shape = [n_samples]
        The result of executing the program on X.
    """

    # Init some ints
    cdef int sp = -1 # Stack pointer; -1 means no function frame is open yet

    # Give cdef hints for object types
    cdef int i
    cdef int arity
    cdef np.ndarray intermediate_result
    cdef list stack_end
    cdef object stack_end_function

    for i in range(len_traversal):

        if not is_input_var[i]:
            sp += 1
            # Move this to the front with a memset call
            stack_count[sp] = 0
            # Store the reference to stack_count[sp] rather than keep calling
            apply_stack[sp][stack_count[sp]] = traversal[i]
            stack_end = apply_stack[sp]
            # The first element is the function itself
            stack_end_function = stack_end[0]
            arity = stack_end_function.arity
        else:
            if sp < 0:
                # No function frame is open, so the input variable is itself
                # the result. Without this guard the code below would index
                # stack_count[-1] with bounds checks and wraparound disabled.
                return X[:, traversal[i].input_var]
            # Not a function, so lazily evaluate later
            stack_count[sp] += 1
            stack_end[stack_count[sp]] = X[:, traversal[i].input_var]

        # Keep on doing this so long as arity matches up, we can
        # add in numbers above and complete the arity later.
        while stack_count[sp] == arity:
            intermediate_result = stack_end_function(*stack_end[1:(stack_count[sp] + 1)]) # 85% of overhead

            # I think we can get rid of this line, but will require a major rewrite.
            if sp == 0:
                return intermediate_result

            sp -= 1
            # Adjust pointer at the end of the stack
            stack_end = apply_stack[sp]
            stack_count[sp] += 1
            stack_end[stack_count[sp]] = intermediate_result

            # The first element is the function itself
            stack_end_function = stack_end[0]
            arity = stack_end_function.arity

    # We should never get here
    assert False, "Function should never get here!"
    return None
# Euler-Mascheroni constant, used by the asymptotic expansion in harmonic()
GAMMA = 0.57721566490153286060651209008240243104215933593992


# Custom unprotected operators

def logabs(x1):
    """Closure of log for non-positive arguments."""
    return np.log(np.abs(x1))

def expneg(x1):
    """Return exp(-x1)."""
    return np.exp(-x1)

def n3(x1):
    """Return x1 cubed."""
    return np.power(x1, 3)

def n4(x1):
    """Return x1 to the fourth power."""
    return np.power(x1, 4)

def sigmoid(x1):
    """Return the logistic function 1 / (1 + exp(-x1))."""
    return 1 / (1 + np.exp(-x1))

def harmonic(x1):
    """Harmonic numbers of x1.

    If every element is integer-valued, each harmonic number is summed
    exactly with rationals and returned as float32. Otherwise an asymptotic
    expansion around GAMMA + log(x1) is evaluated elementwise.
    """

    x = np.asarray(x1)

    # Vectorized integrality test: does not depend on the element type
    # providing .is_integer(), and also accepts 0-d input.
    with np.errstate(invalid='ignore'):
        all_integers = np.all(np.mod(x, 1) == 0)

    if all_integers:
        exact = [sum(Fraction(1, d) for d in range(1, int(val) + 1)) for val in x.ravel()]
        return np.array(exact, dtype=np.float32).reshape(x.shape)
    return GAMMA + np.log(x) + 0.5/x - 1./(12*x**2) + 1./(120*x**4)


# Custom protected operators

def protected_div(x1, x2):
    """Division that returns 1 where the magnitude of the divisor is at most 0.001."""
    with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
        return np.where(np.abs(x2) > 0.001, np.divide(x1, x2), 1.)

def protected_exp(x1):
    """Exponential that returns 0 for arguments of 100 or more."""
    with np.errstate(over='ignore'):
        return np.where(x1 < 100, np.exp(x1), 0.0)

def protected_log(x1):
    """Closure of log for non-positive arguments."""
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(np.abs(x1) > 0.001, np.log(np.abs(x1)), 0.)

def protected_sqrt(x1):
    """Closure of sqrt for negative arguments."""
    return np.sqrt(np.abs(x1))

def protected_inv(x1):
    """Closure of inverse for zero arguments."""
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(np.abs(x1) > 0.001, 1. / x1, 0.)
def protected_expneg(x1):
    """exp(-x1) that returns 0 for arguments of -100 or less."""
    with np.errstate(over='ignore'):
        return np.where(x1 > -100, np.exp(-x1), 0.0)

def protected_n2(x1):
    """Square that returns 0 where the magnitude is 1e6 or more."""
    with np.errstate(over='ignore'):
        return np.where(np.abs(x1) < 1e6, np.square(x1), 0.0)

def protected_n3(x1):
    """Cube that returns 0 where the magnitude is 1e6 or more."""
    with np.errstate(over='ignore'):
        return np.where(np.abs(x1) < 1e6, np.power(x1, 3), 0.0)

def protected_n4(x1):
    """Fourth power that returns 0 where the magnitude is 1e6 or more."""
    with np.errstate(over='ignore'):
        return np.where(np.abs(x1) < 1e6, np.power(x1, 4), 0.0)

def protected_sigmoid(x1):
    """Logistic function that saturates to 0 for arguments of -100 or less.

    protected_expneg clamps to 0 in that range, where exp(-x1) is actually
    huge. Dividing by (1 + 0) would give 1.0 there, the opposite of the
    limit of the sigmoid, so the value is pinned to 0 instead.
    """
    return np.where(x1 > -100, 1 / (1 + protected_expneg(x1)), 0.0)
# Annotate unprotected ops
unprotected_ops = [
    # Binary operators
    Token(np.add, "add", arity=2, complexity=1),
    Token(np.subtract, "sub", arity=2, complexity=1),
    Token(np.multiply, "mul", arity=2, complexity=1),
    Token(np.divide, "div", arity=2, complexity=2),

    # Built-in unary operators
    Token(np.sin, "sin", arity=1, complexity=3),
    Token(np.cos, "cos", arity=1, complexity=3),
    Token(np.tan, "tan", arity=1, complexity=4),
    Token(np.exp, "exp", arity=1, complexity=4),
    Token(np.log, "log", arity=1, complexity=4),
    Token(np.sqrt, "sqrt", arity=1, complexity=4),
    Token(np.square, "n2", arity=1, complexity=2),
    Token(np.negative, "neg", arity=1, complexity=1),
    Token(np.abs, "abs", arity=1, complexity=2),
    # np.maximum / np.minimum are element-wise binary ufuncs, so they take
    # two arguments (they were registered with arity=1).
    Token(np.maximum, "max", arity=2, complexity=4),
    Token(np.minimum, "min", arity=2, complexity=4),
    Token(np.tanh, "tanh", arity=1, complexity=4),
    Token(np.reciprocal, "inv", arity=1, complexity=2),

    # Custom unary operators
    Token(logabs, "logabs", arity=1, complexity=4),
    Token(expneg, "expneg", arity=1, complexity=4),
    Token(n3, "n3", arity=1, complexity=3),
    Token(n4, "n4", arity=1, complexity=3),
    Token(sigmoid, "sigmoid", arity=1, complexity=4),
    Token(harmonic, "harmonic", arity=1, complexity=4)
]

# Annotate protected ops
protected_ops = [
    # Protected binary operators
    Token(protected_div, "div", arity=2, complexity=2),

    # Protected unary operators
    Token(protected_exp, "exp", arity=1, complexity=4),
    Token(protected_log, "log", arity=1, complexity=4),
    Token(protected_log, "logabs", arity=1, complexity=4), # Protected logabs is supported, but redundant
    Token(protected_sqrt, "sqrt", arity=1, complexity=4),
    Token(protected_inv, "inv", arity=1, complexity=2),
    Token(protected_expneg, "expneg", arity=1, complexity=4),
    Token(protected_n2, "n2", arity=1, complexity=2),
    Token(protected_n3, "n3", arity=1, complexity=3),
    Token(protected_n4, "n4", arity=1, complexity=3),
    Token(protected_sigmoid, "sigmoid", arity=1, complexity=4)
]

# Add unprotected ops to function map
function_map = {
    op.name : op for op in unprotected_ops
    }

# Add protected ops to function map
function_map.update({
    "protected_{}".format(op.name) : op for op in protected_ops
    })

UNARY_TOKENS = set([op.name for op in function_map.values() if op.arity == 1])
BINARY_TOKENS = set([op.name for op in function_map.values() if op.arity == 2])


def create_tokens(n_input_var, function_set, protected):
    """
    Helper function to create Tokens.

    Parameters
    ----------
    n_input_var : int
        Number of input variable Tokens.

    function_set : list
        Names of registered Tokens, or floats that will create new Tokens.

    protected : bool
        Whether to use protected versions of registered Tokens.

    Returns
    -------
    tokens : list of Token
        Input-variable Tokens ("x1", "x2", ...) followed by one Token per
        entry of function_set, in order.

    Raises
    ------
    ValueError
        If an entry of function_set is not a registered name, a number, or
        "const".
    """

    def make_constant(value):
        # Bind `value` per call. A bare `lambda: value` inside the loop below
        # would close over the loop variable, so every constant Token would
        # return the last constant created.
        return lambda : value

    tokens = []

    # Create input variable Tokens
    for i in range(n_input_var):
        token = Token(name="x{}".format(i + 1), arity=0, complexity=1,
                      function=None, input_var=i)
        tokens.append(token)

    for op in function_set:

        # Registered Token
        if op in function_map:
            # Overwrite available protected operators
            if protected and not op.startswith("protected_"):
                protected_op = "protected_{}".format(op)
                if protected_op in function_map:
                    op = protected_op

            token = function_map[op]

        # Hard-coded floating-point constant
        elif isinstance(op, (float, int)):
            name = str(op)
            value = np.atleast_1d(np.float32(op))
            token = Token(name=name, arity=0, complexity=1,
                          function=make_constant(value))

        # Constant placeholder (to-be-optimized)
        elif op == "const":
            token = PlaceholderConstant()

        else:
            raise ValueError("Operation {} not recognized.".format(op))

        tokens.append(token)

    return tokens
class Token():
    """
    An arbitrary token or "building block" of a Program object.

    Attributes
    ----------
    name : str
        Name of token.

    arity : int
        Arity (number of arguments) of token.

    complexity : float
        Complexity of token.

    function : callable
        Function associated with the token; used for executable Programs.

    input_var : int or None
        Index of input if this Token is an input variable, otherwise None.

    Methods
    -------
    __call__(input)
        Call the Token's function according to input.
    """

    def __init__(self, function, name, arity, complexity, input_var=None):
        self.function = function
        self.name = name
        self.arity = arity
        self.complexity = complexity
        self.input_var = input_var

        if input_var is not None:
            assert function is None, "Input variables should not have functions."
            assert arity == 0, "Input variables should have arity zero."

    def __call__(self, *args):
        assert self.function is not None, \
            "Token {} is not callable.".format(self.name)

        return self.function(*args)

    def __repr__(self):
        return self.name


class PlaceholderConstant(Token):
    """
    A Token for placeholder constants that will be optimized with respect to
    the reward function. The function simply returns the "value" attribute.

    Parameters
    ----------
    value : float or None
        Current value of the constant, or None if not yet set.
    """

    def __init__(self, value=None):
        if value is not None:
            value = np.atleast_1d(value)
        self.value = value

        def function():
            # Reads self.value at call time, so later updates are visible
            assert self.value is not None, \
                "Constant is not callable with value None."
            return self.value

        super().__init__(function=function, name="const", arity=0, complexity=1)

    def __repr__(self):
        if self.value is None:
            return self.name
        return str(self.value[0])


class Library():
    """
    Library of Tokens. We use a list of Tokens (instead of set or dict) since
    we so often index by integers given by the Controller.

    Attributes
    ----------
    tokens : list of Token
        List of available Tokens in the library.

    names : list of str
        Names corresponding to Tokens in the library.

    arities : list of int
        Arities corresponding to Tokens in the library.
    """

    def __init__(self, tokens):

        self.tokens = tokens
        self.L = len(tokens)
        self.names = [t.name for t in tokens]
        self.arities = np.array([t.arity for t in tokens], dtype=np.int32)

        self.input_tokens = np.array(
            [i for i, t in enumerate(self.tokens) if t.input_var is not None],
            dtype=np.int32)

        # Indices of Tokens grouped by arity; unseen arities map to an empty array
        self.tokens_of_arity = defaultdict(lambda : np.array([], dtype=np.int32))
        for arity in np.unique(self.arities):
            self.tokens_of_arity[arity] = np.flatnonzero(
                self.arities == arity).astype(np.int32)
        self.terminal_tokens = self.tokens_of_arity[0]
        self.unary_tokens = self.tokens_of_arity[1]
        self.binary_tokens = self.tokens_of_arity[2]

        try:
            self.const_token = self.names.index("const")
        except ValueError:
            self.const_token = None

        # Position of each Token among the non-terminal Tokens only
        # (-1 for terminals).
        self.parent_adjust = np.full_like(self.arities, -1)
        non_terminal = self.arities > 0
        self.parent_adjust[non_terminal] = np.arange(np.count_nonzero(non_terminal))

        trig_names = ["sin", "cos", "tan", "csc", "sec", "cot"]
        trig_names += ["arc" + name for name in trig_names]

        self.float_tokens = np.array(
            [i for i, t in enumerate(self.tokens) if t.arity == 0 and t.input_var is None],
            dtype=np.int32)
        self.trig_tokens = np.array(
            [i for i, t in enumerate(self.tokens) if t.name in trig_names],
            dtype=np.int32)

        inverse_tokens = {
            "inv" : "inv",
            "neg" : "neg",
            "exp" : "log",
            "log" : "exp",
            "sqrt" : "n2",
            "n2" : "sqrt"
        }
        token_from_name = {t.name : i for i, t in enumerate(self.tokens)}
        # Only keep pairs for which both Tokens are present in this library
        self.inverse_tokens = {
            token_from_name[k] : token_from_name[v]
            for k, v in inverse_tokens.items()
            if k in token_from_name and v in token_from_name
        }

    def __getitem__(self, val):
        """Shortcut to get Token by name or index.

        Raises
        ------
        TokenNotFoundError
            If the name or index does not exist, or val is neither str nor int.
        """

        if isinstance(val, str):
            try:
                i = self.names.index(val)
            except ValueError:
                raise TokenNotFoundError("Token {} does not exist.".format(val)) from None
        elif isinstance(val, (int, np.integer)):
            i = val
        else:
            raise TokenNotFoundError("Library must be indexed by str or int, not {}.".format(type(val)))

        try:
            token = self.tokens[i]
        except IndexError:
            raise TokenNotFoundError("Token index {} does not exist".format(i)) from None
        return token

    def tokenize(self, inputs):
        """Convert inputs to list of Tokens.

        Parameters
        ----------
        inputs : str, list, tuple, np.ndarray, Token, or int
            A comma-separated string of names (whitespace around names is
            ignored), a sequence of names/indices/Tokens, or a single item.
        """

        if isinstance(inputs, str):
            inputs = [name.strip() for name in inputs.split(',')]
        elif not isinstance(inputs, (list, tuple, np.ndarray)):
            inputs = [inputs]
        tokens = [input_ if isinstance(input_, Token) else self[input_] for input_ in inputs]
        return tokens

    def actionize(self, inputs):
        """Convert inputs to array of 'actions', i.e. ints corresponding to
        Tokens in the Library."""

        tokens = self.tokenize(inputs)
        actions = np.array([self.tokens.index(t) for t in tokens],
                           dtype=np.int32)
        return actions


class TokenNotFoundError(Exception):
    """Raised when a Library lookup by name or index fails."""
    pass
+ """ + + if priority: + Base = UniquePriorityQueue + else: + Base = UniqueQueue + + class ProgramQueue(ProgramQueueMixin, Base): + def __init__(self, controller, capacity, seed): + ProgramQueueMixin.__init__(self, controller) + Base.__init__(self, capacity, seed) + + queue = ProgramQueue(controller, capacity, seed) + return queue + + +def get_samples(batch, key): + """ + Returns a sub-Batch with samples from the given indices. + + Parameters + ---------- + key : int or slice + Indices of samples to return. + + Returns + ------- + batch : Batch + Sub-Batch with samples from the given indices. + """ + + batch = Batch( + actions=batch.actions[key], + obs=tuple(o[key] for o in batch.obs), + priors=batch.priors[key], + lengths=batch.lengths[key], + rewards=batch.rewards[key]) + return batch + + +# Adapted from https://github.com/tensorflow/models/blob/1af55e018eebce03fb61bba9959a04672536107d/research/brain_coder/common/utils.py +class ItemContainer(object): + """Class for holding an item with its score. + + Defines a comparison function for use in the heap-queue. + """ + + def __init__(self, score, item, extra_data): + self.item = item + self.score = score + self.extra_data = extra_data + + def __lt__(self, other): + assert isinstance(other, type(self)) + return self.score < other.score + + def __eq__(self, other): + assert isinstance(other, type(self)) + return self.item == other.item + + def __iter__(self): + """Allows unpacking like a tuple.""" + yield self.score + yield self.item + yield self.extra_data + + def __repr__(self): + """String representation of this item. + + `extra_data` is not included in the representation. We are assuming that + `extra_data` is not easily interpreted by a human (if it was, it should be + hashable, like a string or tuple). + + Returns: + String representation of `self`. 
+ """ + return str((self.score, self.item)) + + def __str__(self): + return repr(self) + + +class Queue(object): + """Abstract class for queue that must define a push and pop routine""" + + def __init__(self, capacity, seed=0): + self.capacity = capacity + self.rng = np.random.RandomState(seed) + self.heap = [] + self.unique_items = set() + + def push(self, score, item, extra_data): + raise NotImplementedError + + def pop(self): + raise NotImplementedError + + def random_sample(self, sample_size): + """Uniform randomly select items from the queue. + + Args: + sample_size: Number of random samples to draw. The same item can be + sampled multiple times. + + Returns: + List of sampled items (of length `sample_size`). Each element in the list + is a tuple: (item, extra_data). + """ + idx = self.rng.choice(len(self.heap), sample_size, ) + return [(self.heap[i].item, self.heap[i].extra_data) for i in idx] + + def __len__(self): + return len(self.heap) + + def __iter__(self): + for _, item, _ in self.heap: + yield item + + def __repr__(self): + return '[' + ', '.join(repr(c) for c in self.heap) + ']' + + def __str__(self): + return repr(self) + + +class UniqueQueue(Queue): + """A queue in which duplicates are not allowed. Instead, adding a duplicate + moves that item to the back of the queue.""" + + def push(self, score, item, extra_data=None): + """Push an item onto the queue, or move it to the back if already + present. + + Score is unused but included as an argument to follow the interface. 
+ """ + + container = ItemContainer(None, item, extra_data) + + # If the item is already in the queue, move it to the back of the queue + # and return + if item in self.unique_items: + self.heap.remove(container) + self.heap.append(container) + return + + # If the queue is at capacity, first pop the front of the queue + if len(self.heap) >= self.capacity: + self.pop() + + # Add the item + self.heap.append(container) + self.unique_items.add(item) + + def pop(self): + """Pop the front of the queue (the oldest item).""" + + if not self.heap: + return () + score, item, extra_data = self.heap.pop(0) + self.unique_items.remove(item) + return (score, item, extra_data) + + +# Adapted from https://github.com/tensorflow/models/blob/1af55e018eebce03fb61bba9959a04672536107d/research/brain_coder/common/utils.py +class UniquePriorityQueue(Queue): + """A priority queue where duplicates are not added. + + The top items by score remain in the queue. When the capacity is reached, + the lowest scored item in the queue will be dropped. + """ + + def push(self, score, item, extra_data=None): + """Push an item onto the queue. + + If the queue is at capacity, the item with the smallest score will be + dropped. Note that it is assumed each item has exactly one score. The same + item with a different score will still be dropped. + + Args: + score: Number used to prioritize items in the queue. Largest scores are + kept in the queue. + item: A hashable item to be stored. Duplicates of this item will not be + added to the queue. + extra_data: An extra (possible not hashable) data to store with the item. 
+ """ + if item in self.unique_items: + return + if len(self.heap) >= self.capacity: + _, popped_item, _ = heapq.heappushpop( + self.heap, ItemContainer(score, item, extra_data)) + self.unique_items.add(item) + self.unique_items.remove(popped_item) + else: + heapq.heappush(self.heap, ItemContainer(score, item, extra_data)) + self.unique_items.add(item) + + def pop(self): + """Pop the item with the lowest score. + + Returns: + score: Item's score. + item: The item that was popped. + extra_data: Any extra data stored with the item. + """ + if not self.heap: + return () + score, item, extra_data = heapq.heappop(self.heap) + self.unique_items.remove(item) + return score, item, extra_data + + def get_max(self): + """Peek at the item with the highest score. + + Returns: + Same as `pop`. + """ + if not self.heap: + return () + score, item, extra_data = heapq.nlargest(1, self.heap)[0] + return score, item, extra_data + + def get_min(self): + """Peek at the item with the lowest score. + + Returns: + Same as `pop`. + """ + if not self.heap: + return () + score, item, extra_data = heapq.nsmallest(1, self.heap)[0] + return score, item, extra_data + + def iter_in_order(self): + """Iterate over items in the queue from largest score to smallest. + + Yields: + item: Hashable item. + extra_data: Extra data stored with the item. + """ + for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap): + yield item, extra_data + + +class ProgramQueueMixin(): + """A mixin for Queues with additional utilities specific to Batch and + Program.""" + + def __init__(self, controller=None): + self.controller = controller + + def push_sample(self, sample, program): + """ + Push a single sample corresponding to Program to the queue. + + Parameters + ---------- + sample : Batch + A Batch comprising a single sample. + + program : Program + Program corresponding to the sample. 
+ """ + + id_ = program.str + score = sample.rewards + self.push(score, id_, sample) + + def push_batch(self, batch, programs): + """Push a Batch corresponding to Programs to the queue.""" + + for i, program in enumerate(programs): + sample = get_samples(batch, i) + self.push_sample(sample, program) + + def push_best(self, batch, programs): + """Push the single best sample from a Batch""" + + i = np.argmax(batch.rewards) + sample = get_samples(batch, i) + program = programs[i] + self.push_sample(sample, program) + + def sample_batch(self, sample_size): + """Randomly select items from the queue and return them as a Batch.""" + + assert len(self.heap) > 0, "Cannot sample from an empty queue." + samples = [sample for (id_, sample) in self.random_sample(sample_size)] + batch = self._make_batch(samples) + return batch + + def _make_batch(self, samples): + """Turns a list of samples into a Batch.""" + + actions = np.stack([s.actions for s in samples], axis=0) + obs = tuple([np.stack([s.obs[i] for s in samples], axis=0) for i in range(3)]) + priors = np.stack([s.priors for s in samples], axis=0) + lengths = np.array([s.lengths for s in samples], dtype=np.int32) + rewards = np.array([s.rewards for s in samples], dtype=np.float32) + batch = Batch(actions=actions, obs=obs, priors=priors, + lengths=lengths, rewards=rewards) + return batch + + def to_batch(self): + """Return the entire queue as a Batch.""" + + samples = [container.extra_data for container in self.heap] + batch = self._make_batch(samples) + return batch + + def compute_probs(self): + """Computes the probabilities of items in the queue according to the + Controller.""" + + if self.controller is None: + raise RuntimeError("Cannot compute probabilities. 
This Queue does \ + not have a Controller.") + return self.controller.compute_probs(self.to_batch()) + + def get_rewards(self): + """Returns the rewards""" + + r = [container.extra_data.rewards for container in self.heap] + return r diff --git a/dsr/dsr/prior.py b/dsr/dsr/prior.py new file mode 100644 index 00000000..510d79f8 --- /dev/null +++ b/dsr/dsr/prior.py @@ -0,0 +1,527 @@ +"""Class for Prior object.""" + +import numpy as np + +from dsr.subroutines import ancestors +from dsr.library import TokenNotFoundError + + +def make_prior(library, config_prior): + """Factory function for JointPrior object.""" + + prior_dict = { + "relational" : RelationalConstraint, + "length" : LengthConstraint, + "repeat" : RepeatConstraint, + "inverse" : InverseUnaryConstraint, + "trig" : TrigConstraint, + "const" : ConstConstraint + } + + priors = [] + warnings = [] + for prior_type, prior_args in config_prior.items(): + assert prior_type in prior_dict, \ + "Unrecognized prior type: {}.".format(prior_type) + prior_class = prior_dict[prior_type] + + if isinstance(prior_args, dict): + prior_args = [prior_args] + for single_prior_args in prior_args: + + # Attempt to build the Prior. Any Prior can fail if it references a + # Token not in the Library. + try: + prior = prior_class(library, **single_prior_args) + warning = prior.validate() + except TokenNotFoundError: + prior = None + warning = "Uses Tokens not in the Library." + + # Add warning context + if warning is not None: + warning = "Skipping invalid '{}' with arguments {}. 
" \ + "Reason: {}" \ + .format(prior_class.__name__, single_prior_args, warning) + warnings.append(warning) + + # Add the Prior if there are no warnings + if warning is None: + priors.append(prior) + + joint_prior = JointPrior(library, priors) + + print("-- Building prior -------------------") + print("\n".join(["WARNING: " + message for message in warnings])) + print(joint_prior.describe()) + print("-------------------------------------") + + return joint_prior + + +class JointPrior(): + """A collection of joint Priors.""" + + def __init__(self, library, priors): + """ + Parameters + ---------- + library : Library + The Library associated with the Priors. + + priors : list of Prior + The individual Priors to be joined. + """ + + self.library = library + self.L = self.library.L + self.priors = priors + assert all([prior.library is library for prior in priors]), \ + "All Libraries must be identical." + + self.requires_parents_siblings = True + + self.describe() + + def initial_prior(self): + combined_prior = np.zeros((self.L,), dtype=np.float32) + for prior in self.priors: + combined_prior += prior.initial_prior() + return combined_prior + + def __call__(self, actions, parent, sibling, dangling): + zero_prior = np.zeros((actions.shape[0], self.L), dtype=np.float32) + ind_priors = [zero_prior.copy() for _ in range(len(self.priors))] + for i in range(len(self.priors)): + ind_priors[i] += self.priors[i](actions, parent, sibling, dangling) + combined_prior = sum(ind_priors) + zero_prior + return combined_prior + + def describe(self): + message = "\n".join(prior.describe() for prior in self.priors) + return message + + +class Prior(): + """Abstract class whose call method return logits.""" + + def __init__(self, library): + self.library = library + self.L = library.L + + def validate(self): + """ + Determine whether the Prior has a valid configuration. This is useful + when other algorithmic parameters may render the Prior degenerate. 
For + example, having a TrigConstraint with no trig Tokens. + + Returns + ------- + message : str or None + Error message if Prior is invalid, or None if it is valid. + """ + + return None + + def init_zeros(self, actions): + """Helper function to generate a starting prior of zeros.""" + + batch_size = actions.shape[0] + prior = np.zeros((batch_size, self.L), dtype=np.float32) + return prior + + def initial_prior(self): + """ + Compute the initial prior, before any actions are selected. + + Returns + ------- + initial_prior : array + Initial logit adjustment before actions are selected. Shape is + (self.L,) as it will be broadcast to batch size later. + """ + + return np.zeros((self.L,), dtype=np.float32) + + def __call__(self, actions, parent, sibling, dangling): + """ + Compute the prior (logit adjustment) given the current actions. + + Returns + ------- + prior : array + Logit adjustment for selecting next action. Shape is (batch_size, + self.L). + """ + + raise NotImplementedError + + def describe(self): + """Describe the Prior.""" + + message = "No description." + return message + + +class Constraint(Prior): + def __init__(self, library): + Prior.__init__(self, library) + + def make_constraint(self, mask, tokens): + """ + Generate the prior for a batch of constraints and the corresponding + Tokens to constrain. + + For example, with L=5 and tokens=[1,2], a constrained row of the prior + will be: [0.0, -np.inf, -np.inf, 0.0, 0.0]. + + Parameters + __________ + + mask : np.ndarray, shape=(?,), dtype=np.bool_ + Boolean mask of samples to constrain. + + tokens : np.ndarray, dtype=np.int32 + Tokens to constrain. + + Returns + _______ + + prior : np.ndarray, shape=(?, L), dtype=np.float32 + Logit adjustment. Since these are hard constraints, each element is + either 0.0 or -np.inf. 
+ """ + + prior = np.zeros((mask.shape[0], self.L), dtype=np.float32) + for t in tokens: + prior[mask, t] = -np.inf + return prior + + +class RelationalConstraint(Constraint): + """ + Class that constrains the following: + + Constrain (any of) `targets` from being the `relationship` of (any of) + `effectors`. + + Parameters + ---------- + targets : list of Tokens + List of Tokens, all of which will be constrained if any of effectors + are the given relationship. + + effectors : list of Tokens + List of Tokens, any of which will cause all targets to be constrained + if they are the given relationship. + + relationship : choice of ["child", "descendant", "sibling", "uchild"] + The type of relationship to constrain. + """ + + def __init__(self, library, targets, effectors, relationship): + Prior.__init__(self, library) + self.targets = library.actionize(targets) + self.effectors = library.actionize(effectors) + self.relationship = relationship + + def validate(self): + message = [] + if self.relationship in ["child", "descendant", "uchild"]: + if np.isin(self.effectors, self.library.terminal_tokens).any(): + message = "{} relationship cannot have terminal effectors." \ + .format(self.relationship.capitalize()) + return message + if len(self.targets) == 0: + message = "There are no target Tokens." + return message + if len(self.effectors) == 0: + message = "There are no effector Tokens." 
+ return message + return None + + def __call__(self, actions, parent, sibling, dangling): + + if self.relationship == "descendant": + mask = ancestors(actions=actions, + arities=self.library.arities, + ancestor_tokens=self.effectors) + prior = self.make_constraint(mask, self.targets) + + elif self.relationship == "child": + parents = self.effectors + adj_parents = self.library.parent_adjust[parents] + mask = np.isin(parent, adj_parents) + prior = self.make_constraint(mask, self.targets) + + elif self.relationship == "sibling": + # The sibling relationship is reflexive: if A is a sibling of B, + # then B is also a sibling of A. Thus, we combine two priors, where + # targets and effectors are swapped. + mask = np.isin(sibling, self.effectors) + prior = self.make_constraint(mask, self.targets) + mask = np.isin(sibling, self.targets) + prior += self.make_constraint(mask, self.effectors) + + elif self.relationship == "uchild": + # Case 1: parent is a unary effector + unary_effectors = np.intersect1d(self.effectors, + self.library.unary_tokens) + adj_unary_effectors = self.library.parent_adjust[unary_effectors] + mask = np.isin(parent, adj_unary_effectors) + # Case 2: sibling is a target and parent is an effector + adj_effectors = self.library.parent_adjust[self.effectors] + mask += np.logical_and(np.isin(sibling, self.targets), + np.isin(parent, adj_effectors)) + prior = self.make_constraint(mask, [self.targets]) + + return prior + + def describe(self): + + targets = ", ".join([self.library.names[t] for t in self.targets]) + effectors = ", ".join([self.library.names[t] for t in self.effectors]) + relationship = { + "child" : "a child", + "sibling" : "a sibling", + "descendant" : "a descendant", + "uchild" : "the only unique child" + }[self.relationship] + message = "[{}] cannot be {} of [{}]." 
\ + .format(targets, relationship, effectors) + return message + + +class TrigConstraint(RelationalConstraint): + """Class that constrains trig Tokens from being the desendants of trig + Tokens.""" + + def __init__(self, library): + targets = library.trig_tokens + effectors = library.trig_tokens + RelationalConstraint.__init__(self, library, + targets=targets, + effectors=effectors, + relationship="descendant") + + +class ConstConstraint(RelationalConstraint): + """Class that constrains the const Token from being the only unique child + of all non-terminal Tokens.""" + + def __init__(self, library): + targets = library.const_token + effectors = np.concatenate([library.unary_tokens, + library.binary_tokens]) + RelationalConstraint.__init__(self, library, + targets=targets, + effectors=effectors, + relationship="uchild") + + +class InverseUnaryConstraint(Constraint): + """Class that constrains each unary Token from being the child of its + corresponding inverse unary Tokens.""" + + def __init__(self, library): + Prior.__init__(self, library) + self.priors = [] + for target, effector in library.inverse_tokens.items(): + targets = [target] + effectors = [effector] + prior = RelationalConstraint(library, + targets=targets, + effectors=effectors, + relationship="child") + self.priors.append(prior) + + def validate(self): + if len(self.priors) == 0: + message = "There are no inverse unary Token pairs in the Library." 
+ return message + return None + + def __call__(self, actions, parent, sibling, dangling): + prior = sum([prior(actions, parent, sibling, dangling) + for prior in self.priors]) + return prior + + def describe(self): + message = [prior.describe() for prior in self.priors] + return "\n".join(message) + + +class RepeatConstraint(Constraint): + """Class that constrains Tokens to appear between a minimum and/or maximum + number of times.""" + + def __init__(self, library, tokens, min_=None, max_=None): + """ + Parameters + ---------- + tokens : Token or list of Tokens + Token(s) which should, in total, occur between min_ and max_ times. + + min_ : int or None + Minimum number of times tokens should occur. + + max_ : int or None + Maximum number of times tokens should occur. + """ + + Prior.__init__(self, library) + assert min_ is not None or max_ is not None, \ + "At least one of (min_, max_) must not be None." + self.min = min_ + self.max = max_ + self.tokens = library.actionize(tokens) + + assert min_ is None, "Repeat minimum constraints are not yet " \ + "supported. This requires knowledge of length constraints." 
+ + def __call__(self, actions, parent, sibling, dangling): + counts = np.sum(np.isin(actions, self.tokens), axis=1) + prior = self.init_zeros(actions) + if self.min is not None: + raise NotImplementedError + if self.max is not None: + mask = counts >= self.max + prior += self.make_constraint(mask, self.tokens) + return prior + + def describe(self): + names = ", ".join([self.library.names[t] for t in self.tokens]) + if self.min is None: + message = "[{}] cannot occur more than {} times."\ + .format(names, self.max) + elif self.max is None: + message = "[{}] must occur at least {} times."\ + .format(names, self.min) + else: + message = "[{}] must occur between {} and {} times."\ + .format(names, self.min, self.max) + return message + + +class LengthConstraint(Constraint): + """Class that constrains the Program from falling within a minimum and/or + maximum length""" + + def __init__(self, library, min_=None, max_=None): + """ + Parameters + ---------- + min_ : int or None + Minimum length of the Program. + + max_ : int or None + Maximum length of the Program. + """ + + Prior.__init__(self, library) + self.min = min_ + self.max = max_ + + assert min_ is not None or max_ is not None, \ + "At least one of (min_, max_) must not be None." 
+ + def initial_prior(self): + prior = Prior.initial_prior(self) + for t in self.library.terminal_tokens: + prior[t] = -np.inf + return prior + + def __call__(self, actions, parent, sibling, dangling): + + # Initialize the prior + prior = self.init_zeros(actions) + i = actions.shape[1] - 1 # Current time + + # Never need to constrain max length for first half of expression + if self.max is not None and (i + 2) >= self.max // 2: + remaining = self.max - (i + 1) + # assert sum(dangling > remaining) == 0, (dangling, remaining) + mask = dangling >= remaining - 1 # Constrain binary + prior += self.make_constraint(mask, self.library.binary_tokens) + mask = dangling == remaining # Constrain unary + prior += self.make_constraint(mask, self.library.unary_tokens) + + # Constrain terminals when dangling == 1 until selecting the + # (min_length)th token + if self.min is not None and (i + 2) < self.min: + mask = dangling == 1 # Constrain terminals + prior += self.make_constraint(mask, self.library.terminal_tokens) + + return prior + + def describe(self): + message = [] + if self.min is not None: + message.append("Sequences have minimum length {}.".format(self.min)) + if self.max is not None: + message.append("Sequences have maximum length {}.".format(self.max)) + message = "\n".join(message) + return message + + +class UniformArityPrior(Prior): + """Class that puts a fixed prior on arities by transforming the initial + distribution from uniform over tokens to uniform over arities.""" + + def __init__(self, library): + + Prior.__init__(self, library) + + # For each token, subtract log(n), where n is the total number of tokens + # in the library with the same arity as that token. This is equivalent + # to... 
For each arity, subtract log(n) from tokens of that arity, where + # n is the total number of tokens of that arity + self.logit_adjust = np.zeros((self.L,), dtype=np.float32) + for arity, tokens in self.library.tokens_of_arity.items(): + self.logit_adjust[tokens] -= np.log(len(tokens)) + + def initial_prior(self): + return self.logit_adjust + + def __call__(self, actions, parent, sibling, dangling): + + # This will be broadcast when added to the joint prior + prior = self.logit_adjust + return prior + + +class SoftLengthPrior(Prior): + """Class the puts a soft prior on length. Before loc, terminal probabilities + are scaled by exp(-(t - loc) ** 2 / (2 * scale)) where dangling == 1. After + loc, non-terminal probabilities are scaled by that number.""" + + def __init__(self, library, loc, scale): + + Prior.__init__(self, library) + + self.loc = loc + self.scale = scale + + self.terminal_mask = np.zeros((self.L,), dtype=np.bool) + self.terminal_mask[self.library.terminal_tokens] = True + + self.nonterminal_mask = ~self.terminal_mask + + def __call__(self, actions, parent, sibling, dangling): + + # Initialize the prior + prior = self.init_zeros(actions) + t = actions.shape[1] # Current time + + # Adjustment to terminal or non-terminal logits + logit_adjust = -(t - self.loc) ** 2 / (2 * self.scale) + + # Before loc, decrease p(terminal) where dangling == 1 + if t < self.loc: + prior[dangling == 1] += self.terminal_mask * logit_adjust + + # After loc, decrease p(non-terminal) + else: + prior += self.nonterminal_mask * logit_adjust + + return prior diff --git a/dsr/dsr/program.py b/dsr/dsr/program.py new file mode 100644 index 00000000..d00f3e30 --- /dev/null +++ b/dsr/dsr/program.py @@ -0,0 +1,640 @@ +"""Class for symbolic expression object or program.""" + +import array +import os +import warnings +from textwrap import indent + +import numpy as np +from sympy.parsing.sympy_parser import parse_expr +from sympy import pretty + +from dsr.functions import 
PlaceholderConstant +from dsr.const import make_const_optimizer +from dsr.utils import cached_property +import dsr.utils as U + + +def _finish_tokens(tokens): + """ + Complete the pre-order traversal. + + Parameters + ---------- + tokens : list of integers + A list of integers corresponding to tokens in the library. The list + defines an expression's pre-order traversal. + + Returns + _______ + tokens : list of integers + A list of integers corresponding to tokens in the library. The list + defines an expression's pre-order traversal. "Dangling" programs are + completed with repeated "x1" until the expression completes. + + """ + + arities = np.array([Program.library.arities[t] for t in tokens]) + dangling = 1 + np.cumsum(arities - 1) + + if 0 in dangling: + expr_length = 1 + np.argmax(dangling == 0) + tokens = tokens[:expr_length] + else: + # Extend with first variable until complete + tokens = np.append(tokens, np.random.choice(Program.library.input_tokens, size=dangling[-1])) + + return tokens + + +def from_str_tokens(str_tokens, optimize, skip_cache=False): + """ + Memoized function to generate a Program from a list of str and/or float. + See from_tokens() for details. + + Parameters + ---------- + str_tokens : str | list of (str | float) + Either a comma-separated string of tokens and/or floats, or a list of + str and/or floats. + + optimize : bool + See from_tokens(). + + skip_cache : bool + See from_tokens(). + + Returns + ------- + program : Program + See from_tokens(). + """ + + # Convert str to list of str + if isinstance(str_tokens, str): + str_tokens = str_tokens.split(",") + + # Convert list of str|float to list of tokens + if isinstance(str_tokens, list): + traversal = [] + constants = [] + for s in str_tokens: + if s in Program.library.names: + t = Program.library.names.index(s.lower()) + elif U.is_float(s): + assert "const" not in str_tokens, "Currently does not support both placeholder and hard-coded constants." 
+ assert not optimize, "Currently does not support optimization with hard-coded constants." + t = Program.library.const_token + constants.append(float(s)) + else: + raise ValueError("Did not recognize token {}.".format(s)) + traversal.append(t) + traversal = np.array(traversal, dtype=np.int32) + else: + raise ValueError("Input must be list or string.") + + # Generate base Program (with "const" for constants) + p = from_tokens(traversal, optimize=optimize, skip_cache=skip_cache) + + # Replace any constants + p.set_constants(constants) + + return p + + +def from_tokens(tokens, optimize, skip_cache=False): + """ + Memoized function to generate a Program from a list of tokens. + + Since some tokens are nonfunctional, this first computes the corresponding + traversal. If that traversal exists in the cache, the corresponding Program + is returned. Otherwise, a new Program is returned. + + Parameters + ---------- + tokens : list of integers + A list of integers corresponding to tokens in the library. The list + defines an expression's pre-order traversal. "Dangling" programs are + completed with repeated "x1" until the expression completes. + + optimize : bool + Whether to optimize the program before returning it. + + skip_cache : bool + Whether to bypass the cache when creating the program. + + Returns + _______ + program : Program + The Program corresponding to the tokens, either pulled from memoization + or generated from scratch. + """ + + ''' + Truncate expressions that complete early; extend ones that don't complete + ''' + tokens = _finish_tokens(tokens) + + # For stochastic Tasks, there is no cache; always generate a new Program. + # For deterministic Programs, if the Program is in the cache, return it; + # otherwise, create a new one and add it to the cache. 
class Program(object):
    """
    The executable program representing the symbolic expression.

    The program comprises unary/binary operators, constant placeholders
    (to-be-optimized), input variables, and hard-coded constants.

    Parameters
    ----------
    tokens : np.ndarray of int
        Indices into ``Program.library``, in pre-order. The constructor uses
        the sequence exactly as given, so it must already describe a complete
        expression.

    optimize : bool
        Whether to optimize the program's constants upon initializing it.

    Attributes
    ----------
    traversal : list
        Library tokens encoding the pre-order traversal of the expression
        tree. Constant placeholders are replaced by ``PlaceholderConstant``
        instances when constants are set.

    const_pos : list of int
        Indices of constant placeholders (tokens named "const") along the
        traversal.

    len_traversal : int
        Number of tokens in the traversal.

    is_input_var : array.array
        Per-token flag telling whether the token is an input variable. Only
        created when Cython is in use and the traversal has more than one
        token.

    invalid : bool
        True if a floating-point error was recorded while executing the
        program through the unprotected ``execute`` wrapper.

    str : bytes
        Raw bytes of ``tokens``. Useful as unique identifier.

    count : int
        The number of times this Program has been sampled.

    complexity : float
        The (lazily calculated) complexity penalty of the program.

    base_r : float
        The (lazily calculated) base reward (reward without penalty) of the
        program on the training data.

    r : float
        The (lazily calculated) reward of the program on the training data,
        i.e. ``base_r - complexity``.

    sympy_expr : sympy expression or str
        The (lazily calculated) SymPy expression corresponding to the
        program, or "N/A" if it could not be parsed. Used for pretty printing
        _only_.
    """

    # Static variables
    task = None             # Task
    library = None          # Library
    const_optimizer = None  # Function to optimize constants
    cache = {}

    # Cython-related static variables
    have_cython = None      # Do we have cython installed
    execute = None          # Link to execute. Either cython or python
    cyfunc = None           # Link to cyfunc lib since we do an include inline

    def __init__(self, tokens, optimize):
        """
        Builds the Program from an array of token indices and, if requested,
        optimizes its constants against the reward function.
        """

        self.traversal = [Program.library[t] for t in tokens]
        # Just constant placeholder positions
        self.const_pos = [i for i, t in enumerate(self.traversal) if t.name == "const"]
        self.len_traversal = len(self.traversal)

        if self.have_cython and self.len_traversal > 1:
            self.is_input_var = array.array('i', [t.input_var is not None for t in self.traversal])

        self.invalid = False
        # ndarray.tostring() was removed in NumPy 2.0; tobytes() returns the
        # exact same bytes on every NumPy version.
        self.str = tokens.tobytes()

        if optimize:
            _ = self.optimize()

        self.count = 1

    def cython_execute(self, X):
        """Executes the program according to X using Cython.

        Single-token programs are delegated to ``python_execute``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        y_hats : array-like, shape = [n_samples]
            The result of executing the program on X.
        """

        if self.len_traversal > 1:
            return self.cyfunc.execute(X, self.len_traversal, self.traversal, self.is_input_var)
        else:
            return self.python_execute(X)

    def python_execute(self, X):
        """Executes the program according to X using Python.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        y_hats : array-like, shape = [n_samples]
            The result of executing the program on X.

        Raises
        ------
        AssertionError
            If the traversal ends before the expression is complete.
        """

        # Each stack entry is [token, arg_1, ..., arg_k]; an entry is applied
        # as soon as it has collected as many arguments as the token's arity.
        apply_stack = []

        for node in self.traversal:

            apply_stack.append([node])

            while len(apply_stack[-1]) == apply_stack[-1][0].arity + 1:
                # Apply functions that have sufficient arguments
                token = apply_stack[-1][0]
                terminals = apply_stack[-1][1:]
                if token.input_var is not None:
                    intermediate_result = X[:, token.input_var]
                else:
                    intermediate_result = token(*terminals)
                if len(apply_stack) != 1:
                    # Feed the result to the parent entry
                    apply_stack.pop()
                    apply_stack[-1].append(intermediate_result)
                else:
                    # The root has been evaluated
                    return intermediate_result

        # Only reachable for an empty or incomplete traversal. Raised
        # explicitly so the check is not stripped when running with -O.
        raise AssertionError("Function should never get here!")

    def optimize(self):
        """
        Optimizes the constant tokens against the training data and returns the
        optimized constants.

        This function generates an objective function based on the training
        dataset, reward function, and constant optimizer. It ignores penalties
        because the Program structure is fixed, thus penalties are all the same.
        It then optimizes the constants of the program and returns the optimized
        constants.

        Returns
        -------
        optimized_constants : vector
            Array of optimized constants (an empty list if the program has no
            constant placeholders).
        """

        # Create the objective function, which is a function of the constants being optimized
        def f(consts):
            self.set_constants(consts)
            r = self.task.reward_function(self)
            obj = -r  # Constant optimizer minimizes the objective function

            # Need to reset to False so that a single invalid call during
            # constant optimization doesn't render the whole Program invalid.
            self.invalid = False

            return obj

        assert self.execute is not None, "set_execute needs to be called first"

        if len(self.const_pos) > 0:
            # Do the optimization
            x0 = np.ones(len(self.const_pos))  # Initial guess
            optimized_constants = Program.const_optimizer(f, x0)
            self.set_constants(optimized_constants)

        else:
            # No need to optimize if there are no constants
            optimized_constants = []

        return optimized_constants

    def set_constants(self, consts):
        """Sets the program's constants to the given values"""

        for i, const in enumerate(consts):
            # Create a new instance of PlaceholderConstant instead of changing
            # the "values" attribute, otherwise all Programs will have the same
            # instance and just overwrite each other's value.
            self.traversal[self.const_pos[i]] = PlaceholderConstant(const)

    @classmethod
    def clear_cache(cls):
        """Clears the class' cache"""

        cls.cache = {}

    @classmethod
    def set_task(cls, task):
        """Sets the class' Task (and its Library, taken from the task)"""

        Program.task = task
        Program.library = task.library

    @classmethod
    def set_const_optimizer(cls, name, **kwargs):
        """Sets the class' constant optimizer"""

        const_optimizer = make_const_optimizer(name, **kwargs)
        Program.const_optimizer = const_optimizer

    @classmethod
    def set_complexity_penalty(cls, name, weight):
        """Sets the class' complexity penalty

        Parameters
        ----------
        name : str or None
            Penalty name: None (no penalty) or "length" (length of the
            traversal).

        weight : float
            Multiplier applied to the penalty; 0 disables the penalty.
        """

        all_functions = {
            # No penalty
            None : lambda p : 0.0,

            # Length of tree
            "length" : lambda p : len(p)
        }

        assert name in all_functions, "Unrecognzied complexity penalty name"

        if weight == 0:
            Program.complexity_penalty = lambda p : 0.0
        else:
            penalty = all_functions[name]
            Program.complexity_penalty = lambda p : weight * penalty(p)

    @classmethod
    def set_execute(cls, protected):
        """Sets which execute method to use

        Parameters
        ----------
        protected : bool
            If True, the execute function is used as-is. If False, it is
            wrapped so that numpy floating-point warnings are recorded on the
            Program (``invalid``, ``error_type``, ``error_node``).
        """

        # If cython ran, we will have a 'c' file generated. The dynamic library
        # can be given different names, so it's not reliable for testing if
        # cython ran.
        cpath = os.path.join(os.path.dirname(__file__), 'cyfunc.c')

        if os.path.isfile(cpath):
            from . import cyfunc
            Program.cyfunc = cyfunc
            execute_function = Program.cython_execute
            Program.have_cython = True
        else:
            execute_function = Program.python_execute
            Program.have_cython = False

        if protected:
            Program.execute = execute_function
        else:

            class InvalidLog():
                """Log class to catch and record numpy warning messages"""

                def __init__(self):
                    self.error_type = None  # One of ['divide', 'overflow', 'underflow', 'invalid']
                    self.error_node = None  # E.g. 'exp', 'log', 'true_divide'
                    self.new_entry = False  # Flag for whether a warning has been encountered during a call to Program.execute()

                def write(self, message):
                    """This is called by numpy when encountering a warning"""

                    if not self.new_entry:  # Only record the first warning encounter
                        message = message.strip().split(' ')
                        self.error_type = message[1]
                        self.error_node = message[-1]
                    self.new_entry = True

                def update(self, p):
                    """If a floating-point error was encountered, set Program.invalid
                    to True and record the error type and error node."""

                    if self.new_entry:
                        p.invalid = True
                        p.error_type = self.error_type
                        p.error_node = self.error_node
                        self.new_entry = False

            invalid_log = InvalidLog()
            np.seterrcall(invalid_log)  # Tells numpy to call InvalidLog.write() when encountering a warning

            # Define closure for execute function
            def unsafe_execute(p, X):
                """This is a wrapper for execute_function. If a floating-point error
                would be hit, a warning is logged instead, p.invalid is set to True,
                and the appropriate nan/inf value is returned. It's up to the task's
                reward function to decide how to handle nans/infs."""

                with np.errstate(all='log'):
                    y = execute_function(p, X)
                    invalid_log.update(p)
                    return y

            Program.execute = unsafe_execute

    @cached_property
    def complexity(self):
        """Evaluates and returns the complexity of the program"""

        return Program.complexity_penalty(self.traversal)

    @cached_property
    def base_r(self):
        """Evaluates and returns the base reward of the program on the training
        set"""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            return self.task.reward_function(self)

    @cached_property
    def r(self):
        """Evaluates and returns the reward of the program on the training
        set"""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            return self.base_r - self.complexity

    @cached_property
    def evaluate(self):
        """Evaluates and returns the evaluation metrics of the program."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            return self.task.evaluate(self)

    @cached_property
    def complexity_eureqa(self):
        """Computes sum of token complexity based on Eureqa complexity measures."""

        return sum(t.complexity for t in self.traversal)

    @cached_property
    def sympy_expr(self):
        """
        Returns the attribute self.sympy_expr.

        This is actually a bit complicated because we have to go: traversal -->
        tree --> serialized tree --> SymPy expression
        """

        tree = self.traversal.copy()
        tree = build_tree(tree)
        tree = convert_to_sympy(tree)
        try:
            expr = parse_expr(tree.__repr__())  # SymPy expression
        except Exception:
            # Best effort: this is only used for pretty printing, so a
            # parsing failure must not take the program down.
            expr = "N/A"

        return expr

    def pretty(self):
        """Returns pretty printed string of the program"""
        return pretty(self.sympy_expr)

    def print_stats(self):
        """Prints the statistics of the program"""
        print("\tReward: {}".format(self.r))
        print("\tBase reward: {}".format(self.base_r))
        print("\tCount: {}".format(self.count))
        print("\tInvalid: {}".format(self.invalid))
        print("\tTraversal: {}".format(self))
        print("\tExpression:")
        print("{}\n".format(indent(self.pretty(), '\t  ')))

    def __repr__(self):
        """Prints the program's traversal"""

        return ','.join([repr(t) for t in self.traversal])
# Possible library elements that sympy capitalizes
capital = ["add", "mul", "pow"]

# Binary operators rewritten as OUTER(left, INNER(right, -1))
_BINARY_REWRITES = {
    "div": ("Mul", "Pow"),
    "sub": ("Add", "Mul"),
}

# Unary operators rewritten as OUTER(child, EXTRA)
_UNARY_REWRITES = {
    "inv": ("Pow", "-1"),
    "neg": ("Mul", "-1"),
    "n2": ("Pow", "2"),
    "n3": ("Pow", "3"),
    "n4": ("Pow", "4"),
}


class Node(object):
    """Basic tree class supporting printing"""

    def __init__(self, val):
        self.val = val
        self.children = []

    def __repr__(self):
        children_repr = ",".join(repr(child) for child in self.children)
        if len(self.children) == 0:
            return self.val  # Avoids unnecessary parentheses, e.g. x1()
        return "{}({})".format(self.val, children_repr)


def build_tree(traversal):
    """Builds a tree from the front of a pre-order traversal.

    Parameters
    ----------
    traversal : list
        Tokens in pre-order. Each token must expose ``arity`` and a ``repr``
        giving its name. The tokens of the first complete subtree are removed
        from the list; anything after them is left in place.

    Returns
    -------
    node : Node
        Root of the tree built from the consumed tokens.

    Raises
    ------
    IndexError
        If the traversal ends before the expression is complete.
    """

    def _build(pos):
        # Returns the subtree starting at `pos` and the index just past it.
        op = traversal[pos]
        val = repr(op)
        if val in capital:
            val = val.capitalize()
        node = Node(val)
        pos += 1
        for _ in range(op.arity):
            child, pos = _build(pos)
            node.children.append(child)
        return node, pos

    root, consumed = _build(0)
    # Walk by index and drop the consumed prefix once, instead of calling
    # list.pop(0) per token (which shifts the whole list every time).
    del traversal[:consumed]
    return root


def convert_to_sympy(node):
    """Adjusts trees to only use node values supported by sympy.

    The tree is modified in place: ``div``/``sub`` become ``Mul``/``Add`` with
    the right operand wrapped in ``Pow(., -1)``/``Mul(., -1)``, and
    ``inv``/``neg``/``n2``/``n3``/``n4`` become ``Pow``/``Mul`` with the
    matching constant appended. Node values are always plain strings.

    Parameters
    ----------
    node : Node
        Root of the tree to convert.

    Returns
    -------
    node : Node
        The same (converted) root node.
    """

    if node.val in _BINARY_REWRITES:
        outer, inner = _BINARY_REWRITES[node.val]
        node.val = outer
        new_right = Node(inner)
        new_right.children.append(node.children[1])
        new_right.children.append(Node("-1"))
        node.children[1] = new_right

    elif node.val in _UNARY_REWRITES:
        # The value must be a string, not a Node: a Node here only rendered
        # correctly by accident and broke any later comparison on `val`.
        outer, extra = _UNARY_REWRITES[node.val]
        node.val = outer
        node.children.append(Node(extra))

    for child in node.children:
        convert_to_sympy(child)

    return node
"""Parallelized, single-point launch script to run DSR or GP on a set of benchmarks."""

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

import os
import sys
import json
import time
from datetime import datetime
import multiprocessing
from functools import partial
from pkg_resources import resource_filename
import zlib

import click
import numpy as np
import pandas as pd
from sympy.parsing.sympy_parser import parse_expr
from sympy import srepr

from dsr import DeepSymbolicOptimizer
from dsr.program import Program
from dsr.task.regression.dataset import BenchmarkDataset
from dsr.baselines import gpsr


def train_dsr(name_and_seed, config):
    """Trains DSR and returns dict of reward, expression, and traversal

    Parameters
    ----------
    name_and_seed : tuple of (str, int)
        Benchmark name and seed for this run.

    config : dict
        Full configuration. Its "task" name and "training" output file are
        overridden in place for this benchmark/seed.

    Returns
    -------
    result : dict
        Name, seed, the entries returned by ``model.train`` (minus
        "program"), and the wall-clock time "t" in seconds.
    """

    # Override the benchmark name and output file
    name, seed = name_and_seed
    config["task"]["name"] = name
    config["training"]["output_file"] = "dsr_{}_{}.csv".format(name, seed)

    # Try importing TensorFlow (with suppressed warnings), Controller, and learn
    # When parallelizing across tasks, these will already be imported, hence try/except
    try:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
        from dsr.controller import Controller
        from dsr.train import learn
    except ModuleNotFoundError:  # Subclass of ImportError, so it has to be handled first
        print("One or more libraries not found")
        raise  # Re-raise the original error so its message and traceback survive
    except ImportError:
        # Have we already imported tf? If so, this is the error we want to dodge.
        # NOTE(review): `tf` is only ever bound as a local name above, so this
        # check looks like it can never be true and the error always
        # propagates — confirm the intended condition.
        if 'tf' in globals():
            pass
        else:
            raise

    # Train the model
    model = DeepSymbolicOptimizer(config)
    start = time.time()
    result = {"name" : name, "seed" : seed}  # Name and seed are listed first
    result.update(model.train(seed=seed))
    result["t"] = time.time() - start
    result.pop("program")

    return result


def train_gp(name_and_seed, logdir, config_task, config_gp):
    """Trains GP and returns dict of reward, expression, and program

    Parameters
    ----------
    name_and_seed : tuple of (str, int)
        Benchmark name and seed for this run.

    logdir : str
        Directory where the per-run CSV ("gp_<name>_<seed>.csv") is written.

    config_task : dict
        Task configuration; its "dataset" entry is passed to
        ``BenchmarkDataset``.

    config_gp : dict
        Keyword arguments for ``gpsr.GP``.

    Returns
    -------
    result : dict
        Summary of the run (rewards, test errors, success flag, expression,
        traversal, and wall-clock time "t" in seconds).
    """

    name, seed = name_and_seed

    # Work on copies so the caller's config dicts are not modified.
    config_gp = dict(config_gp)
    config_gp["seed"] = seed + zlib.adler32(name.encode("utf-8"))

    start = time.time()

    # Load the dataset
    config_dataset = dict(config_task["dataset"])
    config_dataset["name"] = name
    dataset = BenchmarkDataset(**config_dataset)

    # Fit the GP
    gp = gpsr.GP(dataset=dataset, **config_gp)
    p, logbook = gp.train()

    # Retrieve results
    r = base_r = p.fitness.values[0]
    str_p = str(p)
    nmse_test = gp.nmse_test(p)[0]
    nmse_test_noiseless = gp.nmse_test_noiseless(p)[0]
    success = gp.success(p)

    # Many failure cases right now for converting to SymPy expression
    try:
        expression = repr(parse_expr(str_p.replace("X", "x").replace("add", "Add").replace("mul", "Mul")))
    except Exception:
        expression = "N/A"

    # Save run details
    drop = ["gen", "nevals"]
    df_fitness = pd.DataFrame(logbook.chapters["fitness"]).drop(drop, axis=1)
    df_fitness = df_fitness.rename({"avg" : "fit_avg", "min" : "fit_min"}, axis=1)
    df_fitness["fit_best"] = df_fitness["fit_min"].cummin()
    df_len = pd.DataFrame(logbook.chapters["size"]).drop(drop, axis=1)
    df_len = df_len.rename({"avg" : "l_avg"}, axis=1)
    df = pd.concat([df_fitness, df_len], axis=1, sort=False)
    df.to_csv(os.path.join(logdir, "gp_{}_{}.csv".format(name, seed)), index=False)

    result = {
        "name" : name,
        "seed" : seed,
        "r" : r,
        "base_r" : base_r,
        "nmse_test" : nmse_test,
        "nmse_test_noiseless" : nmse_test_noiseless,
        "success" : success,
        "expression" : expression,
        "traversal" : str_p,
        "t" : time.time() - start
    }

    return result


@click.command()
@click.argument('config_template', default="config.json")
@click.option('--method', default="dsr", type=click.Choice(["dsr", "gp"]), help="Symbolic regression method")
@click.option('--mc', default=1, type=int, help="Number of Monte Carlo trials for each benchmark")
@click.option('--output_filename', default=None, help="Filename to write results")
@click.option('--n_cores_task', '--n', default=1, help="Number of cores to spread out across tasks")
@click.option('--seed_shift', default=0, type=int, help="Integer to add to each seed (i.e. to combine multiple runs)")
@click.option('--b', multiple=True, type=str, help="Name of benchmark or benchmark prefix")
def main(config_template, method, mc, output_filename, n_cores_task, seed_shift, b):
    """Runs DSR or GP on multiple benchmarks using multiprocessing."""

    # Load the config file
    with open(config_template, encoding='utf-8') as f:
        config = json.load(f)

    # Required configs
    config_task = config["task"]            # Task specification parameters
    config_training = config["training"]    # Training hyperparameters

    # Optional configs
    config_gp = config.get("gp")            # GP hyperparameters
    if method == "gp" and config_gp is None:
        raise click.UsageError("Method 'gp' requires a 'gp' section in the config file.")

    # Create output directories
    if output_filename is None:
        output_filename = "benchmark_{}.csv".format(method)
    config_training["logdir"] = os.path.join(
        config_training["logdir"],
        "log_{}".format(datetime.now().strftime("%Y-%m-%d-%H%M%S")))
    logdir = config_training["logdir"]
    if "dataset" in config_task and "backup" in config_task["dataset"] and config_task["dataset"]["backup"]:
        config_task["dataset"]["logdir"] = logdir
    os.makedirs(logdir, exist_ok=True)
    output_filename = os.path.join(logdir, output_filename)

    # Use benchmark name from config if not specified as command-line arg
    if len(b) == 0:
        if isinstance(config_task["name"], str):
            b = (config_task["name"],)
        elif isinstance(config_task["name"], list):
            b = tuple(config_task["name"])

    # Shortcut to run all Nguyen benchmarks
    benchmarks = list(b)
    if "Nguyen" in benchmarks:
        benchmarks.remove("Nguyen")
        benchmarks += ["Nguyen-{}".format(i+1) for i in range(12)]

    # Without this check the script would run nothing and still report a
    # results file that was never written.
    if not benchmarks:
        raise click.UsageError("No benchmarks given: pass --b or set task 'name' in the config file.")

    # Generate benchmark-seed pairs for each MC. When passed to the TF RNG,
    # seeds will be added to checksums on the benchmark names
    unique_benchmarks = benchmarks.copy()
    benchmarks *= mc
    seeds = (np.arange(mc) + seed_shift).repeat(len(unique_benchmarks)).tolist()
    names_and_seeds = list(zip(benchmarks, seeds))

    # Edit n_cores_task and/or n_cores_batch
    if n_cores_task == -1:
        n_cores_task = multiprocessing.cpu_count()
    if n_cores_task > len(benchmarks):
        print("Setting 'n_cores_task' to {} for batch because there are only {} benchmarks.".format(len(benchmarks), len(benchmarks)))
        n_cores_task = len(benchmarks)
    if method == "dsr":
        if config_training["verbose"] and n_cores_task > 1:
            print("Setting 'verbose' to False for parallelized run.")
            config_training["verbose"] = False
        if config_training["n_cores_batch"] != 1 and n_cores_task > 1:
            print("Setting 'n_cores_batch' to 1 to avoid nested child processes.")
            config_training["n_cores_batch"] = 1
    print("Running {} for n={} on benchmarks {}".format(method, mc, unique_benchmarks))

    # Write terminal command and config.json into log directory
    cmd_filename = os.path.join(logdir, "cmd.out")
    with open(cmd_filename, 'w') as f:
        print(" ".join(sys.argv), file=f)
    config_filename = os.path.join(logdir, "config.json")
    with open(config_filename, 'w') as f:
        json.dump(config, f, indent=4)

    # Define the work
    if method == "dsr":
        work = partial(train_dsr, config=config)
    elif method == "gp":
        work = partial(train_gp, logdir=logdir, config_task=config_task, config_gp=config_gp)

    # Farm out the work
    write_header = True
    if n_cores_task > 1:
        # The context manager releases the worker processes once every result
        # has been consumed (the pool was previously never closed).
        with multiprocessing.Pool(n_cores_task) as pool:
            for result in pool.imap_unordered(work, names_and_seeds):
                pd.DataFrame(result, index=[0]).to_csv(output_filename, header=write_header, mode='a', index=False)
                print("Completed {} ({} of {}) in {:.0f} s".format(result["name"], result["seed"]+1-seed_shift, mc, result["t"]))
                write_header = False
    else:
        for name_and_seed in names_and_seeds:
            result = work(name_and_seed)
            pd.DataFrame(result, index=[0]).to_csv(output_filename, header=write_header, mode='a', index=False)
            write_header = False

    print("Results saved to: {}".format(output_filename))


if __name__ == "__main__":
    main()
"""Numba-compiled subroutines used for deep symbolic optimization."""

from numba import jit, prange
import numpy as np


@jit(nopython=True, parallel=True)
def parents_siblings(tokens, arities, parent_adjust):
    """
    Given a batch of action sequences, computes and returns the parents and
    siblings of the next element of the sequence.

    The batch has shape (N, L), where N is the number of sequences (i.e. batch
    size) and L is the length of each sequence. In some cases, expressions may
    already be complete; in these cases, this function sees the start of a new
    expression, even though the return value for these elements won't matter
    because their gradients will be zero because of sequence_length.

    Parameters
    ----------

    tokens : np.ndarray, shape=(N, L), dtype=np.int32
        Batch of action sequences. Values correspond to library indices.

    arities : np.ndarray, dtype=np.int32
        Array of arities corresponding to library indices.

    parent_adjust : np.ndarray, dtype=np.int32
        Array of parent sub-library index corresponding to library indices.

    Returns
    -------

    adj_parents : np.ndarray, shape=(N,), dtype=np.int32
        Adjusted parents of the next element of each action sequence.
        Sequences for which no parent is found keep the "empty" value
        ``max(parent_adjust) + 1``.

    siblings : np.ndarray, shape=(N,), dtype=np.int32
        Siblings of the next element of each action sequence. Sequences
        without a sibling keep the "empty" value ``len(arities)``.

    """
    N, L = tokens.shape

    empty_parent = np.max(parent_adjust) + 1 # Empty token is after all non-empty tokens
    empty_sibling = len(arities) # Empty token is after all non-empty tokens
    adj_parents = np.full(shape=(N,), fill_value=empty_parent, dtype=np.int32)
    siblings = np.full(shape=(N,), fill_value=empty_sibling, dtype=np.int32)
    # Parallelized loop over action sequences
    for r in prange(N):
        arity = arities[tokens[r, -1]]
        if arity > 0: # Parent is the previous element; no sibling
            adj_parents[r] = parent_adjust[tokens[r, -1]]
            continue
        dangling = 0
        # Loop over elements in an action sequence, walking backwards from
        # the last token until the running count of open slots returns to 0
        for c in range(L):
            arity = arities[tokens[r, L - c - 1]]
            dangling += arity - 1
            if dangling == 0: # Parent is L-c-1, sibling is the next
                adj_parents[r] = parent_adjust[tokens[r, L - c - 1]]
                siblings[r] = tokens[r, L - c]
                break
    return adj_parents, siblings


@jit(nopython=True, parallel=True)
def ancestors(actions, arities, ancestor_tokens):
    """
    Given a batch of action sequences, determines whether the next element of
    the sequence has an ancestor in ancestor_tokens.

    The batch has shape (N, L), where N is the number of sequences (i.e. batch
    size) and L is the length of each sequence. In some cases, expressions may
    already be complete; in these cases, this function sees the start of a new
    expression, even though the return value for these elements won't matter
    because their gradients will be zero because of sequence_length.

    Parameters
    ----------

    actions : np.ndarray, shape=(N, L), dtype=np.int32
        Batch of action sequences. Values correspond to library indices.

    arities : np.ndarray, dtype=np.int32
        Array of arities corresponding to library indices.

    ancestor_tokens : np.ndarray, dtype=np.int32
        Array of ancestor library indices to check.

    Returns
    -------

    mask : np.ndarray, shape=(N,), dtype=np.bool_
        Mask of whether the next element of each sequence has an ancestor in
        ancestor_tokens.
    """

    N, L = actions.shape
    mask = np.zeros(shape=(N,), dtype=np.bool_)
    # Parallelized loop over action sequences
    for r in prange(N):
        dangling = 0
        threshold = None # If None, current branch does not have trig ancestor
        for c in range(L):
            arity = arities[actions[r, c]]
            dangling += arity - 1
            # Turn "on" if a trig function is found
            # Remain "on" until branch completes
            if threshold is None:
                for trig_token in ancestor_tokens:
                    if actions[r, c] == trig_token:
                        # The branch rooted at this token is complete once
                        # `dangling` drops back to this value
                        threshold = dangling - 1
                        break
            # Turn "off" once the branch completes
            else:
                if dangling == threshold:
                    threshold = None
        # If the sequences ended "on", then there is a trig ancestor
        if threshold is not None:
            mask[r] = True
    return mask
+ + ancestor_tokens : np.ndarray, dtype=np.int32 + Array of ancestor library indices to check. + + Returns + _______ + + mask : np.ndarray, shape=(N,), dtype=np.bool_ + Mask of whether the next element of each sequence has an ancestor in + ancestor_tokens. + """ + + N, L = actions.shape + mask = np.zeros(shape=(N,), dtype=np.bool_) + # Parallelized loop over action sequences + for r in prange(N): + dangling = 0 + threshold = None # If None, current branch does not have trig ancestor + for c in range(L): + arity = arities[actions[r, c]] + dangling += arity - 1 + # Turn "on" if a trig function is found + # Remain "on" until branch completes + if threshold is None: + for trig_token in ancestor_tokens: + if actions[r, c] == trig_token: + threshold = dangling - 1 + break + # Turn "off" once the branch completes + else: + if dangling == threshold: + threshold = None + # If the sequences ended "on", then there is a trig ancestor + if threshold is not None: + mask[r] = True + return mask diff --git a/dsr/dsr/task/__init__.py b/dsr/dsr/task/__init__.py new file mode 100644 index 00000000..8dc70998 --- /dev/null +++ b/dsr/dsr/task/__init__.py @@ -0,0 +1 @@ +from dsr.task.task import make_task, set_task, Task diff --git a/dsr/dsr/task/regression/__init__.py b/dsr/dsr/task/regression/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dsr/dsr/task/regression/benchmarks.csv b/dsr/dsr/task/regression/benchmarks.csv new file mode 100644 index 00000000..3a2ce53f --- /dev/null +++ b/dsr/dsr/task/regression/benchmarks.csv @@ -0,0 +1,38 @@ +name,variables,expression,train_spec,test_spec,function_set +Nguyen-1,1,"pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-2,1,"pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-3,1,"pow(x1,5)+pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-4,1,"pow(x1,6)+pow(x1,5)+pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza 
+Nguyen-5,1,"sin(pow(x1,2))*cos(x1)-1","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-6,1,"sin(x1)+sin(x1+pow(x1,2))","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-7,1,"log(x1+1)+log(pow(x1,2)+1)","{""all"":{""U"":[0,2,20]}}",None,Koza +Nguyen-8,1,sqrt(x1),"{""all"":{""U"":[0,4,20]}}",None,Koza +Nguyen-9,2,"sin(x1)+sin(pow(x2,2))","{""all"":{""U"":[0,1,20]}}",None,Koza +Nguyen-10,2,2*sin(x1)*cos(x2),"{""all"":{""U"":[0,1,20]}}",None,Koza +Nguyen-11,2,"pow(x1,x2)","{""all"":{""U"":[0,1,20]}}",None,Koza +Nguyen-12,2,"pow(x1,4)-pow(x1,3)+div(pow(x2,2),2)-x2","{""all"":{""U"":[0,1,20]}}",None,Koza +Nguyen-2a,1,"4*pow(x1,4)+3*pow(x1,3)+2*pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-5a,1,"sin(pow(x1,2))*cos(x1)-2","{""all"":{""U"":[-1,1,20]}}",None,Koza +Nguyen-8a,1,"pow(x1,1/3)","{""all"":{""U"":[0,4,20]}}",None,Koza +Nguyen-8aa,1,"pow(x1,2/3)","{""all"":{""U"":[0,4,20]}}",None,Koza +Nguyen-1c,1,"3.39*pow(x1,3)+2.12*pow(x1,2)+1.78*x1","{""all"":{""U"":[-1,1,20]}}",None,CKoza +Nguyen-5c,1,"sin(pow(x1,2))*cos(x1)-0.75","{""all"":{""U"":[-1,1,20]}}",None,CKoza +Nguyen-7c,1,"log(x1+1.4)+log(pow(x1,2)+1.3)","{""all"":{""U"":[0,2,20]}}",None,CKoza +Nguyen-8c,1,sqrt(1.23*x1),"{""all"":{""U"":[0,4,20]}}",None,CKoza +Nguyen-10c,2,sin(1.5*x1)*cos(0.5*x2),"{""all"":{""U"":[0,1,20]}}",None,CKoza +GrammarVAE-1,1,"1./3+x1+sin(pow(x1,2))","{""all"":{""E"":[-10,10,1000]}}",None,GrammarVAE +Jin-1,2,"2.5*pow(x1,4)-1.3*pow(x1,3)+0.5*pow(x2,2)-1.7*x2","{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin +Jin-2,2,"8.0*pow(x1,2)+8.0*pow(x2,3)-15.0","{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin +Jin-3,2,"0.2*pow(x1,3)+0.5*pow(x2,3)-1.2*x2-0.5*x1","{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin +Jin-4,2,1.5*exp(x1)+5.0*cos(x2),"{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin +Jin-5,2,6.0*sin(x1)*cos(x2),"{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin 
+Jin-6,2,1.35*x1*x2+5.5*sin((x1-1.0)*(x2-1.0)),"{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin +Neat-1,1,"pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,KozaPlus1 +Neat-2,1,"pow(x1,5)+pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,KozaPlus1 +Neat-3,1,"sin(pow(x1,2))*cos(x1)-1","{""all"":{""U"":[-1,1,20]}}",None,KozaPlus1 +Neat-4,1,"log(x1+1)+log(pow(x1,2)+1)","{""all"":{""U"":[0,2,20]}}",None,KozaPlus1 +Neat-5,2,2*sin(x1)*cos(x2),"{""all"":{""U"":[-1,1,100]}}",None,Koza +Neat-6,1,harmonic(x1),"{""all"":{""E"":[1,50,1]}}","{""all"":{""E"":[1,120,1]}}",KeijzerPlus1 +Neat-7,2,2-2.1*cos(9.8*x1)*sin(1.3*x2),"{""all"":{""U"":[-50,50,10000]}}",None,Korns +Neat-8,2,"div(exp(-pow(x1-1,2)),(1.2+pow((x2-2.5),2)))","{""all"":{""U"":[0.3,4,100]}}",None,Vladislavleva-B +Neat-9,2,"div(1,(1+pow(x1,-4)))+div(1,(1+pow(x2,-4)))","{""all"":{""E"":[-5,5,0.4]}}",None,Koza diff --git a/dsr/dsr/task/regression/dataset.py b/dsr/dsr/task/regression/dataset.py new file mode 100644 index 00000000..a6c9dfe6 --- /dev/null +++ b/dsr/dsr/task/regression/dataset.py @@ -0,0 +1,274 @@ +"""Class for deterministically generating a benchmark dataset from benchmark specifications.""" + +import os +import ast +import itertools +from pkg_resources import resource_filename +import zlib + +import click +import pandas as pd +import numpy as np + +from dsr.functions import function_map + + +class BenchmarkDataset(object): + """ + Class used to generate (X, y) data from a named benchmark expression. + + Parameters + ---------- + name : str + Name of benchmark expression. + + benchmark_source : str, optional + Filename of CSV describing benchmark expressions. + + root : str, optional + Directory containing benchmark_source and function_sets.csv. + + noise : float, optional + If not None, Gaussian noise is added to the y values with standard + deviation = noise * RMS of the noiseless y training values. 
class BenchmarkDataset(object):
    """
    Class used to generate (X, y) data from a named benchmark expression.

    Parameters
    ----------
    name : str
        Name of benchmark expression.

    benchmark_source : str, optional
        Filename of CSV describing benchmark expressions.

    root : str, optional
        Directory containing benchmark_source and function_sets.csv.

    noise : float, optional
        If not None, Gaussian noise is added to the y values with standard
        deviation = noise * RMS of the noiseless y training values.

    dataset_size_multiplier : float, optional
        Multiplier for size of the dataset.

    seed : int, optional
        Random number seed used to generate data. Checksum on name is added to
        seed.

    logdir : str, optional
        Directory where experiment logfiles are saved.

    backup : bool, optional
        Save generated dataset in logdir if logdir is provided.
    """

    def __init__(self, name, benchmark_source="benchmarks.csv", root=None, noise=0.0,
                 dataset_size_multiplier=1.0, seed=0, logdir=None,
                 backup=False):
        # Set class variables
        self.name = name
        self.seed = seed
        self.noise = noise if noise is not None else 0.0
        self.dataset_size_multiplier = dataset_size_multiplier if dataset_size_multiplier is not None else 1.0

        # Set random number generator used for sampling X values
        seed += zlib.adler32(name.encode("utf-8"))  # Different seed for each name, otherwise two benchmarks with the same domain will always have the same X values
        self.rng = np.random.RandomState(seed)

        # Load benchmark data
        if root is None:
            root = resource_filename("dsr.task", "regression")
        benchmark_path = os.path.join(root, benchmark_source)
        benchmark_df = pd.read_csv(benchmark_path, index_col=0, encoding="ISO-8859-1")
        row = benchmark_df.loc[name]
        self.n_input_var = row["variables"]

        # Create symbolic expression
        self.numpy_expr = self.make_numpy_expr(row["expression"])

        # Create X values
        train_spec = ast.literal_eval(row["train_spec"])
        test_spec = ast.literal_eval(row["test_spec"])
        if test_spec is None:
            test_spec = train_spec
        self.X_train = self.make_X(train_spec)
        self.X_test = self.make_X(test_spec)
        self.train_spec = train_spec
        self.test_spec = test_spec

        # Compute y values
        self.y_train = self.numpy_expr(self.X_train)
        self.y_test = self.numpy_expr(self.X_test)
        self.y_train_noiseless = self.y_train.copy()
        self.y_test_noiseless = self.y_test.copy()

        # Add Gaussian noise
        if self.noise > 0:
            y_rms = np.sqrt(np.mean(self.y_train**2))
            scale = self.noise * y_rms
            self.y_train += self.rng.normal(loc=0, scale=scale, size=self.y_train.shape)
            self.y_test += self.rng.normal(loc=0, scale=scale, size=self.y_test.shape)
        elif self.noise < 0:
            print('WARNING: Ignoring negative noise value: {}'.format(self.noise))

        # Load default function set
        function_set_path = os.path.join(root, "function_sets.csv")
        function_set_df = pd.read_csv(function_set_path, index_col=0)
        function_set_name = row["function_set"]
        self.function_set = function_set_df.loc[function_set_name].tolist()[0].strip().split(',')

        # Prepare status output
        output_message = '\n-- Building dataset -----------------\n'
        output_message += 'Benchmark path                 : {}\n'.format(benchmark_path)
        output_message += 'Generated data for benchmark   : {}\n'.format(name)
        output_message += 'Function set path              : {}\n'.format(function_set_path)
        output_message += 'Function set                   : {} --> {}\n'.format(function_set_name, self.function_set)
        if backup and logdir is not None:
            output_message += self.save(logdir)
        output_message += '-------------------------------------\n\n'
        print(output_message)

    def make_X(self, spec):
        """Creates X values based on specification

        Parameters
        ----------
        spec : dict
            Maps "all" or "x<i>" to a sampling rule: ``{"U": [low, high, n]}``
            draws n uniform samples, ``{"E": [start, stop, step]}`` builds an
            evenly spaced grid. If "all" is present it is used for every
            variable; a variable missing from the spec falls back to "x1".

        Returns
        -------
        X : np.ndarray
            One column per input variable. For a grid ("E") spec with more
            than one variable, rows are the Cartesian product of the
            per-variable grids.

        Raises
        ------
        ValueError
            If a rule is neither "U" nor "E".
        """

        features = []
        for i in range(1, self.n_input_var + 1):

            # Hierarchy: "all" --> "x{}".format(i)
            input_var = "x{}".format(i)
            if "all" in spec:
                input_var = "all"
            elif input_var not in spec:
                input_var = "x1"

            if "U" in spec[input_var]:
                low, high, n = spec[input_var]["U"]
                n = int(n * self.dataset_size_multiplier)
                feature = self.rng.uniform(low=low, high=high, size=n)
            elif "E" in spec[input_var]:
                start, stop, step = spec[input_var]["E"]
                # A "step" larger than the interval is read as a point count
                if step > stop - start:
                    n = step
                else:
                    n = int((stop - start)/step) + 1
                n = int(n * self.dataset_size_multiplier)
                feature = np.linspace(start=start, stop=stop, num=n, endpoint=True)
            else:
                raise ValueError("Did not recognize specification for {}: {}.".format(input_var, spec[input_var]))
            features.append(feature)

        # Do multivariable combinations (decided by the last variable's rule)
        if "E" in spec[input_var] and self.n_input_var > 1:
            X = np.array(list(itertools.product(*features)))
        else:
            X = np.column_stack(features)

        return X

    def make_numpy_expr(self, s):
        """Turns a benchmark expression string into a function of X.

        Parameters
        ----------
        s : str
            Expression from the benchmark CSV, written in terms of x1, x2, ...

        Returns
        -------
        numpy_expr : callable
            Function taking X (one column per input variable) and returning
            the expression evaluated on it.
        """
        # This isn't pretty, but unlike sympy's lambdify, this ensures we use
        # our protected functions. Otherwise, some expressions may have large
        # error even if the functional form is correct due to the training set
        # not using protected functions.

        # Replace function names
        s = s.replace("ln(", "log(")
        s = s.replace("pi", "np.pi")
        s = s.replace("pow", "np.power")
        for k in function_map.keys():
            s = s.replace(k + '(', "function_map['{}'].function(".format(k))

        # Replace variable names (highest index first, so "x1" does not
        # clobber the prefix of "x10")
        for i in reversed(range(self.n_input_var)):
            old = "x{}".format(i+1)
            new = "x[:, {}]".format(i)
            s = s.replace(old, new)

        # SECURITY NOTE: the expression is passed to eval(), so the benchmark
        # CSV must come from a trusted source.
        def numpy_expr(x):
            return eval(s)

        return numpy_expr

    def save(self, logdir='./'):
        """Saves the train and test sets (stacked, y as last column) as CSV.

        Parameters
        ----------
        logdir : str, optional
            Directory to write to; created if missing.

        Returns
        -------
        message : str
            Status line naming the saved file, or an empty string if saving
            failed (a warning is printed in that case). Always a string, so
            callers can append it to their own status output.
        """
        save_path = os.path.join(logdir,'data_{}_n{:.2f}_d{:.0f}_s{}.csv'.format(
                self.name, self.noise, self.dataset_size_multiplier, self.seed))
        try:
            os.makedirs(logdir, exist_ok=True)
            np.savetxt(
                save_path,
                np.concatenate(
                    (
                        np.hstack((self.X_train, self.y_train[..., np.newaxis])),
                        np.hstack((self.X_test, self.y_test[..., np.newaxis]))
                    ), axis=0),
                delimiter=',', fmt='%1.5f'
            )
            return 'Saved dataset to               : {}\n'.format(save_path)
        except Exception as e:
            # Saving is best effort: report and carry on.
            print("WARNING: Could not save dataset: {}".format(type(e)))
            return ''

    def plot(self, logdir='./'):
        """Plot Dataset with underlying ground truth.

        Only datasets with a single input variable are plotted; the figure is
        saved as a PNG in logdir.
        """
        if self.X_train.shape[1] == 1:
            from matplotlib import pyplot as plt
            save_path = os.path.join(logdir,'plot_{}_n{:.2f}_d{:.0f}_s{}.png'.format(
                    self.name, self.noise, self.dataset_size_multiplier, self.seed))

            # Draw ground truth expression
            bounds = list(list(self.train_spec.values())[0].values())[0][:2]
            x = np.linspace(bounds[0], bounds[1], endpoint=True, num=100)
            y = self.numpy_expr(x[:, None])
            plt.plot(x, y, color='red', linestyle='dashed')
            # Draw the actual points
            plt.scatter(self.X_train, self.y_train)
            # Add a title
            plt.title(
                "{} N:{} M:{} S:{}".format(
                    self.name, self.noise, self.dataset_size_multiplier, self.seed),
                fontsize=7)
            try:
                os.makedirs(logdir, exist_ok=True)
                plt.savefig(save_path)
                print('Saved plot to                  : {}'.format(save_path))
            except Exception as e:
                print("WARNING: Could not plot dataset: {}".format(type(e)))
            plt.close()
        else:
            print("WARNING: Plotting only supported for 2D datasets.")
@click.command()
@click.argument("benchmark_source", default="benchmarks.csv")
@click.option('--plot', is_flag=True)
@click.option('--save_csv', is_flag=True)
@click.option('--sweep', is_flag=True)
def main(benchmark_source, plot, save_csv, sweep):
    """Generate benchmark datasets and optionally plot them.

    BENCHMARK_SOURCE is the benchmark CSV file, looked up in the
    dsr.task regression directory. Only benchmarks whose name starts with
    "Nguyen", "Constant" or "Custom" are processed.

    With --sweep, Nguyen benchmarks are additionally generated for every
    combination of noise level and dataset size multiplier; --save_csv is
    forwarded to those sweep datasets as their backup flag. With --plot,
    datasets that have a single input variable are plotted to the 'log'
    directory.
    """

    regression_path = resource_filename("dsr.task", "regression/")
    benchmark_path = os.path.join(regression_path, benchmark_source)
    save_dir = os.path.join(regression_path, 'log')
    df = pd.read_csv(benchmark_path, encoding="ISO-8859-1")

    # str.startswith accepts a tuple, so a single call covers every prefix.
    supported_prefixes = ("Nguyen", "Constant", "Custom")

    # The sweep grid does not depend on the benchmark; build it once.
    noises = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
    dataset_size_multipliers = [1.0, 10.0]

    for name in df["name"].to_list():

        if not name.startswith(supported_prefixes):
            continue

        # Noiseless dataset with default settings.
        # NOTE(review): save_csv is not forwarded here, so --save_csv has no
        # effect without --sweep -- confirm whether that is intended.
        datasets = [BenchmarkDataset(name=name,
                                     benchmark_source=benchmark_source)]

        # Generate all combinations of noise levels and dataset size multipliers
        if sweep and name.startswith("Nguyen"):
            for noise in noises:
                for dataset_size_multiplier in dataset_size_multipliers:
                    datasets.append(BenchmarkDataset(
                        name=name,
                        benchmark_source=benchmark_source,
                        noise=noise,
                        dataset_size_multiplier=dataset_size_multiplier,
                        backup=save_csv,
                        logdir=save_dir))

        # Plot the datasets that have a single input variable
        if plot:
            for dataset in datasets:
                if dataset.X_train.shape[1] == 1:
                    dataset.plot(save_dir)
If None, uses function_set according to + benchmark dataset. + + dataset : dict, str, or tuple + If dict: .dataset.BenchmarkDataset kwargs. + If str: filename of dataset. + If tuple: (X, y) data + + metric : str + Name of reward function metric to use. + + metric_params : list + List of metric-specific parameters. + + extra_metric_test : str + Name of extra function metric to use for testing. + + extra_metric_test_params : list + List of metric-specific parameters for extra test metric. + + reward_noise : float + Noise level to use when computing reward. + + reward_noise_type : "y_hat" or "r" + "y_hat" : N(0, reward_noise * y_rms_train) is added to y_hat values. + "r" : N(0, reward_noise) is added to r. + + normalize_variance : bool + If True and reward_noise_type=="r", reward is multiplied by + 1 / sqrt(1 + 12*reward_noise**2) (We assume r is U[0,1]). + + protected : bool + Whether to use protected functions. + + threshold : float + Threshold of NMSE on noiseless data used to determine success. + + Returns + ------- + + task : Task + Dynamically created Task object whose methods contains closures. 
+ """ + + X_test = y_test = y_test_noiseless = None + + # Benchmark dataset config + if isinstance(dataset, dict): + dataset["name"] = name + benchmark = BenchmarkDataset(**dataset) + X_train = benchmark.X_train + y_train = benchmark.y_train + X_test = benchmark.X_test + y_test = benchmark.y_test + y_test_noiseless = benchmark.y_test_noiseless + + # Unless specified, use the benchmark's default function_set + if function_set is None: + function_set = benchmark.function_set + + # Dataset filename + elif isinstance(dataset, str): + df = pd.read_csv(dataset, header=None) # Assuming data file does not have header rows + X_train = df.values[:, :-1] + y_train = df.values[:, -1] + + # sklearn-like (X, y) data + elif isinstance(dataset, tuple): + X_train = dataset[0] + y_train = dataset[1] + + if X_test is None: + X_test = X_train + y_test = y_train + y_test_noiseless = y_test + + if function_set is None: + print("WARNING: Function set not provided. Using default set.") + function_set = ["add", "sub", "mul", "div", "sin", "cos", "exp", "log"] + + # Save time by only computing these once + var_y_test = np.var(y_test) + var_y_test_noiseless = np.var(y_test_noiseless) + + # Define closures for metric + metric, invalid_reward, max_reward = make_regression_metric(metric, y_train, *metric_params) + if extra_metric_test is not None: + print("Setting extra test metric to {}.".format(extra_metric_test)) + metric_test, _, _ = make_regression_metric(extra_metric_test, y_test, *extra_metric_test_params) + assert reward_noise >= 0.0, "Reward noise must be non-negative." + if reward_noise: + assert reward_noise_type in ["y_hat", "r"], "Reward noise type not recognized." 
+ rng = np.random.RandomState(0) + y_rms_train = np.sqrt(np.mean(y_train ** 2)) + if reward_noise_type == "y_hat": + scale = reward_noise * y_rms_train + elif reward_noise_type == "r": + scale = reward_noise + + def reward(p): + + # Compute estimated values + y_hat = p.execute(X_train) + + # For invalid expressions, return invalid_reward + if p.invalid: + return invalid_reward + + ### Observation noise + # For reward_noise_type == "y_hat", success must always be checked to + # ensure success cases aren't overlooked due to noise. If successful, + # return max_reward. + if reward_noise and reward_noise_type == "y_hat": + if p.evaluate.get("success"): + return max_reward + y_hat += rng.normal(loc=0, scale=scale, size=y_hat.shape) + + # Compute metric + r = metric(y_train, y_hat) + + ### Direct reward noise + # For reward_noise_type == "r", success can for ~max_reward metrics be + # confirmed before adding noise. If successful, must return np.inf to + # avoid overlooking success cases. + if reward_noise and reward_noise_type == "r": + if r >= max_reward - 1e-5 and p.evaluate.get("success"): + return np.inf + r += rng.normal(loc=0, scale=scale) + if normalize_variance: + r /= np.sqrt(1 + 12*scale**2) + + return r + + + def evaluate(p): + + # Compute predictions on test data + y_hat = p.execute(X_test) + if p.invalid: + nmse_test = None + nmse_test_noiseless = None + success = False + + else: + # NMSE on test data (used to report final error) + nmse_test = np.mean((y_test - y_hat)**2) / var_y_test + + # NMSE on noiseless test data (used to determine recovery) + nmse_test_noiseless = np.mean((y_test_noiseless - y_hat)**2) / var_y_test_noiseless + + # Success is defined by NMSE on noiseless test data below a threshold + success = nmse_test_noiseless < threshold + + info = { + "nmse_test" : nmse_test, + "nmse_test_noiseless" : nmse_test_noiseless, + "success" : success + } + + if extra_metric_test is not None: + if p.invalid: + m_test = None + m_test_noiseless = None + 
def make_regression_metric(name, y_train, *args):
    """
    Factory function for a regression metric. This includes closures for
    metric parameters and the variance of the training data.

    Parameters
    ----------

    name : str
        Name of metric. See all_metrics for supported metrics.

    y_train : array-like
        Training targets. Only their variance is used, to normalize the
        "n"-prefixed metrics and to derive some invalid rewards.

    args : args
        Metric-specific parameters. Their number must match the count
        declared for the metric in all_metrics.

    Returns
    -------

    metric : function
        Regression metric mapping true and estimated values to a scalar.

    invalid_reward: float or None
        Reward value to use for invalid expression. If None, the training
        algorithm must handle it, e.g. by rejecting the sample.

    max_reward: float
        Maximum possible reward under this metric.

    Raises
    ------

    AssertionError
        If the metric name is not recognized or the wrong number of
        metric-specific parameters is given.
    """

    # The correlation metrics below need scipy.stats. Import it here so they
    # do not raise NameError when the enclosing module has not imported scipy.
    import scipy.stats

    var_y = np.var(y_train)

    # Maps metric name -> (metric function, number of expected parameters)
    all_metrics = {

        # Negative mean squared error
        # Range: [-inf, 0]
        # Value = -var(y) when y_hat == mean(y)
        "neg_mse" : (lambda y, y_hat : -np.mean((y - y_hat)**2),
                     0),

        # Negative root mean squared error
        # Range: [-inf, 0]
        # Value = -sqrt(var(y)) when y_hat == mean(y)
        "neg_rmse" : (lambda y, y_hat : -np.sqrt(np.mean((y - y_hat)**2)),
                      0),

        # Negative normalized mean squared error
        # Range: [-inf, 0]
        # Value = -1 when y_hat == mean(y)
        "neg_nmse" : (lambda y, y_hat : -np.mean((y - y_hat)**2)/var_y,
                      0),

        # Negative normalized root mean squared error
        # Range: [-inf, 0]
        # Value = -1 when y_hat == mean(y)
        "neg_nrmse" : (lambda y, y_hat : -np.sqrt(np.mean((y - y_hat)**2)/var_y),
                       0),

        # (Protected) negative log mean squared error
        # Range: [-inf, 0]
        # Value = -log(1 + var(y)) when y_hat == mean(y)
        "neglog_mse" : (lambda y, y_hat : -np.log(1 + np.mean((y - y_hat)**2)),
                        0),

        # (Protected) inverse mean squared error
        # Range: [0, 1]
        # Value = 1/(1 + args[0]*var(y)) when y_hat == mean(y)
        "inv_mse" : (lambda y, y_hat : 1/(1 + args[0]*np.mean((y - y_hat)**2)),
                     1),

        # (Protected) inverse normalized mean squared error
        # Range: [0, 1]
        # Value = 1/(1 + args[0]) when y_hat == mean(y)
        "inv_nmse" : (lambda y, y_hat : 1/(1 + args[0]*np.mean((y - y_hat)**2)/var_y),
                      1),

        # (Protected) inverse normalized root mean squared error
        # Range: [0, 1]
        # Value = 1/(1 + args[0]) when y_hat == mean(y)
        "inv_nrmse" : (lambda y, y_hat : 1/(1 + args[0]*np.sqrt(np.mean((y - y_hat)**2)/var_y)),
                       1),

        # Fraction of predicted points within p0*abs(y) + p1 band of the true value
        # Range: [0, 1]
        "fraction" : (lambda y, y_hat : np.mean(abs(y - y_hat) < args[0]*abs(y) + args[1]),
                      2),

        # Pearson correlation coefficient
        # Range: [-1, 1]
        "pearson" : (lambda y, y_hat : scipy.stats.pearsonr(y, y_hat)[0],
                     0),

        # Spearman correlation coefficient
        # Range: [-1, 1]
        "spearman" : (lambda y, y_hat : scipy.stats.spearmanr(y, y_hat)[0],
                      0)
    }

    assert name in all_metrics, "Unrecognized reward function name."
    assert len(args) == all_metrics[name][1], "For {}, expected {} reward function parameters; received {}.".format(name,all_metrics[name][1], len(args))
    metric = all_metrics[name][0]

    # For negative MSE-based rewards, invalid reward is the value of the reward function when y_hat = mean(y)
    # For inverse MSE-based rewards, invalid reward is 0.0
    # For "fraction", invalid reward is the minimum value of the reward function's range
    # For correlation-based rewards, invalid reward is 0.0 (i.e. no correlation)
    all_invalid_rewards = {
        "neg_mse" : -var_y,
        "neg_rmse" : -np.sqrt(var_y),
        "neg_nmse" : -1.0,
        "neg_nrmse" : -1.0,
        "neglog_mse" : -np.log(1 + var_y),
        "inv_mse" : 0.0, #1/(1 + args[0]*var_y),
        "inv_nmse" : 0.0, #1/(1 + args[0]),
        "inv_nrmse" : 0.0, #1/(1 + args[0]),
        "fraction" : 0.0,
        "pearson" : 0.0,
        "spearman" : 0.0
    }
    invalid_reward = all_invalid_rewards[name]

    all_max_rewards = {
        "neg_mse" : 0.0,
        "neg_rmse" : 0.0,
        "neg_nmse" : 0.0,
        "neg_nrmse" : 0.0,
        "neglog_mse" : 0.0,
        "inv_mse" : 1.0,
        "inv_nmse" : 1.0,
        "inv_nrmse" : 1.0,
        "fraction" : 1.0,
        "pearson" : 1.0,
        "spearman" : 1.0
    }
    max_reward = all_max_rewards[name]

    return metric, invalid_reward, max_reward
@dataclass(frozen=True)
class Task:
    """
    Data object specifying a symbolic search task.

    Attributes
    ----------
    reward_function : function
        Reward function mapping program.Program object to scalar.

    evaluate : function
        Evaluation function mapping program.Program object to a dict of task-
        specific evaluation metrics (primitives). Special optional key "success"
        is used for determining early stopping during training.

    library : Library
        Library of Tokens.

    stochastic : bool
        Whether the reward function of the task is stochastic.

    extra_info : dict
        Extra task-specific info, e.g. reference to symbolic policies for
        control task.
    """

    reward_function: Callable[[Program], float]
    # The evaluation callable returns a dict of metrics, not a scalar.
    evaluate: Callable[[Program], Dict[str, Any]]
    library: Library
    stochastic: bool
    extra_info: Dict[str, Any]
def set_task(config_task):
    """
    Helper function to set the Program class Task and execute function from
    the task config.

    Parameters
    ----------

    config_task : dict
        Task configuration. It is unpacked as keyword arguments to make_task.
        The optional key "protected" (default False) selects whether
        protected functions are used for Program execution.
    """

    # Use of protected functions is the same for all tasks, so it's handled separately
    protected = config_task.get("protected", False)

    # Configure execution before building the task, then register the task
    # on the Program class.
    Program.set_execute(protected)
    task = make_task(**config_task)
    Program.set_task(task)
save_path = resource_filename("dsr.test", "data/test_model") + model.save(save_path) + + +if __name__ == "__main__": + main() diff --git a/dsr/dsr/test/test_core.py b/dsr/dsr/test/test_core.py new file mode 100644 index 00000000..ebdd55ca --- /dev/null +++ b/dsr/dsr/test/test_core.py @@ -0,0 +1,47 @@ +"""Test cases for DeepSymbolicOptimizer on each Task.""" + +from pkg_resources import resource_filename + +import pytest +import tensorflow as tf +import numpy as np + +from dsr import DeepSymbolicOptimizer +from dsr.test.generate_test_data import CONFIG_TRAINING_OVERRIDE + + +@pytest.fixture +def model(): + return DeepSymbolicOptimizer("config.json") + + +@pytest.fixture +def cached_results(model): + save_path = resource_filename("dsr.test", "data/test_model") + model.load(save_path) + results = model.sess.run(tf.trainable_variables()) + + return results + + +@pytest.mark.parametrize("config", ["config.json"]) +def test_task(model, config): + """Test that Tasks do not crash for various configs.""" + + model.update_config(config) + model.config_training.update({"n_samples" : 10, + "batch_size" : 5 + }) + model.train() + + +def test_model_parity(model, cached_results): + """Compare results to last""" + + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + results = model.sess.run(tf.trainable_variables()) + + cached_results = np.concatenate([a.flatten() for a in cached_results]) + results = np.concatenate([a.flatten() for a in results]) + np.testing.assert_array_almost_equal(results, cached_results) diff --git a/dsr/dsr/test/test_prior.py b/dsr/dsr/test/test_prior.py new file mode 100644 index 00000000..b94293fc --- /dev/null +++ b/dsr/dsr/test/test_prior.py @@ -0,0 +1,426 @@ +"""Tests for various Priors.""" + +import pytest + +from dsr.core import DeepSymbolicOptimizer +from dsr.test.generate_test_data import CONFIG_TRAINING_OVERRIDE +from dsr.program import from_tokens, Program +from dsr.memory import Batch +from dsr.controller import 
parents_siblings + +import numpy as np + + +BATCH_SIZE = 1000 + + +@pytest.fixture +def model(): + return DeepSymbolicOptimizer("config.json") + + +def assert_invalid(model, cases): + cases = [Program.library.actionize(case) for case in cases] + batch = make_batch(model, cases) + logp = model.controller.compute_probs(batch, log=True) + print(batch) + assert all(np.isneginf(logp)), \ + "Found invalid case with probability > 0." + + +def assert_valid(model, cases): + cases = [Program.library.actionize(case) for case in cases] + batch = make_batch(model, cases) + logp = model.controller.compute_probs(batch, log=True) + assert all(logp > -np.inf), \ + "Found valid case with probability 0." + + +def make_sequence(model, L): + """Utility function to generate a sequence of length L""" + X = Program.library.input_tokens[0] + U = Program.library.unary_tokens[0] + B = Program.library.binary_tokens[0] + num_B = (L - 1) // 2 + num_U = int(L % 2 == 0) + num_X = num_B + 1 + case = [B] * num_B + [U] * num_U + [X] * num_X + assert len(case) == L + case = case[:model.controller.max_length] + return case + + +def make_batch(model, actions): + """ + Utility function to generate a Batch from (unfinished) actions. + + This uses essentially the same logic as controller.py's loop_fn, except + actions are prescribed instead of samples. Is there a way to refactor these + with less code reuse? 
+ """ + + batch_size = len(actions) + L = model.controller.max_length + + # Pad actions to maximum length + actions = np.array([np.pad(a, (0, L - len(a)), "constant") + for a in actions], dtype=np.int32) + + # Initialize obs + prev_actions = np.zeros_like(actions) + parents = np.zeros_like(actions) + siblings = np.zeros_like(actions) + + arities = Program.library.arities + parent_adjust = Program.library.parent_adjust + + # Set initial values + empty_parent = np.max(parent_adjust) + 1 + empty_sibling = len(arities) + action = empty_sibling + parent, sibling = empty_parent, empty_sibling + prior = np.array([model.prior.initial_prior()] * batch_size) + + priors = [] + lengths = np.zeros(batch_size, dtype=np.int32) + finished = np.zeros(batch_size, dtype=np.bool_) + dangling = np.ones(batch_size, dtype=np.int32) + for i in range(L): + partial_actions = actions[:, :(i + 1)] + + # Set prior and obs used to generate this action + prev_actions[:, i] = action + parents[:, i] = parent + siblings[:, i] = sibling + priors.append(prior) + + # Compute next obs and prior + action = actions[:, i] + parent, sibling = parents_siblings(tokens=partial_actions, + arities=arities, + parent_adjust=parent_adjust) + dangling += arities[action] - 1 + prior = model.prior(partial_actions, parent, sibling, dangling) + finished = np.where(np.logical_and(dangling == 0, lengths == 0), + True, + False) + lengths = np.where(finished, + i + 1, + lengths) + + lengths = np.where(lengths == 0, L, lengths) + obs = [prev_actions, parents, siblings] + priors = np.array(priors).swapaxes(0, 1) + rewards = np.zeros(batch_size, dtype=np.float32) + batch = Batch(actions, obs, priors, lengths, rewards) + return batch + + +def test_repeat(model): + """Test cases for RepeatConstraint.""" + + model.config_prior = {} # Turn off all other Priors + model.config_prior["repeat"] = { + "tokens" : ["sin", "cos"], + "min_" : None, # Not yet supported + "max_" : 2 + } + 
model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + invalid_cases = [] + invalid_cases.append(["sin"] * 3) + invalid_cases.append(["cos"] * 3) + invalid_cases.append(["sin", "cos", "sin"]) + invalid_cases.append(["mul", "sin"] * 3) + invalid_cases.append(["mul", "sin", "x1", "sin", "mul", "cos"]) + assert_invalid(model, invalid_cases) + + valid_cases = [] + valid_cases.append(["mul"] + ["sin"] * 2 + ["log"] * 2) + valid_cases.append(["sin"] + ["mul", "exp"] * 4 + ["cos"]) + assert_valid(model, valid_cases) + + +def test_descendant(model): + """Test cases for descendant RelationalConstraint.""" + + descendants = "add,mul" + ancestors = "exp,log" + + library = Program.library + model.config_prior = {} # Turn off all other Priors + model.config_prior["relational"] = { + "targets" : descendants, + "effectors" : ancestors, + "relationship" : "descendant" + } + + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + descendants = library.actionize(descendants) + ancestors = library.actionize(ancestors) + + U = [i for i in library.unary_tokens + if i not in ancestors and i not in descendants][0] + B = [i for i in library.binary_tokens + if i not in ancestors and i not in descendants][0] + + # For each D-A combination, generate invalid cases where A is an ancestor + # of D + invalid_cases = [] + for A in ancestors: + for D in descendants: + invalid_cases.append([A, D]) + invalid_cases.append([A] * 10 + [D]) + invalid_cases.append([A] + [U, B] * 5 + [D]) + assert_invalid(model, invalid_cases) + + # For each D-A combination, generate valid cases where A is not an ancestor + # of D + valid_cases = [] + for A in ancestors: + for D in descendants: + valid_cases.append([U, D]) + valid_cases.append([D] + [U] * 10 + [A]) + assert_valid(model, valid_cases) + + +def test_trig(model): + """Test cases for TrigConstraint.""" + + library = Program.library + model.config_prior = {} # Turn off all other Priors + model.config_prior["trig"] = {} + 
model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + X = library.input_tokens[0] + U = [i for i in library.unary_tokens + if i not in library.trig_tokens][0] + B = library.binary_tokens[0] + + # For each trig-trig combination, generate invalid cases where one Token is + # a descendant the other + invalid_cases = [] + trig_tokens = library.trig_tokens + for t1 in trig_tokens: + for t2 in trig_tokens: + invalid_cases.append([t1, t2, X]) # E.g. sin(cos(x)) + invalid_cases.append([t1, B, X, t2, X]) # E.g. sin(x + cos(x)) + invalid_cases.append([t1] + [U] * 10 + [t2, X]) + assert_invalid(model, invalid_cases) + + # For each trig-trig pair, generate valid cases where one Token is the + # sibling the other + valid_cases = [] + for t1 in trig_tokens: + for t2 in trig_tokens: + valid_cases.append([B, U, t1, X, t2, X]) # E.g. log(sin(x)) + cos(x) + valid_cases.append([B, t1, X, t2, X]) # E.g. sin(x) + cos(x) + valid_cases.append([U] + valid_cases[-1]) # E.g. log(sin(x) + cos(x)) + assert_valid(model, valid_cases) + + +def test_child(model): + """Test cases for child RelationalConstraint.""" + + library = Program.library + parents = library.actionize("log,exp,mul") + children = library.actionize("exp,log,sin") + + model.config_prior = {} # Turn off all other Priors + model.config_prior["relational"] = { + "targets" : children, + "effectors" : parents, + "relationship" : "child" + } + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + # For each parent-child pair, generate invalid cases where child is one of + # parent's children. + X = library.input_tokens[0] + assert X not in children, \ + "Error in test case specification. Do not include x1 in children." 
+ invalid_cases = [] + for p, c in zip(parents, children): + arity = library.tokenize(p)[0].arity + for i in range(arity): + before = i + after = arity - i - 1 + case = [p] + [X] * before + [c] + [X] * after + invalid_cases.append(case) + assert_invalid(model, invalid_cases) + + +def test_uchild(model): + """Test cases for uchild RelationalConstraint.""" + + library = Program.library + targets = library.actionize("x1") + effectors = library.actionize("sub,div") # i.e. no x1 - x1 or x1 / x1 + + model.config_prior = {} # Turn off all other Priors + model.config_prior["relational"] = { + "targets" : targets, + "effectors" : effectors, + "relationship" : "uchild" + } + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + # Generate valid test cases + valid_cases = [] + valid_cases.append("mul,x1,x1") + valid_cases.append("sub,x1,sub,x1,sub,x1,sin,x1") + valid_cases.append("sub,sub,sub,x1,sin,x1,x1") + valid_cases.append("sub,sin,x1,sin,x1") + assert_valid(model, valid_cases) + + # Generate invalid test cases + invalid_cases = [] + invalid_cases.append("add,sub,x1,x1,sin,x1") + invalid_cases.append("sin,sub,x1,x1") + invalid_cases.append("sub,sub,sub,x1,x1,x1") + assert_invalid(model, invalid_cases) + + +def test_const(model): + """Test cases for ConstConstraint.""" + + # This test case needs the const Token before creating the model + model.config["task"]["name"] = "Nguyen-1c" + model.pool = model.make_pool() # Resets Program.task with new Task + + library = Program.library + model.config_prior = {} # Turn off all other Priors + model.config_prior["const"] = {} + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + # Generate valid test cases + valid_cases = [] + valid_cases.append("mul,const,x1") + valid_cases.append("sub,const,sub,const,x1") + assert_valid(model, valid_cases) + + # Generate invalid test cases + invalid_cases = [] + invalid_cases.append("sin,const") + invalid_cases.append("mul,const,const") + 
invalid_cases.append("sin,add,const,const") + assert_invalid(model, invalid_cases) + + +def test_sibling(model): + """Test cases for sibling RelationalConstraint.""" + + library = Program.library + targets = library.actionize("sin,cos") + effectors = library.actionize("x1") + + model.config_prior = {} # Turn off all other Priors + model.config_prior["relational"] = { + "targets" : targets, + "effectors" : effectors, + "relationship" : "sibling" + } + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + # Generate valid test cases + valid_cases = [] + valid_cases.append("mul,sin,x1,cos,x1") + valid_cases.append("sin,cos,x1") + valid_cases.append("add,add,sin,mul,x1,x1,cos,x1,x1") + assert_valid(model, valid_cases) + + # Generate invalid test cases + invalid_cases = [] + invalid_cases.append("add,x1,sin,x1") + invalid_cases.append("add,sin,x1,x1") + invalid_cases.append("add,add,sin,mul,x1,x1,x1,sin,x1") + assert_invalid(model, invalid_cases) + + +def test_inverse(model): + """Test cases for InverseConstraint.""" + + library = Program.library + model.config_prior = {} # Turn off all other Priors + model.config_prior["inverse"] = {} + model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + # Generate valid cases + valid_cases = [] + valid_cases.append("exp,sin,log,cos,exp,x1") + valid_cases.append("mul,sin,log,x1,exp,cos,x1") + assert_valid(model, valid_cases) + + # Generate invalid cases for each inverse + invalid_cases = [] + invalid_cases.append("mul,sin,x1,exp,log,x1") + for t1, t2 in library.inverse_tokens.items(): + invalid_cases.append([t1, t2]) + invalid_cases.append([t2, t1]) + assert_invalid(model, invalid_cases) + + +@pytest.mark.parametrize("minmax", [(10, 10), (4, 30), (None, 10), (10, None)]) +def test_length(model, minmax): + """Test cases for LengthConstraint.""" + + min_, max_ = minmax + model.config_prior = {} # Turn off all other Priors + model.config_prior["length"] = {"min_" : min_, "max_" : max_} + 
model.config_training.update(CONFIG_TRAINING_OVERRIDE) + model.train() + + # First, check that randomly generated samples do not violate constraints + actions, _, _ = model.controller.sample(BATCH_SIZE) + programs = [from_tokens(a, optimize=True) for a in actions] + lengths = [len(p.traversal) for p in programs] + if min_ is not None: + min_L = min(lengths) + assert min_L >= min_, \ + "Found min length {} but constrained to {}.".format(min_L, min_) + if max_ is not None: + max_L = max(lengths) + assert max_L <= max_, \ + "Found max length {} but constrained to {}.".format(max_L, max_) + + # Next, check valid and invalid test cases based on min_ and max_ + # Valid test cases should not be constrained + # Invalid test cases should all be constrained + valid_cases = [] + invalid_cases = [] + + # Initial prior prevents length-1 tokens + case = make_sequence(model, 1) + invalid_cases.append(case) + + if min_ is not None: + # Generate an invalid case that is one Token too short + if min_ > 1: + case = make_sequence(model, min_ - 1) + invalid_cases.append(case) + + # Generate a valid case that is exactly the minimum length + case = make_sequence(model, min_) + valid_cases.append(case) + + if max_ is not None: + # Generate an invalid case that is one Token too long (which will be + # truncated to dangling == 1) + case = make_sequence(model, max_ + 1) + invalid_cases.append(case) + + # Generate a valid case that is exactly the maximum length + case = make_sequence(model, max_) + valid_cases.append(case) + + assert_valid(model, valid_cases) + assert_invalid(model, invalid_cases) diff --git a/dsr/dsr/train.py b/dsr/dsr/train.py new file mode 100644 index 00000000..bd819eb3 --- /dev/null +++ b/dsr/dsr/train.py @@ -0,0 +1,508 @@ +"""Defines main training loop for deep symbolic regression.""" + +import os +import multiprocessing +from itertools import compress +from datetime import datetime +from collections import defaultdict + +import tensorflow as tf +import pandas as pd 
def work(p):
    """Pool task: optimize a Program's constants and compute its base reward.

    Returns
    -------
    (optimized_constants, base_r) : tuple
        Returned (rather than only stored on ``p``) because multiprocessing
        operates on a copy of the Program; the caller writes the values back
        onto the original object.
    """
    optimized_constants = p.optimize()
    return optimized_constants, p.base_r


def hof_work(p):
    """Pool task: build one hall-of-fame row for Program ``p``.

    The last element is the ``p.evaluate`` dict; ``learn`` expands it into
    one column per key.
    """
    return [p.r, p.base_r, p.count, repr(p.sympy_expr), repr(p), p.evaluate]


def pf_work(p):
    """Pool task: build one Pareto-front row for Program ``p``.

    Same layout as ``hof_work`` with ``p.complexity_eureqa`` prepended.
    """
    return [p.complexity_eureqa, p.r, p.base_r, p.count, repr(p.sympy_expr), repr(p), p.evaluate]


def learn(sess, controller, pool,
          logdir="./log", n_epochs=None, n_samples=1e6,
          batch_size=1000, complexity="length", complexity_weight=0.001,
          const_optimizer="minimize", const_params=None, alpha=0.1,
          epsilon=0.01, n_cores_batch=1, verbose=True, summary=True,
          output_file=None, save_all_r=False, baseline="ewma_R",
          b_jumpstart=True, early_stopping=False, hof=10, eval_all=False,
          pareto_front=False, debug=0):
    """
    Executes the main training loop.

    Parameters
    ----------
    sess : tf.Session
        TensorFlow Session object.

    controller : dsr.controller.Controller
        Controller object used to generate Programs.

    pool : multiprocessing.Pool or None
        Pool to parallelize reward computation. For the control task, each
        worker should have its own TensorFlow model. If None, a Pool will be
        generated if n_cores_batch > 1. Note that the pool (given or created)
        is closed before this function returns.

    logdir : str, optional
        Name of log directory.

    n_epochs : int or None, optional
        Number of epochs to train when n_samples is None.

    n_samples : int or None, optional
        Total number of expressions to sample when n_epochs is None. In this
        case, n_epochs = int(n_samples / batch_size).

    batch_size : int, optional
        Number of sampled expressions per epoch.

    complexity : str, optional
        Complexity penalty name.

    complexity_weight : float, optional
        Coefficient for complexity penalty.

    const_optimizer : str or None, optional
        Name of constant optimizer.

    const_params : dict, optional
        Dict of constant optimizer kwargs.

    alpha : float, optional
        Coefficient of exponentially-weighted moving average of baseline.

    epsilon : float or None, optional
        Fraction of top expressions used for training. None (or
        equivalently, 1.0) turns off risk-seeking.

    n_cores_batch : int, optional
        Number of cores to spread out over the batch for constant optimization
        and evaluating reward. If -1, uses multiprocessing.cpu_count().

    verbose : bool, optional
        Whether to print progress.

    summary : bool, optional
        Whether to write TensorFlow summaries.

    output_file : str, optional
        Filename to write results for each iteration.

    save_all_r : bool, optional
        Whether to save all rewards for each iteration. Requires output_file,
        since the rewards filename is derived from it; otherwise a warning is
        printed and nothing is saved.

    baseline : str, optional
        Type of baseline to use: grad J = (R - b) * grad-log-prob(expression).
        Choices implemented by this function:
        (1) "ewma_R" : b = EWMA(mean(R))
        (2) "R_e" : b = R_e (requires risk-seeking, i.e. epsilon < 1.0)
        In the above, mean(R) is the sample average _after_ epsilon
        sub-sampling and R_e is the (1-epsilon)-quantile estimate. Any other
        value (including "ewma_R_e" and "combined", which are not implemented
        here) raises ValueError.

    b_jumpstart : bool, optional
        Whether EWMA part of the baseline starts at the average of the first
        iteration. If False, the EWMA starts at 0.0.

    early_stopping : bool, optional
        Whether to stop early if stopping criteria is reached.

    hof : int or None, optional
        If not None, number of top Programs to evaluate after training.

    eval_all : bool, optional
        If True, evaluate all Programs. While expensive, this is useful for
        noisy data when you can't be certain of success solely based on reward.
        If False, only the top Program is evaluated each iteration.

    pareto_front : bool, optional
        If True, compute and save the Pareto front at the end of training.

    debug : int, optional
        Debug level, also passed to Controller. 0: No debug. 1: Print initial
        parameter means. 2: Print parameter means each step.

    Returns
    -------
    result : dict
        A dict describing the selected Program: the first Program found with
        evaluate["success"] (via eval_all or the Pareto front) if any,
        otherwise the Program with the best base_r.

    Raises
    ------
    ValueError
        If both n_samples and n_epochs are None, if the resulting number of
        epochs is less than 1, if baseline is not supported, or if
        baseline="R_e" is requested while risk-seeking is turned off.
    """

    # Config assertions and warnings
    assert n_samples is None or n_epochs is None, "At least one of 'n_samples' or 'n_epochs' must be None."
    if n_samples is None and n_epochs is None:
        raise ValueError("One of 'n_samples' or 'n_epochs' must be specified.")

    # Validate the baseline up front; previously an unsupported choice only
    # failed inside the first training step with a NameError
    risk_seeking = epsilon is not None and epsilon < 1.0
    if baseline not in ("ewma_R", "R_e"):
        raise ValueError("Unsupported baseline '{}'; choose 'ewma_R' or 'R_e'.".format(baseline))
    if baseline == "R_e" and not risk_seeking:
        raise ValueError("Baseline 'R_e' requires risk-seeking (epsilon < 1.0).")

    n_epochs = n_epochs if n_epochs is not None else int(n_samples / batch_size)
    if n_epochs < 1:
        raise ValueError("Training requires at least one epoch, got n_epochs={}.".format(n_epochs))

    # Create the summary writer
    writer = None
    if summary:
        timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
        summary_dir = os.path.join("summary", timestamp)
        writer = tf.summary.FileWriter(summary_dir, sess.graph)

    # Create log file
    if output_file is not None:
        all_r_output_file, hof_output_file, pf_output_file = setup_output_files(logdir, output_file)
        stats_path = os.path.join(logdir, output_file)
    else:
        all_r_output_file = hof_output_file = pf_output_file = None
        stats_path = None

    # Set the complexity functions
    Program.set_complexity_penalty(complexity, complexity_weight)

    # Set the constant optimizer
    const_params = const_params if const_params is not None else {}
    Program.set_const_optimizer(const_optimizer, **const_params)

    # Initialize compute graph
    sess.run(tf.global_variables_initializer())

    if debug:
        tvars = tf.trainable_variables()
        def print_var_means():
            tvars_vals = sess.run(tvars)
            for var, val in zip(tvars, tvars_vals):
                print(var.name, "mean:", val.mean(),"var:", val.var())

    # Create the pool of workers, if pool is not already given
    if pool is None:
        if n_cores_batch == -1:
            n_cores_batch = multiprocessing.cpu_count()
        if n_cores_batch > 1:
            pool = multiprocessing.Pool(n_cores_batch)

    # Create the priority queue
    k = controller.pqt_k
    if controller.pqt and k is not None and k > 0:
        priority_queue = make_queue(priority=True, capacity=k)
    else:
        priority_queue = None

    if debug >= 1:
        print("\nInitial parameter means:")
        print_var_means()

    # Reward history is currently disabled: it is never set to a dict here,
    # so the per-step history update below is skipped
    base_r_history = None

    # Main training loop
    p_final = None
    base_r_best = -np.inf
    r_best = -np.inf
    prev_r_best = None
    prev_base_r_best = None
    ewma = None if b_jumpstart else 0.0 # EWMA portion of baseline
    all_r = np.zeros(shape=(n_epochs, batch_size), dtype=np.float32)

    for step in range(n_epochs):

        # Set of str representations for all Programs ever seen
        s_history = set(Program.cache.keys())

        # Sample batch of expressions from controller
        # Shape of actions: (batch_size, max_length)
        # Shape of obs: [(batch_size, max_length)] * 3
        # Shape of priors: (batch_size, max_length, n_choices)
        actions, obs, priors = controller.sample(batch_size)

        # Instantiate, optimize, and evaluate expressions
        if pool is None:
            programs = [from_tokens(a, optimize=True) for a in actions]
        else:
            # To prevent interfering with the cache, un-optimized programs are
            # first generated serially. Programs that need optimizing are
            # optimized in parallel. Since multiprocessing operates on
            # copies of programs, we manually set the optimized constants and
            # base reward after the pool joins.
            programs = [from_tokens(a, optimize=False) for a in actions]

            # Filter programs that have not yet computed base_r
            programs_to_optimize = list(set([p for p in programs if "base_r" not in p.__dict__]))

            # Optimize and compute base_r
            results = pool.map(work, programs_to_optimize)
            for (optimized_constants, base_r), p in zip(results, programs_to_optimize):
                p.set_constants(optimized_constants)
                p.base_r = base_r

        # Retrieve metrics
        base_r = np.array([p.base_r for p in programs])
        r = np.array([p.r for p in programs])
        l = np.array([len(p.traversal) for p in programs])
        s = [p.str for p in programs] # Str representations of Programs
        invalid = np.array([p.invalid for p in programs], dtype=bool)
        all_r[step] = base_r

        if eval_all:
            success = [p.evaluate.get("success") for p in programs]
            # Check for success before risk-seeking, but don't break until after
            if any(success):
                p_final = programs[success.index(True)]

        # Update reward history
        if base_r_history is not None:
            for p in programs:
                key = p.str
                if key in base_r_history:
                    base_r_history[key].append(p.base_r)
                else:
                    base_r_history[key] = [p.base_r]

        # Collect full-batch statistics
        base_r_max = np.max(base_r)
        base_r_best = max(base_r_max, base_r_best)
        base_r_avg_full = np.mean(base_r)
        r_max = np.max(r)
        r_best = max(r_max, r_best)
        r_avg_full = np.mean(r)
        l_avg_full = np.mean(l)
        a_ent_full = np.mean(np.apply_along_axis(empirical_entropy, 0, actions))
        n_unique_full = len(set(s))
        n_novel_full = len(set(s).difference(s_history))
        invalid_avg_full = np.mean(invalid)

        # Risk-seeking policy gradient: train on top epsilon fraction of samples
        if risk_seeking:
            quantile = np.quantile(r, 1 - epsilon, interpolation="higher")
            # NOTE(review): the threshold is estimated on r but applied to
            # base_r -- kept as in the original; confirm this is intended.
            keep = base_r >= quantile
            base_r = base_r[keep]
            r = r[keep]
            programs = list(compress(programs, keep))
            l = l[keep]
            s = list(compress(s, keep))
            invalid = invalid[keep]
            actions = actions[keep, :]
            obs = [o[keep, :] for o in obs]
            priors = priors[keep, :, :]

        # Clip bounds of rewards to prevent NaNs in gradient descent
        r = np.clip(r, -1e6, 1e6)

        # Compute baseline (the choice was validated before training started)
        if baseline == "ewma_R":
            ewma = np.mean(r) if ewma is None else alpha*np.mean(r) + (1 - alpha)*ewma
            b_train = ewma
        else: # baseline == "R_e"
            ewma = -1
            b_train = quantile

        # Collect sub-batch statistics and write output
        if output_file is not None:
            base_r_avg_sub = np.mean(base_r)
            r_avg_sub = np.mean(r)
            l_avg_sub = np.mean(l)
            a_ent_sub = np.mean(np.apply_along_axis(empirical_entropy, 0, actions))
            n_unique_sub = len(set(s))
            n_novel_sub = len(set(s).difference(s_history))
            invalid_avg_sub = np.mean(invalid)
            stats = np.array([[
                base_r_best,
                base_r_max,
                base_r_avg_full,
                base_r_avg_sub,
                r_best,
                r_max,
                r_avg_full,
                r_avg_sub,
                l_avg_full,
                l_avg_sub,
                ewma,
                n_unique_full,
                n_unique_sub,
                n_novel_full,
                n_novel_sub,
                a_ent_full,
                a_ent_sub,
                invalid_avg_full,
                invalid_avg_sub
            ]], dtype=np.float32)
            with open(stats_path, 'ab') as f:
                np.savetxt(f, stats, delimiter=',')

        # Compute sequence lengths
        lengths = np.array([min(len(p.traversal), controller.max_length)
                            for p in programs], dtype=np.int32)

        # Create the Batch
        sampled_batch = Batch(actions=actions, obs=obs, priors=priors,
                              lengths=lengths, rewards=r)

        # Update and sample from the priority queue
        if priority_queue is not None:
            priority_queue.push_best(sampled_batch, programs)
            pqt_batch = priority_queue.sample_batch(controller.pqt_batch_size)
        else:
            pqt_batch = None

        # Train the controller
        summaries = controller.train_step(b_train, sampled_batch, pqt_batch)
        if writer is not None:
            writer.add_summary(summaries, step)
            writer.flush()

        # Update new best expression
        new_r_best = False
        new_base_r_best = False

        if prev_r_best is None or r_max > prev_r_best:
            new_r_best = True
            p_r_best = programs[np.argmax(r)]

        if prev_base_r_best is None or base_r_max > prev_base_r_best:
            new_base_r_best = True
            p_base_r_best = programs[np.argmax(base_r)]

        prev_r_best = r_best
        prev_base_r_best = base_r_best

        # Print new best expression
        if verbose:
            if new_r_best and new_base_r_best:
                if p_r_best == p_base_r_best:
                    print("\nNew best overall")
                    p_r_best.print_stats()
                else:
                    print("\nNew best reward")
                    p_r_best.print_stats()
                    print("...and new best base reward")
                    p_base_r_best.print_stats()

            elif new_r_best:
                print("\nNew best reward")
                p_r_best.print_stats()

            elif new_base_r_best:
                print("\nNew best base reward")
                p_base_r_best.print_stats()

        # Stop if early stopping criteria is met
        if eval_all and any(success):
            all_r = all_r[:(step + 1)]
            print("Early stopping criteria met; breaking early.")
            break
        if early_stopping and p_base_r_best.evaluate.get("success"):
            all_r = all_r[:(step + 1)]
            print("Early stopping criteria met; breaking early.")
            break

        if verbose and step > 0 and step % 10 == 0:
            print("Completed {} steps".format(step))

        if debug >= 2:
            print("\nParameter means after step {} of {}:".format(step+1, n_epochs))
            print_var_means()

    # Release the summary writer's file handle now that training is done
    if writer is not None:
        writer.close()

    if save_all_r:
        if all_r_output_file is not None:
            with open(all_r_output_file, 'ab') as f:
                np.save(f, all_r)
        else:
            # The rewards filename is derived from output_file
            print("WARNING: 'save_all_r' is set but 'output_file' is None; rewards were not saved.")

    # Save the hall of fame
    if hof is not None and hof > 0:
        programs = list(Program.cache.values()) # All unique Programs found during training

        base_r = [p.base_r for p in programs]
        i_hof = np.argsort(base_r)[-hof:][::-1] # Indices of top hof Programs
        hof_programs = [programs[i] for i in i_hof]

        if verbose:
            print("Evaluating the hall of fame...")
        if pool is not None:
            results = pool.map(hof_work, hof_programs)
        else:
            results = list(map(hof_work, hof_programs))

        # The last entry of each row is the evaluate dict; expand it to columns
        eval_keys = list(results[0][-1].keys())
        columns = ["r", "base_r", "count", "expression", "traversal"] + eval_keys
        hof_results = [result[:-1] + [result[-1][k] for k in eval_keys] for result in results]
        df = pd.DataFrame(hof_results, columns=columns)
        if hof_output_file is not None:
            print("Saving Hall of Fame to {}".format(hof_output_file))
            df.to_csv(hof_output_file, header=True, index=False)

    # Print error statistics of the cache
    n_invalid = 0
    error_types = defaultdict(int)
    error_nodes = defaultdict(int)
    for p in Program.cache.values():
        if p.invalid:
            n_invalid += p.count
            error_types[p.error_type] += p.count
            error_nodes[p.error_node] += p.count
    if n_invalid > 0:
        total_samples = (step + 1)*batch_size # May be less than n_samples if breaking early
        print("Invalid expressions: {} of {} ({:.1%}).".format(n_invalid, total_samples, n_invalid/total_samples))
        print("Error type counts:")
        for error_type, count in error_types.items():
            print("  {}: {} ({:.1%})".format(error_type, count, count/n_invalid))
        print("Error node counts:")
        for error_node, count in error_nodes.items():
            print("  {}: {} ({:.1%})".format(error_node, count, count/n_invalid))

    # Print the priority queue at the end of training
    if verbose and priority_queue is not None:
        for i, item in enumerate(priority_queue.iter_in_order()):
            print("\nPriority queue entry {}:".format(i))
            p = Program.cache[item[0]]
            p.print_stats()

    # Compute the pareto front
    if pareto_front:
        if verbose:
            print("Evaluating the pareto front...")
        all_programs = list(Program.cache.values())
        # Both columns are costs to minimize, hence the negated reward
        costs = np.array([(p.complexity_eureqa, -p.r) for p in all_programs])
        pareto_efficient_mask = is_pareto_efficient(costs) # List of bool
        pf = list(compress(all_programs, pareto_efficient_mask))
        pf.sort(key=lambda p : p.complexity_eureqa) # Sort by complexity

        if pool is not None:
            results = pool.map(pf_work, pf)
        else:
            results = list(map(pf_work, pf))

        eval_keys = list(results[0][-1].keys())
        columns = ["complexity", "r", "base_r", "count", "expression", "traversal"] + eval_keys
        pf_results = [result[:-1] + [result[-1][k] for k in eval_keys] for result in results]
        df = pd.DataFrame(pf_results, columns=columns)
        if pf_output_file is not None:
            print("Saving Pareto Front to {}".format(pf_output_file))
            df.to_csv(pf_output_file, header=True, index=False)

        # Look for a success=True case within the Pareto front
        for p in pf:
            if p.evaluate.get("success"):
                p_final = p
                break

    # Close the pool
    if pool is not None:
        pool.close()

    # Return statistics of best Program
    p = p_final if p_final is not None else p_base_r_best
    result = {
        "r" : p.r,
        "base_r" : p.base_r,
    }
    result.update(p.evaluate)
    result.update({
        "expression" : repr(p.sympy_expr),
        "traversal" : repr(p),
        "program" : p
        })

    return result
# Adapted from: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
def is_pareto_efficient(costs):
    """
    Find the pareto-efficient points given an array of costs.

    Parameters
    ----------

    costs : np.ndarray
        Array of shape (n_points, n_costs). Lower is better for every cost.
        The caller's array is not modified.

    Returns
    -------

    is_efficient_mask : np.ndarray (dtype:bool)
        Array of which elements in costs are pareto-efficient.
    """

    is_efficient = np.arange(costs.shape[0])
    n_points = costs.shape[0]
    next_point_index = 0 # Next index in the is_efficient array to search for
    while next_point_index < len(costs):
        # Keep a point if it beats the current point in at least one cost
        nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1)
        nondominated_point_mask[next_point_index] = True
        is_efficient = is_efficient[nondominated_point_mask] # Remove dominated points
        costs = costs[nondominated_point_mask]
        # Re-locate the current point in the shrunken array, then advance by one
        next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1
    is_efficient_mask = np.zeros(n_points, dtype=bool)
    is_efficient_mask[is_efficient] = True
    return is_efficient_mask


def setup_output_files(logdir, output_file):
    """
    Writes the main output file header and returns the reward, hall of fame,
    and Pareto front output filenames.

    The log directory is created if needed, and any existing main output file
    is overwritten.

    Parameters:
    -----------

    logdir : string
        Directory to log to.

    output_file : string
        Name of output file.

    Returns:
    --------

    all_r_output_file : string
        all_r output filename

    hof_output_file : string
        hof output filename

    pf_output_file : string
        pf output filename
    """
    os.makedirs(logdir, exist_ok=True)
    output_file = os.path.join(logdir, output_file)
    prefix, _ = os.path.splitext(output_file)
    all_r_output_file = "{}_all_r.npy".format(prefix)
    hof_output_file = "{}_hof.csv".format(prefix)
    pf_output_file = "{}_pf.csv".format(prefix)
    with open(output_file, 'w') as f:
        # r_best : Maximum across all iterations so far
        # r_max : Maximum across this iteration's batch
        # r_avg_full : Average across this iteration's full batch (before taking epsilon subset)
        # r_avg_sub : Average across this iteration's epsilon-subset batch
        # n_unique_* : Number of unique Programs in batch
        # n_novel_* : Number of never-before-seen Programs per batch
        # a_ent_* : Empirical positional entropy across sequences averaged over positions
        # invalid_avg_* : Fraction of invalid Programs per batch
        headers = ["base_r_best",
                   "base_r_max",
                   "base_r_avg_full",
                   "base_r_avg_sub",
                   "r_best",
                   "r_max",
                   "r_avg_full",
                   "r_avg_sub",
                   "l_avg_full",
                   "l_avg_sub",
                   "ewma",
                   "n_unique_full",
                   "n_unique_sub",
                   "n_novel_full",
                   "n_novel_sub",
                   "a_ent_full",
                   "a_ent_sub",
                   "invalid_avg_full",
                   "invalid_avg_sub"]
        f.write("{}\n".format(",".join(headers)))

    return all_r_output_file, hof_output_file, pf_output_file


class cached_property(object):
    """
    Decorator used for lazy evaluation of an object attribute. The property
    should be non-mutable, since it replaces itself.
    """

    def __init__(self, getter):
        self.getter = getter

        functools.update_wrapper(self, getter)

    def __get__(self, obj, cls):
        # Accessed on the class itself: return the descriptor
        if obj is None:
            return self

        # Compute once, then store the value on the instance under the same
        # name so later lookups find it there instead of calling the getter
        value = self.getter(obj)
        setattr(obj, self.getter.__name__, value)
        return value


# Entropy computation in batch
def empirical_entropy(labels):
    """
    Compute the empirical entropy (natural log) of a sequence of labels.

    Parameters
    ----------

    labels : array-like
        One-dimensional sequence of labels.

    Returns
    -------

    ent : float
        Entropy of the empirical label distribution; 0.0 when there are fewer
        than two labels or only one distinct label. The result is always a
        float: np.apply_along_axis takes its output dtype from the first
        call, so an integer 0 for the first slice would truncate the entropy
        of every other slice to an integer.
    """

    n_labels = len(labels)

    if n_labels <= 1:
        return 0.0

    _, counts = np.unique(labels, return_counts=True)

    if len(counts) <= 1:
        return 0.0

    probs = counts / n_labels

    # Every entry of probs is strictly positive, so the log is finite
    return float(-np.sum(probs * np.log(probs)))