diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..09ce2199
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+*.DS_Store
+*.pyc
+*.egg*
+venv*
+dsr/dsr/summary*
+*log_*
+.gitignore
+.ipynb_checkpoints
+~$*
+*.vscode/
+dsr/build
+dsr/dsr/cyfunc*
+**/log/
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..92216caa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Lawrence Livermore National Security, LLC
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 00000000..3737d5a8
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,21 @@
+This work was produced under the auspices of the U.S. Department of
+Energy by Lawrence Livermore National Laboratory under Contract
+DE-AC52-07NA27344.
+
+This work was prepared as an account of work sponsored by an agency of
+the United States Government. Neither the United States Government nor
+Lawrence Livermore National Security, LLC, nor any of their employees
+makes any warranty, expressed or implied, or assumes any legal liability
+or responsibility for the accuracy, completeness, or usefulness of any
+information, apparatus, product, or process disclosed, or represents that
+its use would not infringe privately owned rights.
+
+Reference herein to any specific commercial product, process, or service
+by trade name, trademark, manufacturer, or otherwise does not necessarily
+constitute or imply its endorsement, recommendation, or favoring by the
+United States Government or Lawrence Livermore National Security, LLC.
+
+The views and opinions of authors expressed herein do not necessarily
+state or reflect those of the United States Government or Lawrence
+Livermore National Security, LLC, and shall not be used for advertising
+or product endorsement purposes.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..61248c9b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,134 @@
+# Deep symbolic regression
+
+Deep symbolic regression (DSR) is a deep learning algorithm for symbolic regression--the task of recovering tractable mathematical expressions from an input dataset. The package `dsr` contains the code for DSR, including a single-point, parallelized launch script (`dsr/run.py`), a baseline genetic programming-based symbolic regression algorithm, and an sklearn-like interface for use with your own data.
+
+This code supports the ICLR 2021 paper [Deep symbolic regression: Recovering mathematical expressions from data via risk-seeking policy gradients](https://openreview.net/forum?id=m5Qsh0kBQG).
+
+# Installation
+
+Installation is straightforward in a Python 3 virtual environment using Pip. From the repository root:
+
+```
+python3 -m venv venv3 # Create a Python 3 virtual environment
+source venv3/bin/activate # Activate the virtual environment
+pip install -r requirements.txt # Install Python dependencies
+export CFLAGS="-I $(python -c "import numpy; print(numpy.get_include())") $CFLAGS" # Needed on Mac to prevent fatal error: 'numpy/arrayobject.h' file not found
+pip install -e ./dsr # Install DSR package
+```
+
+To perform experiments involving the GP baseline, you will need the additional package `deap`.
+
+# Example usage
+
+To try out DSR, use the following command from the repository root:
+
+```
+python -m dsr.run ./dsr/dsr/config.json --b=Nguyen-6
+```
+
+This should solve in around 50 training steps (~30 seconds on a laptop).
+
+# Getting started
+
+## Configuring runs
+
+DSR uses JSON files to configure training.
+
+Top-level key "task" specifies details of the benchmark expression for DSR or GP. See docs in `regression.py` for details.
+
+Top-level key "training" specifies the training hyperparameters for DSR. See docs in `train.py` for details.
+
+Top-level key "controller" specifies the RNN controller hyperparameters for DSR. See docs in `controller.py` for details.
+
+Top-level key "gp" specifies the hyperparameters for GP if using the GP baseline. See docs for `dsr.baselines.gpsr.GP` for details.
+
+## Launching runs
+
+After configuring a run, launching it is simple:
+
+```
+python -m dsr.run [PATH_TO_CONFIG] [--OPTIONS]
+```
+
+## Sklearn interface
+
+DSR also provides an [sklearn-like regressor interface](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html). Example usage:
+
+```
+from dsr import DeepSymbolicRegressor
+import numpy as np
+
+# Generate some data
+np.random.seed(0)
+X = np.random.random((10, 2))
+y = np.sin(X[:,0]) + X[:,1] ** 2
+
+# Create the model
+model = DeepSymbolicRegressor("config.json")
+
+# Fit the model
+model.fit(X, y) # Should solve in ~10 seconds
+
+# View the best expression
+print(model.program_.pretty())
+
+# Make predictions
+model.predict(2 * X)
+```
+
+## Using an external dataset
+
+To use your own dataset, simply provide the path to the `"dataset"` key in the config, and give your task an arbitrary name.
+
+```
+"task": {
+    "task_type": "regression",
+    "name": "my_task",
+    "dataset": "./path/to/my_dataset.csv",
+    ...
+}
+```
+
+Then run DSR:
+
+```
+python -m dsr.run path/to/config.json
+```
+
+Note the `--b` flag matches the name of the CSV file (minus the `.csv` extension).
+
+## Command-line examples
+
+Show command-line help and quit
+
+```
+python -m dsr.run --help
+```
+
+Train 2 independent runs of DSR on the Nguyen-1 benchmark using 2 cores
+
+```
+python -m dsr.run config.json --b=Nguyen-1 --mc=2 --num_cores=2
+```
+
+Train DSR on all 12 Nguyen benchmarks using 12 cores
+
+```
+python -m dsr.run config.json --b=Nguyen --num_cores=12
+```
+
+Train 2 independent runs of GP on Nguyen-1
+
+```
+python -m dsr.run config.json --method=gp --b=Nguyen-1 --mc=2 --num_cores=2
+```
+
+Train DSR on Nguyen-1 and Nguyen-4
+
+```
+python -m dsr.run config.json --b=Nguyen-1 --b=Nguyen-4
+```
+
+# Release
+
+LLNL-CODE-647188
diff --git a/dsr/dsr/__init__.py b/dsr/dsr/__init__.py
new file mode 100644
index 00000000..b18aa77a
--- /dev/null
+++ b/dsr/dsr/__init__.py
@@ -0,0 +1,3 @@
+from dsr.core import DeepSymbolicOptimizer
+from dsr.task.regression.sklearn import DeepSymbolicRegressor
+
diff --git a/dsr/dsr/baselines/__init__.py b/dsr/dsr/baselines/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsr/dsr/baselines/constraints.py b/dsr/dsr/baselines/constraints.py
new file mode 100644
index 00000000..45c4526e
--- /dev/null
+++ b/dsr/dsr/baselines/constraints.py
@@ -0,0 +1,128 @@
+"""Defines constraints for GP individuals, to be used as decorators for
+evolutionary operations."""
+
+from dsr.functions import UNARY_TOKENS, BINARY_TOKENS
+
+# Trigonometric operators; check_trig rejects one nested inside another.
+TRIG_TOKENS = ["sin", "cos", "tan", "csc", "sec", "cot"]
+
+# Map each unary operator to the operator that undoes it (forward direction).
+INVERSE_TOKENS = {
+    "exp": "log",
+    "neg": "neg",
+    "inv": "inv",
+    "sqrt": "n2",
+}
+
+# Pair every trig operator with its "arc"-prefixed counterpart.
+INVERSE_TOKENS.update((trig, "arc" + trig) for trig in TRIG_TOKENS)
+
+# Make the mapping symmetric by adding every pair in the reverse direction.
+# The items are snapshotted first so the dict is not resized mid-iteration.
+INVERSE_TOKENS.update(
+    (value, key) for key, value in list(INVERSE_TOKENS.items())
+)
+
+DEBUG = False  # When True, the checks below print each individual they reject
+
+
+def check_inv(ind):
+    """Return True when some token is immediately followed by its inverse."""
+
+    tokens = [node.name for node in ind]
+    for current, following in zip(tokens, tokens[1:]):
+        if current in INVERSE_TOKENS and INVERSE_TOKENS[current] == following:
+            if DEBUG:
+                print("Constrained inverse:", ind)
+            return True
+    return False
+
+
+def check_const(ind):
+    """Returns True if all children of any unary/binary operator are const tokens."""
+    # A const is a terminal, so in traversal order an operator's all-const children directly follow it.
+    names = [node.name for node in ind]
+    for i, name in enumerate(names):
+        if name in UNARY_TOKENS and names[i+1:i+2] == ["const"]:  # Slices never raise on truncated input
+            if DEBUG:
+                print("Constrained const (unary)", ind)
+            return True
+        if name in BINARY_TOKENS and names[i+1:i+3] == ["const", "const"]:  # Both children, not the first twice
+            if DEBUG:
+                print("Constrained const (binary)", ind)
+            return True
+    return False
+
+
+def check_trig(ind):
+    """Return True when a trig operator occurs inside the subtree of
+    another trig operator."""
+
+    inside_trig = False  # True while walking the subtree of a trig operator
+    open_slots = None  # Child positions in that subtree still to be filled
+    for token in [node.name for node in ind]:
+        if token in TRIG_TOKENS:
+            if inside_trig:
+                if DEBUG:
+                    print("Constrained trig:", ind)
+                return True
+            inside_trig, open_slots = True, 1  # One slot: the trig operator's argument
+            continue
+        if not inside_trig:
+            continue
+        # Net change in open slots: binary +1, unary 0, anything else -1.
+        if token in BINARY_TOKENS:
+            open_slots += 1
+        elif token not in UNARY_TOKENS:
+            open_slots -= 1
+        inside_trig = open_slots != 0
+    return False
+
+
+def make_check_min_len(min_length):
+    """Build a constraint that flags individuals shorter than min_length."""
+
+    def check_min_len(ind):
+        """Return True if len(ind) is below min_length, else False."""
+
+        length = len(ind)
+        too_short = length < min_length
+        if too_short and DEBUG:
+            print("Constrained min len: {} (length {})".format(ind, length))
+
+        return too_short
+
+    return check_min_len
+
+
+def make_check_max_len(max_length):
+    """Build a constraint that flags individuals longer than max_length."""
+
+    def check_max_len(ind):
+        """Return True if len(ind) exceeds max_length, else False."""
+
+        length = len(ind)
+        too_long = length > max_length
+        if too_long and DEBUG:
+            print("Constrained max len: {} (length {})".format(ind, length))
+
+        return too_long
+
+    return check_max_len
+
+
+def make_check_num_const(max_const):
+    """Build a constraint that caps the number of const tokens at max_const."""
+
+    def check_num_const(ind):
+        """Return True if ind holds more than max_const tokens named "const"."""
+
+        # Count the nodes whose name marks them as a constant placeholder.
+        n_consts = sum(1 for token in ind if token.name == "const")
+        exceeded = n_consts > max_const
+        if exceeded and DEBUG:
+            print("Constrained max const: {} ({} consts)".format(ind, n_consts))
+
+        return exceeded
+
+    return check_num_const
diff --git a/dsr/dsr/baselines/gpsr.py b/dsr/dsr/baselines/gpsr.py
new file mode 100644
index 00000000..f3e7c186
--- /dev/null
+++ b/dsr/dsr/baselines/gpsr.py
@@ -0,0 +1,297 @@
+import random
+import operator
+import importlib
+from functools import partial
+
+import numpy as np
+
+from dsr.functions import function_map
+from dsr.const import make_const_optimizer
+
+from . import constraints
+
+
+GP_MOD = "deap"  # Package whose submodules are imported dynamically below
+OBJECTS = ["base", "gp", "creator", "tools", "algorithms"]  # NOTE(review): appears unused in this module -- confirm
+gp = importlib.import_module(GP_MOD + ".gp")
+base = importlib.import_module(GP_MOD + ".base")
+creator = importlib.import_module(GP_MOD + ".creator")
+tools = importlib.import_module(GP_MOD + ".tools")
+algorithms = importlib.import_module(GP_MOD + ".algorithms")
+
+
+class GP():
+    """Genetic-programming based symbolic regression class.
+
+    Builds a DEAP primitive set, fitness closures and constrained variation
+    operators in __init__; train() then runs DEAP's eaSimple once.
+    """
+
+    def __init__(self, dataset, metric="nmse", population_size=1000,
+                 generations=1000, n_samples=None, tournament_size=3,
+                 p_crossover=0.5, p_mutate=0.1,
+                 const_range=(-1, 1), const_optimizer="scipy",
+                 const_params=None, seed=0, early_stopping=False,
+                 threshold=1e-12, verbose=True, protected=True,
+                 pareto_front=False,
+                 # Constraint hyperparameters
+                 constrain_const=True,
+                 constrain_trig=True,
+                 constrain_inv=True,
+                 constrain_min_len=True,
+                 constrain_max_len=True,
+                 constrain_num_const=True,
+                 min_length=4,
+                 max_length=30,
+                 max_const=3):
+        """Set up fitness functions, primitive set and evolutionary operators."""
+
+        self.dataset = dataset
+        self.fitted = False
+
+        assert (n_samples is None) != (generations is None), "Exactly one of 'n_samples' or 'generations' must be None."
+        if generations is None:
+            generations = int(n_samples / population_size)
+
+        # Set hyperparameters
+        self.population_size = population_size
+        self.generations = generations
+        self.tournament_size = tournament_size
+        self.p_mutate = p_mutate
+        self.p_crossover = p_crossover
+        self.seed = seed
+        self.early_stopping = early_stopping
+        self.threshold = threshold
+        self.verbose = verbose
+        self.pareto_front = pareto_front
+
+        # Fitness function used during training
+        # Includes closure for fitness function metric and training data
+        fitness = partial(self.make_fitness(metric), y=dataset.y_train, var_y=np.var(dataset.y_train)) # Function of y_hat
+        self.fitness = partial(self.compute_fitness, optimize=True, fitness=fitness, X=dataset.X_train.T) # Function of individual
+
+        # Test NMSE, used as final performance metric
+        # Includes closure for test data
+        nmse_test = partial(self.make_fitness("nmse"), y=dataset.y_test, var_y=np.var(dataset.y_test)) # Function of y_hat
+        self.nmse_test = partial(self.compute_fitness, optimize=False, fitness=nmse_test, X=dataset.X_test.T) # Function of individual
+
+        # Noiseless test NMSE, only used to determine success for final performance
+        # Includes closure for noiseless test data
+        nmse_test_noiseless = partial(self.make_fitness("nmse"), y=dataset.y_test_noiseless, var_y=np.var(dataset.y_test_noiseless)) # Function of y_hat
+        self.nmse_test_noiseless = partial(self.compute_fitness, optimize=False, fitness=nmse_test_noiseless, X=dataset.X_test.T) # Function of individual
+        self.success = lambda ind : self.nmse_test_noiseless(ind)[0] < self.threshold # Function of individual
+
+        # Create the primitive set
+        pset = gp.PrimitiveSet("MAIN", dataset.X_train.shape[1])
+
+        # Add input variables
+        rename_kwargs = {"ARG{}".format(i) : "x{}".format(i + 1) for i in range(dataset.n_input_var)}
+        pset.renameArguments(**rename_kwargs)
+
+        # Add primitives
+        for op_name in dataset.function_set:
+            if op_name == "const":
+                continue
+            assert op_name in function_map, "Operation {} not recognized.".format(op_name)
+
+            # Prepend available protected operators with "protected_"
+            if protected and not op_name.startswith("protected_"):
+                protected_op_name = "protected_{}".format(op_name)
+                if protected_op_name in function_map:
+                    op_name = protected_op_name
+
+            op = function_map[op_name]
+            pset.addPrimitive(op.function, op.arity, name=op.name)
+
+        # Add constant placeholder (starts at 1.0 and is optimized; const_range is currently unused)
+        const = "const" in dataset.function_set
+        if const:
+            const_params = const_params if const_params is not None else {}
+            self.const_opt = make_const_optimizer(const_optimizer, **const_params)
+            pset.addTerminal(1.0, name="const")
+
+        # Create custom fitness and individual classes
+        if self.pareto_front:
+            # Fitness is compared lexicographically, so second dimension
+            # (complexity) is only used in selection if first dimension (error)
+            # is the same.
+            creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
+        else:
+            creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
+        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
+
+        # Define the evolutionary operators
+        self.toolbox = base.Toolbox()
+        self.toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
+        self.toolbox.register("individual", tools.initIterate, creator.Individual, self.toolbox.expr)
+        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
+        self.toolbox.register("compile", gp.compile, pset=pset)
+        self.toolbox.register("evaluate", self.fitness)
+        self.toolbox.register("select", tools.selTournament, tournsize=tournament_size)
+        self.toolbox.register("mate", gp.cxOnePoint)
+        self.toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
+        self.toolbox.register('mutate', gp.mutUniform, expr=self.toolbox.expr_mut, pset=pset)
+
+        # Define constraints, each defined by a func : gp.Individual -> bool.
+        # We decorate mutation/crossover operators with constrain, which
+        # replaces a child with a random parent if func(ind) is True.
+        constrain = partial(gp.staticLimit, max_value=0) # Constraint decorator
+        funcs = []
+        if constrain_min_len:
+            funcs.append(constraints.make_check_min_len(min_length)) # Minimum length
+        if constrain_max_len:
+            funcs.append(constraints.make_check_max_len(max_length)) # Maximum length
+        if constrain_inv:
+            funcs.append(constraints.check_inv) # Subsequence inverse unary operators
+        if constrain_trig:
+            funcs.append(constraints.check_trig) # Nested trig operators
+        if constrain_const and const:
+            funcs.append(constraints.check_const) # All children are constants
+        if constrain_num_const and const:
+            funcs.append(constraints.make_check_num_const(max_const)) # Number of constants
+        for func in funcs:
+            for variation in ["mate", "mutate"]:
+                self.toolbox.decorate(variation, constrain(func))
+
+        # Create the training function
+        self.algorithm = algorithms.eaSimple
+
+    def compute_fitness(self, individual, fitness, X, optimize=False):
+        """Return the fitness tuple of an individual on X, optionally optimizing its constants first."""
+
+        if optimize:
+            # Retrieve symbolic constants
+            const_idxs = [i for i, node in enumerate(individual) if node.name == "const"]
+
+            # Check if best individual (or any individual in Pareto front) has success=True
+            # (i.e. NMSE below threshold on noiseless test set)
+            if self.early_stopping and any(self.success(ind) for ind in self.hof):
+                # Sentinel fitness; its length must match the fitness weights
+                return (999, 999) if self.pareto_front else (999,)
+
+        if optimize and len(const_idxs) > 0:
+
+            # Objective function for evaluating constants
+            def obj(consts):
+                for i, const in zip(const_idxs, consts):
+                    individual[i] = gp.Terminal(const, False, object)
+                    individual[i].name = "const" # For good measure
+                f = self.toolbox.compile(expr=individual)
+                y_hat = f(*X)
+                y = self.dataset.y_train
+                if np.isfinite(y_hat).all():
+                    # Squash error to prevent consts from becoming inf
+                    return -1/(1 + np.mean((y - y_hat)**2))
+                else:
+                    # Worst possible value: the squashed objective lies in [-1, 0)
+                    return 0
+
+            # Do the optimization and set the optimized constants
+            x0 = np.ones(len(const_idxs))
+            optimized_consts = self.const_opt(obj, x0)
+            for i, const in zip(const_idxs, optimized_consts):
+                individual[i] = gp.Terminal(const, False, object)
+                individual[i].name = "const" # This is necessary to ensure the constant is re-optimized in the next generation
+
+        # Execute the individual
+        f = self.toolbox.compile(expr=individual)
+        with np.errstate(all="ignore"):
+            y_hat = f(*X)
+
+        # Check for validity
+        if np.isfinite(y_hat).all():
+            values = (fitness(y_hat=y_hat),)
+        else:
+            values = (np.inf,)
+
+        # Compute complexity (only if using Pareto front)
+        if self.pareto_front:
+            complexity = sum(function_map[prim.name].complexity
+                             if prim.name in function_map
+                             else 1 for prim in individual)
+            values += (complexity,)
+
+        return values
+
+    def train(self):
+        """Train the GP once; returns (best individual, DEAP logbook)."""
+
+        if self.fitted:
+            raise RuntimeError("This GP has already been fitted!")
+
+        random.seed(self.seed)
+
+        pop = self.toolbox.population(n=self.population_size)
+        if self.pareto_front:
+            self.hof = tools.ParetoFront()
+        else:
+            self.hof = tools.HallOfFame(maxsize=1)
+
+        stats_fit = tools.Statistics(lambda p : p.fitness.values[0])
+        stats_fit.register("avg", np.mean)
+        stats_fit.register("min", np.min)
+        stats_size = tools.Statistics(len)
+        stats_size.register("avg", np.mean)
+        mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
+
+        pop, logbook = self.algorithm(population=pop,
+                                      toolbox=self.toolbox,
+                                      cxpb=self.p_crossover,
+                                      mutpb=self.p_mutate,
+                                      ngen=self.generations,
+                                      stats=mstats,
+                                      halloffame=self.hof,
+                                      verbose=self.verbose)
+
+        self.fitted = True
+
+        # Delete custom classes
+        del creator.FitnessMin
+        del creator.Individual
+        if hasattr(gp, "const"):
+            del gp.const
+
+        # The best individual is the first one in self.hof with success=True,
+        # otherwise the highest reward. This mimics DSR's train.py.
+        ind_best = None
+        for ind in self.hof:
+            if self.success(ind):
+                ind_best = ind
+                break
+        ind_best = ind_best if ind_best is not None else self.hof[0] # first element in self.hof is the fittest
+
+        if self.verbose:
+            print("Printing {}:".format("Pareto front" if self.pareto_front else "hall of fame"))
+            print("Fitness  |  Individual")
+            for ind in self.hof:
+                print(ind.fitness, [token.name for token in ind])
+
+        return ind_best, logbook
+
+    def make_fitness(self, metric):
+        """Return the error function (of y, y_hat, var_y) named by metric; raises ValueError if unknown."""
+
+        if metric == "mse":
+            fitness = lambda y, y_hat, var_y : np.mean((y - y_hat)**2)
+
+        elif metric == "rmse":
+            fitness = lambda y, y_hat, var_y : np.sqrt(np.mean((y - y_hat)**2))
+
+        elif metric == "nmse":
+            fitness = lambda y, y_hat, var_y : np.mean((y - y_hat)**2 / var_y)
+
+        elif metric == "nrmse":
+            fitness = lambda y, y_hat, var_y : np.sqrt(np.mean((y - y_hat)**2 / var_y))
+
+        # Complementary inverse NMSE
+        elif metric == "cinv_nmse":
+            fitness = lambda y, y_hat, var_y : 1 - 1/(1 + np.mean((y - y_hat)**2 / var_y))
+
+        # Complementary inverse NRMSE
+        elif metric == "cinv_nrmse":
+            fitness = lambda y, y_hat, var_y : 1 - 1/(1 + np.sqrt(np.mean((y - y_hat)**2 / var_y)))
+
+        else:
+            raise ValueError("Metric not recognized.")
+
+        return fitness
diff --git a/dsr/dsr/config.json b/dsr/dsr/config.json
new file mode 100644
index 00000000..831c8707
--- /dev/null
+++ b/dsr/dsr/config.json
@@ -0,0 +1,99 @@
+{
+   "task": {
+      "task_type" : "regression",
+      "name" : "Nguyen-1",
+      "function_set": null,
+      "dataset" : {
+         "name" : null,
+         "noise": null,
+         "dataset_size_multiplier": 1.0
+      },
+      "metric" : "inv_nrmse",
+      "metric_params" : [1.0],
+      "threshold" : 1e-12,
+      "protected" : false,
+      "reward_noise" : 0.0
+   },
+   "prior": {
+      "length" : {"min_" : 4, "max_" : 30},
+      "repeat" : {"tokens" : "const", "max_" : 3},
+      "inverse" : {},
+      "trig" : {},
+      "const" : {}
+   },
+   "training": {
+      "logdir": "./log",
+        "n_epochs": null,
+        "n_samples": 2000000,
+        "batch_size": 1000,
+        "complexity": "length",
+        "complexity_weight": 0.0,
+        "const_optimizer": "scipy",
+        "const_params": {},
+        "alpha": 0.5,
+        "epsilon": 0.05,
+        "verbose": true,
+        "baseline": "R_e",
+        "b_jumpstart": false,
+        "n_cores_batch": 1,
+        "summary": false,
+        "debug": 0,
+        "output_file": null,
+        "save_all_r": false,
+        "early_stopping": true,
+        "pareto_front": false,
+        "hof": 100
+   },
+   "controller": {
+      "cell": "lstm",
+      "num_layers": 1,
+      "num_units": 32,
+      "initializer": "zeros",
+      "embedding": false,
+      "embedding_size": 8,
+      "optimizer": "adam",
+      "learning_rate": 0.0005,
+      "observe_action": false,
+      "observe_parent": true,
+      "observe_sibling": true,
+      "entropy_weight": 0.005,
+      "ppo": false,
+      "ppo_clip_ratio": 0.2,
+      "ppo_n_iters": 10,
+      "ppo_n_mb": 4,
+      "pqt": false,
+      "pqt_k": 10,
+      "pqt_batch_size": 1,
+      "pqt_weight": 200.0,
+      "pqt_use_pg": false,
+      "max_length": 30
+   },
+   "gp": {
+      "population_size": 1000,
+      "generations": null,
+      "n_samples" : 2000000,
+      "tournament_size": 2,
+      "metric": "nmse",
+      "const_range": [
+         -1.0,
+         1.0
+      ],
+      "p_crossover": 0.95,
+      "p_mutate": 0.03,
+      "seed": 0,
+      "early_stopping": true,
+      "pareto_front": false,
+      "threshold": 1e-12,
+      "verbose": false,
+      "protected": true,
+      "constrain_const": true,
+      "constrain_trig": true,
+      "constrain_inv": true,
+      "constrain_min_len": true,
+      "constrain_max_len": true,
+      "constrain_num_const": true,
+      "min_length": 4,
+      "max_length": 30,
+      "max_const" : 3
+   }
+}
diff --git a/dsr/dsr/const.py b/dsr/dsr/const.py
new file mode 100644
index 00000000..dd41cbf9
--- /dev/null
+++ b/dsr/dsr/const.py
@@ -0,0 +1,74 @@
+"""Constant optimizer used for deep symbolic regression."""
+
+from functools import partial
+
+import numpy as np
+from scipy.optimize import minimize
+
+
+def make_const_optimizer(name, **kwargs):
+    """Instantiate the constant optimizer registered under `name`.
+
+    `name` may be None, "dummy" or "scipy"; other values raise KeyError.
+    Keyword arguments are forwarded unchanged to the optimizer constructor.
+    """
+    registry = dict.fromkeys((None, "dummy"), Dummy)
+    registry["scipy"] = ScipyMinimize
+    optimizer_cls = registry[name]
+    return optimizer_cls(**kwargs)
+
+
+class ConstOptimizer(object):
+    """Abstract callable that tunes the constants of an expression."""
+
+    def __init__(self, **kwargs):
+        # Options are kept verbatim; each subclass decides how to use them.
+        self.kwargs = kwargs
+
+    def __call__(self, f, x0):
+        """
+        Minimize an objective function, starting from an initial guess.
+
+        The objective is the negated base reward (the training reward with
+        its penalties left out). Penalties are omitted because they do not
+        depend on the constants being optimized.
+
+        Parameters
+        ----------
+        f : function mapping np.ndarray to float
+            Objective to minimize (negative base reward).
+
+        x0 : np.ndarray
+            Starting values for the constant placeholders.
+
+        Returns
+        -------
+        x : np.ndarray
+            Optimized constants, one per placeholder.
+        """
+        raise NotImplementedError
+
+
+class Dummy(ConstOptimizer):
+    """No-op optimizer: every constant keeps its initial guess."""
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def __call__(self, f, x0):
+        # The objective f is deliberately ignored.
+        return x0
+        
+
+class ScipyMinimize(ConstOptimizer):
+    """Constant optimizer backed by scipy.optimize.minimize."""
+
+    def __init__(self, **kwargs):
+        # kwargs are stored by the base class and forwarded to minimize().
+        super().__init__(**kwargs)
+
+    def __call__(self, f, x0):
+        # Division warnings raised during the minimize call are suppressed.
+        with np.errstate(divide="ignore"):
+            result = minimize(f, x0, **self.kwargs)
+        return result["x"]
diff --git a/dsr/dsr/controller.py b/dsr/dsr/controller.py
new file mode 100644
index 00000000..1872c7f0
--- /dev/null
+++ b/dsr/dsr/controller.py
@@ -0,0 +1,666 @@
+"""Controller used to generate distribution over hierarchical, variable-length objects."""
+
+import tensorflow as tf
+import numpy as np
+
+from dsr.program import Program
+from dsr.memory import Batch
+from dsr.subroutines import parents_siblings
+from dsr.prior import LengthConstraint
+
+
+class LinearWrapper(tf.contrib.rnn.LayerRNNCell):
+    """
+    Wraps an RNN cell and projects its output through a dense layer.
+
+    See: https://github.com/tensorflow/models/blob/master/research/brain_coder/single_task/pg_agent.py
+    """
+
+    def __init__(self, cell, output_size):
+        self.cell = cell
+        self._output_size = output_size
+
+    @property
+    def state_size(self):
+        return self.cell.state_size
+
+    @property
+    def output_size(self):
+        return self._output_size
+
+    def zero_state(self, batch_size, dtype):
+        return self.cell.zero_state(batch_size, dtype)
+
+    def __call__(self, inputs, state, scope=None):
+        # Both the wrapped cell and the dense layer run in a scope named after this class
+        with tf.variable_scope(type(self).__name__):
+            cell_out, new_state = self.cell(inputs, state, scope=scope)
+            projected = tf.layers.dense(cell_out, units=self._output_size)
+        return projected, new_state
+
+
+class Controller(object):
+    """
+    Recurrent neural network (RNN) controller used to generate expressions.
+
+    Specifically, the RNN outputs a distribution over pre-order traversals of
+    symbolic expression trees. It is trained using REINFORCE with baseline.
+
+    Parameters
+    ----------
+    sess : tf.Session
+        TensorFlow Session object.
+
+    prior : dsr.prior.JointPrior
+        JointPrior object used to adjust probabilities during sampling.
+
+    summary : bool
+        Write tensorboard summaries?
+
+    debug : int
+        Debug level, also used in learn(). 0: No debug. 1: Print shapes and
+        number of parameters for each variable.
+
+    cell : str
+        Recurrent cell to use. Supports 'lstm' and 'gru'.
+
+    num_layers : int
+        Number of RNN layers.
+
+    num_units : int or list of ints
+        Number of RNN cell units in each of the RNN's layers. If int, the value
+        is repeated for each layer.
+
+    initializer : str
+        Initializer for the recurrent cell. Supports 'zeros' and 'var_scale'.
+
+    embedding : bool
+        Embed each observation?
+
+    embedding_size : int
+        Size of embedding for each observation if embedding=True.
+
+    optimizer : str
+        Optimizer to use. Supports 'adam', 'rmsprop', and 'sgd'.
+
+    learning_rate : float
+        Learning rate for optimizer.
+
+    observe_action : bool
+        Observe previous action token?
+
+    observe_parent : bool
+        Observe parent token?
+
+    observe_sibling : bool
+        Observe sibling token?
+
+    entropy_weight : float
+        Coefficient for entropy bonus.
+        
+    ppo : bool
+        Use proximal policy optimization (instead of vanilla policy gradient)?
+
+    ppo_clip_ratio : float
+        Clip ratio to use for PPO.
+
+    ppo_n_iters : int
+        Number of optimization iterations for PPO.
+
+    ppo_n_mb : int
+        Number of minibatches per optimization iteration for PPO.
+
+    pqt : bool
+        Train with priority queue training (PQT)?
+
+    pqt_k : int
+        Size of priority queue.
+
+    pqt_batch_size : int
+        Size of batch to sample (with replacement) from priority queue.
+
+    pqt_weight : float
+        Coefficient for PQT loss function.
+
+    pqt_use_pg : bool
+        Use policy gradient loss when using PQT?
+
+    max_length : int or None
+        Maximum sequence length. This will be overridden if a LengthConstraint
+        with a maximum length is part of the prior.
+
+    """
+
+    def __init__(self, sess, prior, debug=0, summary=True,
+                 # RNN cell hyperparameters
+                 cell='lstm',
+                 num_layers=1,
+                 num_units=32,
+                 initializer='zeros',
+                 # Embedding hyperparameters
+                 embedding=False,
+                 embedding_size=4,
+                 # Optimizer hyperparameters
+                 optimizer='adam',
+                 learning_rate=0.001,
+                 # Observation space hyperparameters
+                 observe_action=True,
+                 observe_parent=True,
+                 observe_sibling=True,
+                 # Loss hyperparameters
+                 entropy_weight=0.0,
+                 # PPO hyperparameters
+                 ppo=False,
+                 ppo_clip_ratio=0.2,
+                 ppo_n_iters=10,
+                 ppo_n_mb=4,
+                 # PQT hyperparameters
+                 pqt=False,
+                 pqt_k=10,
+                 pqt_batch_size=1,
+                 pqt_weight=200.0,
+                 pqt_use_pg=False,
+                 # Other hyperparameters
+                 max_length=None):
+        """Build the sampling graph, batch placeholders, losses, training op, and summaries."""
+        self.sess = sess
+        self.prior = prior
+        self.summary = summary
+        self.rng = np.random.RandomState(0) # Used for PPO minibatch sampling
+
+        lib = Program.library
+
+        # Find max_length from the LengthConstraint prior, if it exists
+        prior_max_length = None
+        for single_prior in self.prior.priors:
+            if isinstance(single_prior, LengthConstraint):
+                if single_prior.max is not None:
+                    prior_max_length = single_prior.max
+                    self.max_length = prior_max_length
+                break
+        if prior_max_length is None:
+            assert max_length is not None, "max_length must be specified if "\
+                "there is no LengthConstraint."
+            self.max_length = max_length
+            print("WARNING: Maximum length not constrained. Sequences will "
+                  "stop at {} and complete by repeating the first input "
+                  "variable.".format(self.max_length))
+        elif max_length is not None and max_length != self.max_length:
+            print("WARNING: max_length ({}) will be overridden by value from "
+                  "LengthConstraint ({}).".format(max_length, self.max_length))
+        max_length = self.max_length
+
+        # Hyperparameters
+        self.observe_parent = observe_parent
+        self.observe_sibling = observe_sibling
+        self.entropy_weight = entropy_weight
+        self.ppo = ppo
+        self.ppo_n_iters = ppo_n_iters
+        self.ppo_n_mb = ppo_n_mb
+        self.pqt = pqt
+        self.pqt_k = pqt_k
+        self.pqt_batch_size = pqt_batch_size
+
+        n_choices = lib.L
+
+        # Placeholders, computed after instantiating expressions
+        self.batch_size = tf.placeholder(dtype=tf.int32, shape=(), name="batch_size")
+        self.baseline = tf.placeholder(dtype=tf.float32, shape=(), name="baseline")
+        
+        # Parameter assertions/warnings
+        assert observe_action + observe_parent + observe_sibling > 0, "Must include at least one observation."
+
+        self.compute_parents_siblings = any([self.observe_parent,
+                                             self.observe_sibling,
+                                             self.prior.requires_parents_siblings])
+
+        # Build controller RNN
+        with tf.name_scope("controller"):
+
+            def make_initializer(name):
+                if name == "zeros":
+                    return tf.zeros_initializer()
+                if name == "var_scale":
+                    return tf.contrib.layers.variance_scaling_initializer(
+                            factor=0.5, mode='FAN_AVG', uniform=True, seed=0)
+                raise ValueError("Did not recognize initializer '{}'".format(name))
+
+            def make_cell(name, num_units, initializer):
+                if name == 'lstm':
+                    return tf.nn.rnn_cell.LSTMCell(num_units, initializer=initializer)
+                if name == 'gru':
+                    return tf.nn.rnn_cell.GRUCell(num_units, kernel_initializer=initializer, bias_initializer=initializer)
+                raise ValueError("Did not recognize cell type '{}'".format(name))
+
+            # Create recurrent cell
+            if isinstance(num_units, int):
+                num_units = [num_units] * num_layers
+            initializer = make_initializer(initializer)
+            cell = tf.contrib.rnn.MultiRNNCell(
+                    [make_cell(cell, n, initializer=initializer) for n in num_units])
+            cell = LinearWrapper(cell=cell, output_size=n_choices)
+
+            # Define input dimensions
+            n_action_inputs = n_choices + 1 # lib tokens + empty token
+            n_parent_inputs = n_choices + 1 - len(lib.terminal_tokens) # Parent sub-lib tokens + empty token
+            n_sibling_inputs = n_choices + 1 # lib tokens + empty tokens
+
+            # Create embeddings
+            if embedding:
+                with tf.variable_scope("embeddings",
+                                       initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0, seed=0)):
+                    if observe_action:
+                        action_embeddings = tf.get_variable("action_embeddings", [n_action_inputs, embedding_size], trainable=True)
+                    if observe_parent:
+                        parent_embeddings = tf.get_variable("parent_embeddings", [n_parent_inputs, embedding_size], trainable=True)
+                    if observe_sibling:
+                        sibling_embeddings = tf.get_variable("sibling_embeddings", [n_sibling_inputs, embedding_size], trainable=True)
+
+            # First observation is all empty tokens
+            initial_obs = tuple()
+            for n in [n_action_inputs, n_parent_inputs, n_sibling_inputs]:
+                obs = tf.constant(n - 1, dtype=np.int32)
+                obs = tf.broadcast_to(obs, [self.batch_size])
+                initial_obs += (obs,)            
+
+            # Get initial prior
+            initial_prior = self.prior.initial_prior()
+            initial_prior = tf.constant(initial_prior, dtype=tf.float32)
+            prior_dims = tf.stack([self.batch_size, n_choices])
+            initial_prior = tf.broadcast_to(initial_prior, prior_dims)
+            # arities = np.array([Program.arities[i] for i in range(n_choices)])
+            # prior = np.zeros(n_choices, dtype=np.float32)
+            # if self.min_length is not None and self.min_length > 1:
+            #     prior[arities == 0] = -np.inf
+            # prior = tf.constant(prior, dtype=tf.float32)
+            # prior_dims = tf.stack([self.batch_size, n_choices])
+            # prior = tf.broadcast_to(prior, prior_dims)
+            # initial_prior = prior
+
+
+            # Returns concatenated one-hot or embeddings from observation tokens
+            # Used for both raw_rnn and dynamic_rnn
+            def get_input(obs):
+                action, parent, sibling = obs
+                observations = []
+                if observe_action:
+                    if embedding:
+                        obs = tf.nn.embedding_lookup(action_embeddings, action)
+                    else:
+                        obs = tf.one_hot(action, depth=n_action_inputs)
+                    observations.append(obs)
+                if observe_parent:
+                    if embedding:
+                        obs = tf.nn.embedding_lookup(parent_embeddings, parent)
+                    else:
+                        obs = tf.one_hot(parent, depth=n_parent_inputs)
+                    observations.append(obs)
+                if observe_sibling:
+                    if embedding:
+                        obs = tf.nn.embedding_lookup(sibling_embeddings, sibling)
+                    else:
+                        obs = tf.one_hot(sibling, depth=n_sibling_inputs)
+                    observations.append(obs)
+                input_ = tf.concat(observations, -1)                
+                return input_
+
+            # NumPy-side step, wrapped by tf.py_func below. From the actions sampled so far it
+            # returns the latest action, its parent/sibling (zeros if unused), the next prior, and updated dangling.
+            def get_action_parent_sibling_prior_dangling(actions, dangling):
+                n = actions.shape[0] # Batch size
+                i = actions.shape[1] - 1 # Current index
+                action = actions[:, -1] # Current action
+
+                # Depending on the constraints, may need to compute parents and siblings
+                if self.compute_parents_siblings:
+                    parent, sibling = parents_siblings(actions, arities=lib.arities, parent_adjust=lib.parent_adjust)
+                else:
+                    parent = np.zeros(n, dtype=np.int32)
+                    sibling = np.zeros(n, dtype=np.int32)
+
+                # Update dangling with (arity - 1) for each element in action
+                dangling += lib.arities[action] - 1
+
+                prior = self.prior(actions, parent, sibling, dangling)
+
+                return action, parent, sibling, prior, dangling
+
+
+            # Given the actions chosen so far, return the observation, the prior, and the updated dangling
+            # Uses py_func to retrieve action/parent/sibling/dangling
+            def get_next_obs_prior_dangling(actions_ta, dangling):
+
+                # Get current action batch
+                actions = tf.transpose(actions_ta.stack()) # Shape: (?, time)
+                
+                # Compute parent, sibling, prior, and dangling
+                action, parent, sibling, prior, dangling = tf.py_func(func=get_action_parent_sibling_prior_dangling,
+                                                              inp=[actions, dangling],
+                                                              Tout=[tf.int32, tf.int32, tf.int32, tf.float32, tf.int32])
+
+                # Observe previous action, parent, and/or sibling
+                obs = (action, parent, sibling)
+
+                # Set the shapes for returned Tensors
+                action.set_shape([None])
+                parent.set_shape([None])
+                sibling.set_shape([None])                
+                prior.set_shape([None, lib.L])
+                dangling.set_shape([None])
+
+                return obs, prior, dangling
+
+            # loop_state carries: actions_ta, obs_tas, priors_ta, obs, prior, dangling, lengths, finished
+            # Define loop function to be used by tf.nn.raw_rnn.
+            initial_cell_input = get_input(initial_obs)
+            def loop_fn(time, cell_output, cell_state, loop_state):
+
+                if cell_output is None: # time == 0
+                    finished = tf.zeros(shape=[self.batch_size], dtype=tf.bool)
+                    obs = initial_obs
+                    next_input = get_input(obs)
+                    next_cell_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) # 2-tuple, each shape (?, num_units)                    
+                    emit_output = None
+                    actions_ta = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=False) # Read twice
+                    obs_tas = (tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True), # Action inputs
+                              tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True), # Parent inputs
+                              tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True)) # Sibling inputs
+                    priors_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True, clear_after_read=True)
+                    prior = initial_prior
+                    lengths = tf.ones(shape=[self.batch_size], dtype=tf.int32)
+                    dangling = tf.ones(shape=[self.batch_size], dtype=tf.int32)
+                    next_loop_state = (
+                        actions_ta,
+                        obs_tas,
+                        priors_ta,
+                        obs,
+                        prior,
+                        dangling,
+                        lengths, # Unused until implementing variable length
+                        finished)
+                else:
+                    actions_ta, obs_tas, priors_ta, obs, prior, dangling, lengths, finished = loop_state
+                    logits = cell_output + prior
+                    next_cell_state = cell_state
+                    emit_output = logits
+                    action = tf.multinomial(logits=logits, num_samples=1, output_dtype=tf.int32, seed=1)[:, 0]
+                    # When implementing variable length:
+                    # action = tf.where(
+                    #     tf.logical_not(finished),
+                    #     tf.multinomial(logits=logits, num_samples=1, output_dtype=tf.int32)[:, 0],
+                    #     tf.zeros(shape=[self.batch_size], dtype=tf.int32))
+                    next_actions_ta = actions_ta.write(time - 1, action) # Write chosen actions
+                    next_obs, next_prior, next_dangling = get_next_obs_prior_dangling(next_actions_ta, dangling)
+                    next_input = get_input(next_obs)
+                    next_obs_tas = ( # Write OLD observation
+                        obs_tas[0].write(time - 1, obs[0]), # Action inputs
+                        obs_tas[1].write(time - 1, obs[1]), # Parent inputs
+                        obs_tas[2].write(time - 1, obs[2])) # Sibling inputs
+                    next_priors_ta = priors_ta.write(time - 1, prior) # Write OLD prior
+                    finished = next_finished = tf.logical_or(
+                        finished,
+                        time >= max_length)
+                    # When implementing variable length:
+                    # finished = next_finished = tf.logical_or(tf.logical_or(
+                    #     finished, # Already finished
+                    #     next_dangling == 0), # Currently, this will be 0 not just the first time, but also at max_length
+                    #     time >= max_length)
+                    next_lengths = tf.where(
+                        finished, # Ever finished
+                        lengths,
+                        tf.tile(tf.expand_dims(time + 1, 0), [self.batch_size]))
+                    next_loop_state = (next_actions_ta,
+                                       next_obs_tas,
+                                       next_priors_ta,
+                                       next_obs,
+                                       next_prior,
+                                       next_dangling,
+                                       next_lengths,
+                                       next_finished)
+
+                return (finished, next_input, next_cell_state, emit_output, next_loop_state)
+
+            # Returns RNN emit outputs TensorArray (i.e. logits), final cell state, and final loop state
+            with tf.variable_scope('policy'):
+                _, _, loop_state = tf.nn.raw_rnn(cell=cell, loop_fn=loop_fn)
+                actions_ta, obs_tas, priors_ta, _, _, _, _, _ = loop_state
+
+            self.actions = tf.transpose(actions_ta.stack(), perm=[1, 0]) # (?, max_length)
+            self.obs = [tf.transpose(obs_ta.stack(), perm=[1, 0]) for obs_ta in obs_tas] # [(?, max_length)] * 3
+            self.priors = tf.transpose(priors_ta.stack(), perm=[1, 0, 2]) # (?, max_length, n_choices)
+
+
+        # Generates dictionary containing placeholders needed for a batch of sequences
+        def make_batch_ph(name):
+            with tf.name_scope(name):
+                batch_ph = {
+                    "actions" : tf.placeholder(tf.int32, [None, max_length]),
+                    "obs" : (tf.placeholder(tf.int32, [None, max_length]),
+                             tf.placeholder(tf.int32, [None, max_length]),
+                             tf.placeholder(tf.int32, [None, max_length])),
+                    "priors" : tf.placeholder(tf.float32, [None, max_length, n_choices]),
+                    "lengths" : tf.placeholder(tf.int32, [None,]),
+                    "rewards" : tf.placeholder(tf.float32, [None], name="r")
+                }
+                batch_ph = Batch(**batch_ph)
+
+            return batch_ph
+        # -sum(p * logq) over `axis`; logq is swapped for 1 wherever p == 0, so those terms are exactly 0
+        def safe_cross_entropy(p, logq, axis=-1):
+            safe_logq = tf.where(tf.equal(p, 0.), tf.ones_like(logq), logq)
+            return - tf.reduce_sum(p * safe_logq, axis)
+
+        # Generates tensor for neglogp of a given batch
+        def make_neglogp_and_entropy(B):
+            with tf.variable_scope('policy', reuse=True):
+                logits, _ = tf.nn.dynamic_rnn(cell=cell,
+                                              inputs=get_input(B.obs),
+                                              sequence_length=B.lengths, # Backpropagates only through sequence length
+                                              dtype=tf.float32)
+            logits += B.priors
+            probs = tf.nn.softmax(logits)
+            logprobs = tf.nn.log_softmax(logits)
+
+            # Generate mask from sequence lengths
+            # NOTE: Using this mask for neglogp and entropy actually does NOT
+            # affect training because gradients are zero outside the lengths.
+            # However, the mask makes tensorflow summaries accurate.
+            mask = tf.sequence_mask(B.lengths, maxlen=max_length, dtype=tf.float32)
+
+            # Negative log probabilities of sequences
+            actions_one_hot = tf.one_hot(B.actions, depth=n_choices, axis=-1, dtype=tf.float32)
+            neglogp_per_step = safe_cross_entropy(actions_one_hot, logprobs, axis=2) # Sum over action dim
+            neglogp = tf.reduce_sum(neglogp_per_step * mask, axis=1) # Sum over time dim
+
+            # NOTE 1: The above implementation is the same as the one below:
+            # neglogp_per_step = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=actions)
+            # neglogp = tf.reduce_sum(neglogp_per_step, axis=1) # Sum over time
+            # NOTE 2: The above implementation is also the same as the one below, with a few caveats:
+            #   Exactly equivalent when removing priors.
+            #   Equivalent up to precision when including clipped prior.
+            #   Crashes when prior is not clipped due to multiplying zero by -inf.
+            # neglogp_per_step = -tf.nn.log_softmax(logits + tf.clip_by_value(priors, -2.4e38, 0)) * actions_one_hot
+            # neglogp_per_step = tf.reduce_sum(neglogp_per_step, axis=2)
+            # neglogp = tf.reduce_sum(neglogp_per_step, axis=1) # Sum over time
+            
+            entropy_per_step = safe_cross_entropy(probs, logprobs, axis=2) # Sum over action dim -> (batch_size, max_length)
+            entropy = tf.reduce_sum(entropy_per_step * mask, axis=1) # Sum over time dim -> (batch_size, )   
+                    
+            return neglogp, entropy
+
+
+        # On policy batch
+        self.sampled_batch_ph = make_batch_ph("sampled_batch")
+
+        # Memory batch
+        self.memory_batch_ph = make_batch_ph("memory_batch")
+        memory_neglogp, _ = make_neglogp_and_entropy(self.memory_batch_ph)
+        self.memory_probs = tf.exp(-memory_neglogp)
+        self.memory_logps = -memory_neglogp
+
+        # PQT batch
+        if pqt:
+            self.pqt_batch_ph = make_batch_ph("pqt_batch")
+
+        # Setup losses
+        with tf.name_scope("losses"):
+
+            neglogp, entropy = make_neglogp_and_entropy(self.sampled_batch_ph)
+            r = self.sampled_batch_ph.rewards
+
+            # Entropy loss
+            entropy_loss = -self.entropy_weight * tf.reduce_mean(entropy, name="entropy_loss")
+            loss = entropy_loss
+
+            # PPO loss
+            if ppo:
+                assert not pqt, "PPO is not compatible with PQT"
+
+                self.old_neglogp_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="old_neglogp")
+                ratio = tf.exp(self.old_neglogp_ph - neglogp)
+                clipped_ratio = tf.clip_by_value(ratio, 1. - ppo_clip_ratio, 1. + ppo_clip_ratio)
+                ppo_loss = -tf.reduce_mean(tf.minimum(ratio * (r - self.baseline), clipped_ratio * (r - self.baseline)))
+                loss += ppo_loss
+
+                # Define PPO diagnostics
+                clipped = tf.logical_or(ratio < (1. - ppo_clip_ratio), ratio > 1. + ppo_clip_ratio)
+                self.clip_fraction = tf.reduce_mean(tf.cast(clipped, tf.float32))
+                self.sample_kl = tf.reduce_mean(neglogp - self.old_neglogp_ph)
+
+            # Policy gradient loss
+            else:
+                if not pqt or (pqt and pqt_use_pg):
+                    pg_loss = tf.reduce_mean((r - self.baseline) * neglogp, name="pg_loss")                    
+                    loss += pg_loss
+
+            # Priority queue training loss
+            if pqt:
+                pqt_neglogp, _ = make_neglogp_and_entropy(self.pqt_batch_ph)
+                pqt_loss = pqt_weight * tf.reduce_mean(pqt_neglogp, name="pqt_loss")
+                loss += pqt_loss
+
+            self.loss = loss
+
+        def make_optimizer(name, learning_rate):
+            if name == "adam":
+                return tf.train.AdamOptimizer(learning_rate=learning_rate)
+            if name == "rmsprop":
+                return tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.99)
+            if name == "sgd":
+                return tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
+            raise ValueError("Did not recognize optimizer '{}'".format(name))
+
+        # Create training op
+        optimizer = make_optimizer(name=optimizer, learning_rate=learning_rate)
+        with tf.name_scope("train"):
+            self.grads_and_vars = optimizer.compute_gradients(self.loss)
+            self.train_op = optimizer.apply_gradients(self.grads_and_vars)
+            # The two lines above are equivalent to:
+            # self.train_op = optimizer.minimize(self.loss)
+        with tf.name_scope("grad_norm"):
+            self.grads, _ = list(zip(*self.grads_and_vars))
+            self.norms = tf.global_norm(self.grads)
+
+        if debug >= 1:
+            total_parameters = 0
+            print("")
+            for variable in tf.trainable_variables():
+                shape = variable.get_shape()
+                n_parameters = np.product(shape)
+                total_parameters += n_parameters
+                print("Variable:    ", variable.name)
+                print("  Shape:     ", shape)
+                print("  Parameters:", n_parameters)
+            print("Total parameters:", total_parameters)
+
+        # Create summaries
+        with tf.name_scope("summary"):
+            if self.summary:
+                if ppo:
+                    tf.summary.scalar("ppo_loss", ppo_loss)
+                else:
+                    if not pqt or (pqt and pqt_use_pg):
+                        tf.summary.scalar("pg_loss", pg_loss)
+                if pqt:
+                    tf.summary.scalar("pqt_loss", pqt_loss)
+                tf.summary.scalar("entropy_loss", entropy_loss)
+                tf.summary.scalar("total_loss", self.loss)
+                tf.summary.scalar("reward", tf.reduce_mean(r))
+                tf.summary.scalar("baseline", self.baseline)
+                tf.summary.histogram("reward", r)
+                tf.summary.histogram("length", self.sampled_batch_ph.lengths)
+                for g, v in self.grads_and_vars:
+                    tf.summary.histogram(v.name, v)
+                    tf.summary.scalar(v.name + '_norm', tf.norm(v))
+                    tf.summary.histogram(v.name + '_grad', g)
+                    tf.summary.scalar(v.name + '_grad_norm', tf.norm(g))
+                tf.summary.scalar('gradient norm', self.norms)
+                self.summaries = tf.summary.merge_all()
+
+    def sample(self, n):
+        """Draw a batch of `n` sequences in a single session run.
+
+        Returns the fetched `self.actions`, `self.obs` and `self.priors`.
+        """
+        fetches = [self.actions, self.obs, self.priors]
+        actions, obs, priors = self.sess.run(fetches, feed_dict={self.batch_size : n})
+        return actions, obs, priors
+
+
+    def compute_probs(self, memory_batch, log=False):
+        """Evaluate sequence probabilities for a Batch.
+
+        Feeds `memory_batch` to the memory-batch placeholders and fetches
+        `self.memory_logps` when `log` is truthy, `self.memory_probs`
+        otherwise.
+        """
+        fetch = self.memory_logps if log else self.memory_probs
+        results = self.sess.run(
+            [fetch],
+            feed_dict={self.memory_batch_ph : memory_batch})
+        # A single tensor was fetched, so unwrap the one-element list
+        return results[0]
+
+
+    def train_step(self, b, sampled_batch, pqt_batch):
+        """Computes loss, trains model, and returns summaries.
+
+        Parameters
+        ----------
+        b : float
+            Baseline value fed to `self.baseline`.
+        sampled_batch : Batch
+            On-policy batch fed to `self.sampled_batch_ph`.
+        pqt_batch : Batch
+            Priority queue batch; only fed when `self.pqt` is set.
+
+        Returns
+        -------
+        Summaries evaluated on the full batch if `self.summary`, else None.
+        """
+        feed_dict = {self.baseline : b, self.sampled_batch_ph : sampled_batch}
+        if self.pqt:
+            feed_dict[self.pqt_batch_ph] = pqt_batch
+
+        if self.ppo:
+            # Pre-update neglogp of the sampled batch. __init__ keeps that
+            # tensor local, so evaluate the same policy via compute_probs().
+            old_neglogp = -self.compute_probs(sampled_batch, log=True)
+            feed_dict[self.old_neglogp_ph] = old_neglogp
+            # Multiple epochs of minibatch training; PPO excludes PQT, so only
+            # the sampled batch and old_neglogp are sliced. NOTE(review): assumes
+            # Batch is a namedtuple of arrays (`obs`: tuple/list of arrays).
+            indices = np.arange(len(old_neglogp))
+            for _ in range(self.ppo_n_iters):
+                self.rng.shuffle(indices)
+                for mb in np.array_split(indices, self.ppo_n_mb):
+                    mb_batch = type(sampled_batch)(*[
+                        tuple(o[mb] for o in x) if isinstance(x, (tuple, list)) else x[mb]
+                        for x in sampled_batch])
+                    self.sess.run([self.train_op], feed_dict={
+                        self.baseline : b, self.batch_size : len(mb),
+                        self.sampled_batch_ph : mb_batch, self.old_neglogp_ph : old_neglogp[mb]})
+        else:
+            self.sess.run([self.train_op], feed_dict=feed_dict)
+
+        return self.sess.run(self.summaries, feed_dict=feed_dict) if self.summary else None
diff --git a/dsr/dsr/core.py b/dsr/dsr/core.py
new file mode 100644
index 00000000..daec11cd
--- /dev/null
+++ b/dsr/dsr/core.py
@@ -0,0 +1,126 @@
+"""Core deep symbolic optimizer construct."""
+
+import json
+import zlib
+from collections import defaultdict
+from multiprocessing import Pool
+
+import tensorflow as tf
+
+from dsr.task import set_task
+from dsr.controller import Controller
+from dsr.train import learn
+from dsr.prior import make_prior
+from dsr.program import Program
+
+
+class DeepSymbolicOptimizer():
+    """
+    Deep symbolic optimization model. Includes model hyperparameters and
+    training configuration.
+
+    Parameters
+    ----------
+    config : dict or str
+        Config dictionary or path to JSON. See dsr/dsr/config.json for template.
+
+    Attributes
+    ----------
+    config : dict
+        Configuration parameters for training.
+
+    Methods
+    -------
+    train
+        Builds and trains the model according to config.
+    """
+
+    def __init__(self, config=None):
+        self.update_config(config)
+        self.sess = None
+        self.pool = None
+
+    def setup(self, seed=0):
+        """Build a fresh pool, session, prior and controller for `seed`."""
+        # Release what a previous setup() created; otherwise every call leaks
+        # a TensorFlow session and a pool of worker processes.
+        if self.pool is not None:
+            self.pool.close()
+        if self.sess is not None:
+            self.sess.close()
+
+        # Clear the cache, reset the compute graph, and set the seed
+        Program.clear_cache()
+        tf.reset_default_graph()
+        self.seed(seed) # Must be called _after_ resetting graph
+
+        self.pool = self.make_pool()
+        self.sess = tf.Session()
+        self.prior = self.make_prior()
+        self.controller = self.make_controller()
+
+    def train(self, seed=0):
+        """Set up the model for `seed`, run `learn`, and return its result."""
+        self.setup(seed)
+        return learn(self.sess,
+                     self.controller,
+                     self.pool,
+                     **self.config_training)
+
+    def update_config(self, config):
+        """Load `config` (dict, JSON path or None); missing sections become {}."""
+        if config is None:
+            config = {}
+        elif isinstance(config, str):
+            with open(config, 'rb') as f:
+                config = json.load(f)
+
+        self.config = defaultdict(dict, config)
+        self.config_task = self.config["task"]
+        self.config_prior = self.config["prior"]
+        self.config_training = self.config["training"]
+        self.config_controller = self.config["controller"]
+
+    def seed(self, seed_=0):
+        """Set the tensorflow seed, which will be offset by a checksum on the
+        task name to ensure seeds differ across different tasks."""
+        task_name = self.config_task.get("name", "")
+        seed_ += zlib.adler32(task_name.encode("utf-8"))
+        tf.set_random_seed(seed_)
+        return seed_
+
+    def make_prior(self):
+        """Create the prior for `Program.library` from the "prior" config."""
+        return make_prior(Program.library, self.config_prior)
+
+    def make_controller(self):
+        """Create the Controller from the session, prior and "controller" config."""
+        return Controller(self.sess,
+                          self.prior,
+                          **self.config_controller)
+
+    def make_pool(self):
+        """Set the Task in this process and in a new worker pool, if one is configured."""
+        pool = None
+        n_cores_batch = self.config_training.get("n_cores_batch")
+        if n_cores_batch is not None and n_cores_batch > 1:
+            pool = Pool(n_cores_batch,
+                        initializer=set_task,
+                        initargs=(self.config_task,))
+
+        # Set the Task for the parent process
+        set_task(self.config_task)
+
+        return pool
+
+    def save(self, save_path):
+        """Save the session's variables to `save_path`."""
+        saver = tf.train.Saver()
+        saver.save(self.sess, save_path)
+
+    def load(self, load_path):
+        """Restore variables from `load_path`, calling setup() first if needed."""
+        if self.sess is None:
+            self.setup()
+        saver = tf.train.Saver()
+        saver.restore(self.sess, load_path)
diff --git a/dsr/dsr/cyfunc.pyx b/dsr/dsr/cyfunc.pyx
new file mode 100644
index 00000000..11ebd6b5
--- /dev/null
+++ b/dsr/dsr/cyfunc.pyx
@@ -0,0 +1,90 @@
+'''
+# cython: linetrace=True
+# distutils: define_macros=CYTHON_TRACE_NOGIL=1
+'''
+# Uncomment the above lines for cProfile
+
+import numpy as np
+import array
+
+# Cython specific C imports
+cimport numpy as np
+from cpython cimport array
+cimport cython
+from libc.stdlib cimport malloc, free
+from cpython.ref cimport PyObject
+
+# Static inits
+# Module-level scratch buffers used by execute(): a 1024 x 25 table of object
+# slots (one row per stack frame) and a C int array holding one counter per
+# row.
+# NOTE(review): both are created once at import time and shared by every
+# call, so execute() looks non-reentrant -- confirm before using it from
+# several threads.
+# NOTE(review): the malloc result is neither checked for NULL nor freed in
+# this file.
+cdef list apply_stack   = [[None for i in range(25)] for i in range(1024)]
+cdef int *stack_count   = <int *> malloc(1024 * sizeof(int))
+
+@cython.boundscheck(False) # turn off bounds-checking for entire function
+@cython.wraparound(False)  # turn off negative index wrapping for entire function  
+def execute(np.ndarray X, int len_traversal, list traversal, int[:] is_input_var):    
+            
+    """Executes the program according to X.
+
+    The traversal is evaluated with an explicit stack of frames. Each frame
+    stores a callable token in slot 0 followed by the arguments collected so
+    far. A frame is called as soon as its argument count equals the token's
+    arity, and its result becomes the next argument of the frame below it.
+
+    Parameters
+    ----------
+    X : array-like, shape = [n_samples, n_features]
+        Training vectors, where n_samples is the number of samples and
+        n_features is the number of features.
+
+    len_traversal : int
+        Number of leading entries of `traversal` to process.
+
+    traversal : list
+        Tokens of the program. Entries flagged in `is_input_var` must expose
+        an `input_var` attribute used as a column index into X; all other
+        entries must be callable and expose an `arity` attribute.
+        (Presumably in prefix order with the root first -- TODO confirm.)
+
+    is_input_var : int[:]
+        One flag per traversal entry; a non-zero value marks an input
+        variable.
+    
+    Returns
+    -------
+    y_hats : array-like, shape = [n_samples]
+        The result of executing the program on X, i.e. the value returned by
+        the call of the bottom-most frame.
+    """
+    #sp              = 0 # allow a dummy first row, requires a none type function with arity of -1
+    
+    # Init some ints
+    cdef int        sp              = -1 # Stack pointer
+    cdef int        Xs              = X.shape[0] # NOTE(review): appears unused below
+    
+    # Give cdef hints for object types  
+    cdef int        i
+    cdef int        n # NOTE(review): appears unused below
+    cdef int        arity
+    cdef np.ndarray intermediate_result
+    cdef list       stack_end
+    cdef object     stack_end_function
+    
+    # NOTE(review): if the very first entry is flagged as an input variable,
+    # sp is still -1 and stack_end is unassigned in the else branch below --
+    # this assumes traversal[0] is never an input variable; confirm with callers.
+    for i in range(len_traversal):
+        
+        if not is_input_var[i]:
+            # Open a new frame for this token. A token with arity 0 has its
+            # frame called right away by the while loop below.
+            sp += 1
+            # Move this to the front with a memset call
+            stack_count[sp]                     = 0
+            # Store the reference to stack_count[sp] rather than keep calling
+            apply_stack[sp][stack_count[sp]]    = traversal[i]
+            stack_end                           = apply_stack[sp]
+            # The first element is the function itself
+            stack_end_function                  = stack_end[0]
+            arity                               = stack_end_function.arity
+        else:
+            # Not a function, so lazily evaluate later
+            # The matching column of X is stored as the next argument of the
+            # current frame.
+            stack_count[sp] += 1
+            stack_end[stack_count[sp]]          = X[:, traversal[i].input_var]
+
+        # Keep on doing this so long as arity matches up, we can 
+        # add in numbers above and complete the arity later.
+        while stack_count[sp] == arity:
+            # Call the frame's token with the arguments held in slots 1..count
+            intermediate_result = stack_end_function(*stack_end[1:(stack_count[sp] + 1)]) # 85% of overhead
+
+            # I think we can get rid of this line, but will require a major rewrite.
+            # The bottom frame has been evaluated: this is the program's output
+            if sp == 0:    
+                return intermediate_result
+            
+            # Pop the finished frame and append its result to the parent frame
+            sp -= 1
+            # Adjust pointer at the end of the stack
+            stack_end                   = apply_stack[sp]
+            stack_count[sp] += 1
+            stack_end[stack_count[sp]]  = intermediate_result
+
+            # The first element is the function itself
+            stack_end_function          = stack_end[0]
+            arity                       = stack_end_function.arity
+      
+    # We should never get here
+    assert False, "Function should never get here!"
+    return None
diff --git a/dsr/dsr/functions.py b/dsr/dsr/functions.py
new file mode 100644
index 00000000..705eb602
--- /dev/null
+++ b/dsr/dsr/functions.py
@@ -0,0 +1,195 @@
+"""Common Tokens used for executable Programs."""
+
+import numpy as np
+from fractions import Fraction
+
+from dsr.library import Token, PlaceholderConstant
+
+# Euler-Mascheroni constant; harmonic() adds it in its non-integer branch
+GAMMA = 0.57721566490153286060651209008240243104215933593992
+
+
+"""Define custom unprotected operators"""
+def logabs(x1):
+    """Natural log of |x1|, which extends log to negative arguments.
+
+    Unprotected: no special handling is applied where x1 is zero.
+    """
+    magnitude = np.abs(x1)
+    return np.log(magnitude)
+
+def expneg(x1):
+    """Return exp(-x1)."""
+    negated = -x1
+    return np.exp(negated)
+
+def n3(x1):
+    """Return x1 raised to the third power."""
+    cubed = np.power(x1, 3)
+    return cubed
+
+def n4(x1):
+    """Return x1 raised to the fourth power."""
+    fourth_power = np.power(x1, 4)
+    return fourth_power
+
+def sigmoid(x1):
+    """Logistic function 1 / (1 + exp(-x1))."""
+    denominator = 1 + np.exp(-x1)
+    return 1 / denominator
+
+def harmonic(x1):
+    """Harmonic number of each element of x1.
+
+    When every element reports `is_integer()`, each value n is summed exactly
+    as 1/1 + 1/2 + ... + 1/n with Fractions and the result is returned as a
+    float32 array. Otherwise the whole input is evaluated with the expansion
+    GAMMA + log(x) + 1/(2x) - 1/(12x^2) + 1/(120x^4).
+    """
+    only_integers = all(val.is_integer() for val in x1)
+    if not only_integers:
+        return GAMMA + np.log(x1) + 0.5/x1 - 1./(12*x1**2) + 1./(120*x1**4)
+
+    exact_sums = []
+    for val in x1:
+        upper = int(val)
+        exact_sums.append(sum(Fraction(1, d) for d in range(1, upper + 1)))
+    return np.array(exact_sums, dtype=np.float32)
+
+
+# Annotate unprotected ops
+# Each Token pairs a callable with its name, the number of arguments it
+# consumes (arity), and a complexity score.
+unprotected_ops = [
+    # Binary operators
+    Token(np.add, "add", arity=2, complexity=1),
+    Token(np.subtract, "sub", arity=2, complexity=1),
+    Token(np.multiply, "mul", arity=2, complexity=1),
+    Token(np.divide, "div", arity=2, complexity=2),
+    # np.maximum / np.minimum are element-wise two-input ufuncs, so they must
+    # be registered with arity 2 (calling them with one argument raises).
+    Token(np.maximum, "max", arity=2, complexity=4),
+    Token(np.minimum, "min", arity=2, complexity=4),
+
+    # Built-in unary operators
+    Token(np.sin, "sin", arity=1, complexity=3),
+    Token(np.cos, "cos", arity=1, complexity=3),
+    Token(np.tan, "tan", arity=1, complexity=4),
+    Token(np.exp, "exp", arity=1, complexity=4),
+    Token(np.log, "log", arity=1, complexity=4),
+    Token(np.sqrt, "sqrt", arity=1, complexity=4),
+    Token(np.square, "n2", arity=1, complexity=2),
+    Token(np.negative, "neg", arity=1, complexity=1),
+    Token(np.abs, "abs", arity=1, complexity=2),
+    Token(np.tanh, "tanh", arity=1, complexity=4),
+    Token(np.reciprocal, "inv", arity=1, complexity=2),
+
+    # Custom unary operators
+    Token(logabs, "logabs", arity=1, complexity=4),
+    Token(expneg, "expneg", arity=1, complexity=4),
+    Token(n3, "n3", arity=1, complexity=3),
+    Token(n4, "n4", arity=1, complexity=3),
+    Token(sigmoid, "sigmoid", arity=1, complexity=4),
+    Token(harmonic, "harmonic", arity=1, complexity=4)
+]
+
+
+"""Define custom protected operators"""
+def protected_div(x1, x2):
+    """Element-wise x1 / x2 that yields 1 wherever |x2| <= 0.001.
+
+    The quotient is still computed for every element, so NumPy's divide,
+    invalid and overflow warnings are silenced for the duration of the call.
+    """
+    with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
+        denominator_ok = np.abs(x2) > 0.001
+        quotient = np.divide(x1, x2)
+        return np.where(denominator_ok, quotient, 1.)
+
+def protected_exp(x1):
+    """Element-wise exp(x1) that yields 0 wherever x1 >= 100.
+
+    Overflow warnings are silenced because exp is still computed for every
+    element before the selection.
+    """
+    with np.errstate(over='ignore'):
+        in_range = x1 < 100
+        exponential = np.exp(x1)
+        return np.where(in_range, exponential, 0.0)
+
+def protected_log(x1):
+    """Element-wise log(|x1|) that yields 0 wherever |x1| <= 0.001."""
+    with np.errstate(divide='ignore', invalid='ignore'):
+        away_from_zero = np.abs(x1) > 0.001
+        log_magnitude = np.log(np.abs(x1))
+        return np.where(away_from_zero, log_magnitude, 0.)
+
+def protected_sqrt(x1):
+    """Square root of |x1|, which extends sqrt to negative arguments."""
+    magnitude = np.abs(x1)
+    return np.sqrt(magnitude)
+
+def protected_inv(x1):
+    """Element-wise 1 / x1 that yields 0 wherever |x1| <= 0.001."""
+    with np.errstate(divide='ignore', invalid='ignore'):
+        away_from_zero = np.abs(x1) > 0.001
+        reciprocal = 1. / x1
+        return np.where(away_from_zero, reciprocal, 0.)
+
+def protected_expneg(x1):
+    """Element-wise exp(-x1) that yields 0 wherever x1 <= -100.
+
+    Overflow warnings are silenced because exp is still computed for every
+    element before the selection.
+    """
+    with np.errstate(over='ignore'):
+        in_range = x1 > -100
+        exponential = np.exp(-x1)
+        return np.where(in_range, exponential, 0.0)
+
+def protected_n2(x1):
+    """Element-wise square of x1 that yields 0 wherever |x1| >= 1e6."""
+    with np.errstate(over='ignore'):
+        small_enough = np.abs(x1) < 1e6
+        squared = np.square(x1)
+        return np.where(small_enough, squared, 0.0)
+
+def protected_n3(x1):
+    """Element-wise cube of x1 that yields 0 wherever |x1| >= 1e6."""
+    with np.errstate(over='ignore'):
+        small_enough = np.abs(x1) < 1e6
+        cubed = np.power(x1, 3)
+        return np.where(small_enough, cubed, 0.0)
+
+def protected_n4(x1):
+    """Element-wise fourth power of x1 that yields 0 wherever |x1| >= 1e6."""
+    with np.errstate(over='ignore'):
+        small_enough = np.abs(x1) < 1e6
+        fourth_power = np.power(x1, 4)
+        return np.where(small_enough, fourth_power, 0.0)
+
+def protected_sigmoid(x1):
+    """Logistic function built on protected_expneg: 1 / (1 + exp(-x1))."""
+    denominator = 1 + protected_expneg(x1)
+    return 1 / denominator
+
+# Annotate protected ops
+# The names match the corresponding unprotected Tokens; only the wrapped
+# function differs.
+protected_ops = [
+    # Protected binary operators
+    Token(protected_div, "div", arity=2, complexity=2),
+
+    # Protected unary operators
+
+    Token(protected_exp, "exp", arity=1, complexity=4),
+    Token(protected_log, "log", arity=1, complexity=4),
+    Token(protected_log, "logabs", arity=1, complexity=4), # Protected logabs is supported, but redundant with protected log
+    Token(protected_sqrt, "sqrt", arity=1, complexity=4),
+    Token(protected_inv, "inv", arity=1, complexity=2),
+    Token(protected_expneg, "expneg", arity=1, complexity=4),
+    Token(protected_n2, "n2", arity=1, complexity=2),
+    Token(protected_n3, "n3", arity=1, complexity=3),
+    Token(protected_n4, "n4", arity=1, complexity=3),
+    Token(protected_sigmoid, "sigmoid", arity=1, complexity=4)
+]
+
+# Registry of every known operator Token: unprotected ops are keyed by their
+# own name ...
+function_map = {op.name : op for op in unprotected_ops}
+
+# ... and protected ops by their name with a "protected_" prefix
+function_map.update(
+    ("protected_{}".format(op.name), op) for op in protected_ops)
+
+# Names of all registered Tokens taking one / two arguments. A protected op
+# shares its name with the unprotected one, so each name appears once.
+UNARY_TOKENS = {op.name for op in function_map.values() if op.arity == 1}
+BINARY_TOKENS = {op.name for op in function_map.values() if op.arity == 2}
+
+
+def create_tokens(n_input_var, function_set, protected):
+    """
+    Helper function to create Tokens.
+
+    Parameters
+    ----------
+    n_input_var : int
+        Number of input variable Tokens.
+
+    function_set : list
+        Names of registered Tokens, or floats that will create new Tokens.
+
+    protected : bool
+        Whether to use protected versions of registered Tokens.
+
+    Returns
+    -------
+    tokens : list of Token
+        The input variable Tokens (named x1, x2, ...) followed by one Token
+        per entry of function_set, in order.
+
+    Raises
+    ------
+    ValueError
+        If an entry of function_set is not a registered name, a number, or
+        the string "const".
+    """
+
+    tokens = []
+
+    # Create input variable Tokens
+    for i in range(n_input_var):
+        token = Token(name="x{}".format(i + 1), arity=0, complexity=1,
+                      function=None, input_var=i)
+        tokens.append(token)
+
+    for op in function_set:
+
+        # Registered Token
+        if op in function_map:
+            # Overwrite available protected operators
+            if protected and not op.startswith("protected_"):
+                protected_op = "protected_{}".format(op)
+                if protected_op in function_map:
+                    op = protected_op
+
+            token = function_map[op]
+
+        # Hard-coded floating-point constant
+        elif isinstance(op, (float, int)):
+            name = str(op)
+            value = np.atleast_1d(np.float32(op))
+            # Bind the array as a default argument. A plain `lambda : value`
+            # looks `value` up when it is called, so every constant Token
+            # created in this loop would return the last constant's value.
+            function = lambda value=value : value
+            token = Token(name=name, arity=0, complexity=1, function=function)
+
+        # Constant placeholder (to-be-optimized)
+        elif op == "const":
+            token = PlaceholderConstant()
+
+        else:
+            raise ValueError("Operation {} not recognized.".format(op))
+
+        tokens.append(token)
+
+    return tokens
diff --git a/dsr/dsr/library.py b/dsr/dsr/library.py
new file mode 100644
index 00000000..e016e4f5
--- /dev/null
+++ b/dsr/dsr/library.py
@@ -0,0 +1,196 @@
+"""Classes for Token and Library"""
+
+from collections import defaultdict
+
+import numpy as np
+
+
+class Token():
+    """
+    A single "building block" of a Program object.
+
+    Attributes
+    ----------
+    name : str
+        Name of token; also used as its repr.
+
+    arity : int
+        Arity (number of arguments) of token.
+
+    complexity : float
+        Complexity of token.
+
+    function : callable or None
+        Function associated with the token; used for executable Programs.
+
+    input_var : int or None
+        Index of input if this Token is an input variable, otherwise None.
+
+    Methods
+    -------
+    __call__(*args)
+        Forward the arguments to the Token's function and return its result.
+    """
+
+    def __init__(self, function, name, arity, complexity, input_var=None):
+        # Input variables carry no function and take no arguments
+        if input_var is not None:
+            assert function is None, "Input variables should not have functions."
+            assert arity == 0, "Input variables should have arity zero."
+
+        self.name = name
+        self.arity = arity
+        self.complexity = complexity
+        self.function = function
+        self.input_var = input_var
+
+    def __call__(self, *args):
+        func = self.function
+        assert func is not None, \
+            "Token {} is not callable.".format(self.name)
+
+        return func(*args)
+
+    def __repr__(self):
+        return self.name
+
+
+class PlaceholderConstant(Token):
+    """
+    A Token named "const" with arity 0 whose function returns the current
+    "value" attribute. Used for constants that will be optimized with respect
+    to the reward function.
+
+    Parameters
+    ----------
+    value : float or None
+        Current value of the constant, or None if not yet set. A value other
+        than None is stored as an array with at least one dimension.
+    """
+
+    def __init__(self, value=None):
+        self.value = None if value is None else np.atleast_1d(value)
+
+        # The value is read at call time, so later updates are picked up
+        def function():
+            assert self.value is not None, \
+                "Constant is not callable with value None."
+            return self.value
+
+        super().__init__(function=function, name="const", arity=0, complexity=1)
+
+    def __repr__(self):
+        # Show the placeholder name until a value has been assigned
+        return self.name if self.value is None else str(self.value[0])
+
+
+class Library():
+    """
+    Library of Tokens. Tokens are kept in a list (rather than a set or dict)
+    because they are so often looked up by the integer indices produced by
+    the Controller.
+
+    Attributes
+    ----------
+    tokens : list of Token
+        List of available Tokens in the library.
+
+    L : int
+        Number of Tokens in the library.
+
+    names : list of str
+        Names corresponding to Tokens in the library.
+
+    arities : np.ndarray of int32
+        Arities corresponding to Tokens in the library.
+
+    tokens_of_arity : defaultdict
+        Maps an arity to the int32 array of Token indices with that arity;
+        arities that do not occur map to an empty array.
+    """
+
+    def __init__(self, tokens):
+
+        self.tokens = tokens
+        self.L = len(tokens)
+        self.names = [token.name for token in tokens]
+        self.arities = np.array([token.arity for token in tokens],
+                                dtype=np.int32)
+
+        # Indices of Tokens that are input variables
+        self.input_tokens = np.array(
+            [idx for idx, token in enumerate(self.tokens)
+             if token.input_var is not None],
+            dtype=np.int32)
+
+        # Group Token indices by arity
+        self.tokens_of_arity = defaultdict(lambda : np.array([], dtype=np.int32))
+        for arity in self.arities:
+            if arity in self.tokens_of_arity:
+                continue
+            same_arity = [idx for idx in range(self.L)
+                          if self.arities[idx] == arity]
+            self.tokens_of_arity[arity] = np.array(same_arity, dtype=np.int32)
+        self.terminal_tokens = self.tokens_of_arity[0]
+        self.unary_tokens = self.tokens_of_arity[1]
+        self.binary_tokens = self.tokens_of_arity[2]
+
+        # Index of the "const" Token, if the library has one
+        if "const" in self.names:
+            self.const_token = self.names.index("const")
+        else:
+            self.const_token = None
+
+        # Number the Tokens with arity > 0 consecutively; all others get -1
+        self.parent_adjust = np.full_like(self.arities, -1)
+        parent_idx = np.flatnonzero(self.arities > 0)
+        self.parent_adjust[parent_idx] = np.arange(parent_idx.size)
+
+        base_trig = ["sin", "cos", "tan", "csc", "sec", "cot"]
+        trig_names = base_trig + ["arc" + name for name in base_trig]
+
+        # Terminals that are not input variables
+        self.float_tokens = np.array(
+            [idx for idx, token in enumerate(self.tokens)
+             if token.arity == 0 and token.input_var is None],
+            dtype=np.int32)
+        self.trig_tokens = np.array(
+            [idx for idx, token in enumerate(self.tokens)
+             if token.name in trig_names],
+            dtype=np.int32)
+
+        # Pairs of unary operators that undo each other, as Token indices.
+        # A pair is only kept when both names are present in the library.
+        inverse_names = {
+            "inv" : "inv",
+            "neg" : "neg",
+            "exp" : "log",
+            "log" : "exp",
+            "sqrt" : "n2",
+            "n2" : "sqrt"
+        }
+        index_of = {token.name : idx for idx, token in enumerate(self.tokens)}
+        self.inverse_tokens = {}
+        for name, inverse_name in inverse_names.items():
+            if name in index_of and inverse_name in index_of:
+                self.inverse_tokens[index_of[name]] = index_of[inverse_name]
+
+    def __getitem__(self, val):
+        """Shortcut to get Token by name or index.
+
+        Raises TokenNotFoundError for an unknown name, an out-of-range index,
+        or a key that is neither a str nor an int.
+        """
+
+        if not isinstance(val, (str, int, np.integer)):
+            raise TokenNotFoundError("Library must be indexed by str or int, not {}.".format(type(val)))
+
+        if isinstance(val, str):
+            try:
+                index = self.names.index(val)
+            except ValueError:
+                raise TokenNotFoundError("Token {} does not exist.".format(val))
+        else:
+            index = val
+
+        try:
+            return self.tokens[index]
+        except IndexError:
+            raise TokenNotFoundError("Token index {} does not exist".format(index))
+
+    def tokenize(self, inputs):
+        """Convert inputs to list of Tokens.
+
+        A str is split on commas; anything that is not a list or ndarray is
+        treated as a single entry. Entries that are already Tokens are kept,
+        all others are looked up in the Library.
+        """
+
+        if isinstance(inputs, str):
+            inputs = inputs.split(',')
+        elif not isinstance(inputs, (list, np.ndarray)):
+            inputs = [inputs]
+
+        tokens = []
+        for entry in inputs:
+            tokens.append(entry if isinstance(entry, Token) else self[entry])
+        return tokens
+
+    def actionize(self, inputs):
+        """Convert inputs to array of 'actions', i.e. ints corresponding to
+        Tokens in the Library."""
+
+        indices = [self.tokens.index(token) for token in self.tokenize(inputs)]
+        return np.array(indices, dtype=np.int32)
+
+
+class TokenNotFoundError(Exception):
+    """Raised by Library when a Token cannot be found by name or index."""
diff --git a/dsr/dsr/memory.py b/dsr/dsr/memory.py
new file mode 100644
index 00000000..88c8eb0d
--- /dev/null
+++ b/dsr/dsr/memory.py
@@ -0,0 +1,358 @@
+"""Classes for memory buffers, priority queues, and quantile estimation."""
+
+import heapq
+from collections import namedtuple
+
+import numpy as np
+
+
+# Record type for sampled data. get_samples() indexes `actions`, `priors`,
+# `lengths`, `rewards` and every element of `obs` with the same key.
+Batch = namedtuple(
+    "Batch", ["actions", "obs", "priors", "lengths", "rewards"])
+
+
+def make_queue(controller=None, priority=False, capacity=np.inf, seed=0):
+    """Factory function for various Queues.
+
+    Parameters
+    ----------
+    controller : dsr.controller.Controller
+        Reference to the Controller, used to compute probabilities of items in
+        the Queue.
+
+    priority : bool
+        If True, the returned object inherits from UniquePriorityQueue.
+        Otherwise it inherits from UniqueQueue.
+
+    capacity : int
+        Maximum queue length.
+
+    seed : int
+        RNG seed used for random sampling.
+
+    Returns
+    -------
+    queue : ProgramQueue
+        Instance of a class created on the fly that inherits from
+        ProgramQueueMixin and the selected Queue subclass.
+    """
+
+    Base = UniquePriorityQueue if priority else UniqueQueue
+
+    class ProgramQueue(ProgramQueueMixin, Base):
+        def __init__(self, controller, capacity, seed):
+            # Initialize both parents explicitly; their signatures differ
+            ProgramQueueMixin.__init__(self, controller)
+            Base.__init__(self, capacity, seed)
+
+    return ProgramQueue(controller, capacity, seed)
+
+
+def get_samples(batch, key):
+    """
+    Index every field of a Batch with the same key.
+
+    Parameters
+    ----------
+    batch : Batch
+        Batch to take the samples from.
+
+    key : int or slice
+        Indices of samples to return.
+
+    Returns
+    -------
+    batch : Batch
+        Sub-Batch with samples from the given indices.
+    """
+
+    actions = batch.actions[key]
+    # obs holds several components; each one is indexed separately
+    obs = tuple(component[key] for component in batch.obs)
+    priors = batch.priors[key]
+    lengths = batch.lengths[key]
+    rewards = batch.rewards[key]
+    return Batch(actions=actions, obs=obs, priors=priors,
+                 lengths=lengths, rewards=rewards)
+
+
+# Adapted from https://github.com/tensorflow/models/blob/1af55e018eebce03fb61bba9959a04672536107d/research/brain_coder/common/utils.py
+class ItemContainer(object):
+    """Holds an item together with its score and optional extra data.
+
+    Ordering (`<`) compares scores, which is what the heap-queue uses, while
+    equality compares items.
+    """
+
+    def __init__(self, score, item, extra_data):
+        self.score = score
+        self.item = item
+        self.extra_data = extra_data
+
+    def __lt__(self, other):
+        assert isinstance(other, type(self))
+        mine, theirs = self.score, other.score
+        return mine < theirs
+
+    def __eq__(self, other):
+        assert isinstance(other, type(self))
+        mine, theirs = self.item, other.item
+        return mine == theirs
+
+    def __iter__(self):
+        """Yield score, item and extra_data so that the container can be
+        unpacked like a tuple."""
+        yield self.score
+        yield self.item
+        yield self.extra_data
+
+    def __repr__(self):
+        """String representation of this item.
+
+        `extra_data` is left out on purpose: it is assumed to be hard for a
+        human to interpret (if it were easy, it should be hashable, like a
+        string or tuple).
+
+        Returns:
+            String representation of `self`.
+        """
+        return "({!r}, {!r})".format(self.score, self.item)
+
+    def __str__(self):
+        return repr(self)
+
+
+class Queue(object):
+    """Abstract base for queues; subclasses must provide `push` and `pop`.
+
+    Entries are stored in the list `heap`, and `unique_items` is a set for
+    subclasses to track which items are stored.
+    """
+
+    def __init__(self, capacity, seed=0):
+        self.capacity = capacity
+        self.rng = np.random.RandomState(seed)
+        self.heap = []
+        self.unique_items = set()
+
+    def push(self, score, item, extra_data):
+        raise NotImplementedError
+
+    def pop(self):
+        raise NotImplementedError
+
+    def random_sample(self, sample_size):
+        """Draw entries from the queue uniformly at random.
+
+        Args:
+            sample_size: Number of random samples to draw. The same item can be
+                    sampled multiple times.
+
+        Returns:
+            List of sampled items (of length `sample_size`). Each element in the list
+            is a tuple: (item, extra_data).
+        """
+        n_stored = len(self.heap)
+        picks = self.rng.choice(n_stored, sample_size)
+        sampled = []
+        for pick in picks:
+            container = self.heap[pick]
+            sampled.append((container.item, container.extra_data))
+        return sampled
+
+    def __len__(self):
+        return len(self.heap)
+
+    def __iter__(self):
+        # Each entry unpacks into (score, item, extra_data)
+        for entry in self.heap:
+            _, item, _ = entry
+            yield item
+
+    def __repr__(self):
+        entries = [repr(entry) for entry in self.heap]
+        return '[' + ', '.join(entries) + ']'
+
+    def __str__(self):
+        return repr(self)
+
+
+class UniqueQueue(Queue):
+    """A first-in-first-out queue without duplicates. Pushing an item that is
+    already stored moves it to the back of the queue instead."""
+
+    def push(self, score, item, extra_data=None):
+        """Append an item to the back of the queue, or move it there if it is
+        already present.
+
+        Score is unused but included as an argument to follow the interface.
+        """
+
+        entry = ItemContainer(None, item, extra_data)
+
+        if item in self.unique_items:
+            # Containers compare equal by item, so this drops the old entry
+            self.heap.remove(entry)
+        else:
+            # Make room by dropping the oldest entry when at capacity
+            if len(self.heap) >= self.capacity:
+                self.pop()
+            self.unique_items.add(item)
+
+        self.heap.append(entry)
+
+    def pop(self):
+        """Remove and return the front of the queue (the oldest item) as a
+        (score, item, extra_data) tuple, or () if the queue is empty."""
+
+        if not self.heap:
+            return ()
+        oldest = self.heap.pop(0)
+        score, item, extra_data = oldest
+        self.unique_items.remove(item)
+        return (score, item, extra_data)
+
+
+# Adapted from https://github.com/tensorflow/models/blob/1af55e018eebce03fb61bba9959a04672536107d/research/brain_coder/common/utils.py
+class UniquePriorityQueue(Queue):
+    """A priority queue that ignores duplicates.
+
+    Entries are kept as a min-heap on score. Once the capacity is reached,
+    every push drops the lowest scored entry, so the top items by score
+    remain in the queue.
+    """
+
+    def push(self, score, item, extra_data=None):
+        """Push an item onto the queue.
+
+        If the queue is at capacity, the item with the smallest score will be
+        dropped. Note that it is assumed each item has exactly one score. The same
+        item with a different score will still be dropped.
+
+        Args:
+            score: Number used to prioritize items in the queue. Largest scores are
+                    kept in the queue.
+            item: A hashable item to be stored. Duplicates of this item will not be
+                    added to the queue.
+            extra_data: An extra (possible not hashable) data to store with the item.
+        """
+        if item in self.unique_items:
+            return
+
+        entry = ItemContainer(score, item, extra_data)
+        if len(self.heap) >= self.capacity:
+            # The evicted entry may be the new one if its score is the lowest
+            _, evicted_item, _ = heapq.heappushpop(self.heap, entry)
+            self.unique_items.add(item)
+            self.unique_items.remove(evicted_item)
+        else:
+            heapq.heappush(self.heap, entry)
+            self.unique_items.add(item)
+
+    def pop(self):
+        """Pop the item with the lowest score.
+
+        Returns:
+            score: Item's score.
+            item: The item that was popped.
+            extra_data: Any extra data stored with the item.
+        """
+        if not self.heap:
+            return ()
+        lowest = heapq.heappop(self.heap)
+        score, item, extra_data = lowest
+        self.unique_items.remove(item)
+        return score, item, extra_data
+
+    def get_max(self):
+        """Peek at the item with the highest score.
+
+        Returns:
+            Same as `pop`.
+        """
+        if not self.heap:
+            return ()
+        return tuple(max(self.heap))
+
+    def get_min(self):
+        """Peek at the item with the lowest score.
+
+        Returns:
+            Same as `pop`.
+        """
+        if not self.heap:
+            return ()
+        return tuple(min(self.heap))
+
+    def iter_in_order(self):
+        """Iterate over items in the queue from largest score to smallest.
+
+        Yields:
+            item: Hashable item.
+            extra_data: Extra data stored with the item.
+        """
+        for container in sorted(self.heap, reverse=True):
+            _, item, extra_data = container
+            yield item, extra_data
+
+
+class ProgramQueueMixin():
+    """A mixin for Queues with additional utilities specific to Batch and
+    Program.
+
+    The mixin relies on the class it is combined with for `self.heap`,
+    `push` and `random_sample`.
+    """
+
+    def __init__(self, controller=None):
+        self.controller = controller
+
+    def push_sample(self, sample, program):
+        """
+        Push a single sample corresponding to Program to the queue.
+
+        The Program's `str` attribute is used as the item, the sample's
+        rewards as the score, and the sample itself as the extra data.
+
+        Parameters
+        ----------
+        sample : Batch
+            A Batch comprising a single sample.
+
+        program : Program
+            Program corresponding to the sample.
+        """
+
+        id_ = program.str
+        score = sample.rewards
+        self.push(score, id_, sample)
+
+    def push_batch(self, batch, programs):
+        """Push a Batch corresponding to Programs to the queue, one sample
+        per Program."""
+
+        for i, program in enumerate(programs):
+            sample = get_samples(batch, i)
+            self.push_sample(sample, program)
+
+    def push_best(self, batch, programs):
+        """Push the single best sample (highest reward) from a Batch."""
+
+        i = np.argmax(batch.rewards)
+        sample = get_samples(batch, i)
+        program = programs[i]
+        self.push_sample(sample, program)
+
+    def sample_batch(self, sample_size):
+        """Randomly select items from the queue and return them as a Batch."""
+
+        assert len(self.heap) > 0, "Cannot sample from an empty queue."
+        samples = [sample for (id_, sample) in self.random_sample(sample_size)]
+        batch = self._make_batch(samples)
+        return batch
+
+    def _make_batch(self, samples):
+        """Turns a list of samples into a Batch by stacking each field along
+        a new leading axis."""
+
+        actions = np.stack([s.actions for s in samples], axis=0)
+        # Only the first three components of obs are stacked
+        obs = tuple([np.stack([s.obs[i] for s in samples], axis=0) for i in range(3)])
+        priors = np.stack([s.priors for s in samples], axis=0)
+        lengths = np.array([s.lengths for s in samples], dtype=np.int32)
+        rewards = np.array([s.rewards for s in samples], dtype=np.float32)
+        batch = Batch(actions=actions, obs=obs, priors=priors,
+                      lengths=lengths, rewards=rewards)
+        return batch
+
+    def to_batch(self):
+        """Return the entire queue as a Batch."""
+
+        samples = [container.extra_data for container in self.heap]
+        batch = self._make_batch(samples)
+        return batch
+
+    def compute_probs(self):
+        """Computes the probabilities of items in the queue according to the
+        Controller.
+
+        Raises
+        ------
+        RuntimeError
+            If the queue was created without a Controller.
+        """
+
+        if self.controller is None:
+            # Adjacent literals keep indentation out of the message, unlike a
+            # backslash continuation inside a single string literal.
+            raise RuntimeError("Cannot compute probabilities. This Queue does "
+                               "not have a Controller.")
+        return self.controller.compute_probs(self.to_batch())
+
+    def get_rewards(self):
+        """Returns the rewards of all stored samples as a list, in the order
+        of `self.heap`."""
+
+        r = [container.extra_data.rewards for container in self.heap]
+        return r
diff --git a/dsr/dsr/prior.py b/dsr/dsr/prior.py
new file mode 100644
index 00000000..510d79f8
--- /dev/null
+++ b/dsr/dsr/prior.py
@@ -0,0 +1,527 @@
+"""Class for Prior object."""
+
+import numpy as np
+
+from dsr.subroutines import ancestors
+from dsr.library import TokenNotFoundError
+
+
+def make_prior(library, config_prior):
+    """
+    Build a JointPrior from a configuration mapping.
+
+    Parameters
+    ----------
+    library : Library
+        Library handed to every Prior constructor.
+
+    config_prior : dict
+        Maps a prior type name (one of the registry keys below) to either a
+        single dict of keyword arguments or an iterable of such dicts, one
+        per Prior instance to create.
+
+    Returns
+    -------
+    joint_prior : JointPrior
+        Combination of all Priors that were built and passed validate().
+        Priors that fail are skipped and reported as printed warnings.
+    """
+
+    registry = {
+        "relational" : RelationalConstraint,
+        "length" : LengthConstraint,
+        "repeat" : RepeatConstraint,
+        "inverse" : InverseUnaryConstraint,
+        "trig" : TrigConstraint,
+        "const" : ConstConstraint
+    }
+
+    accepted = []
+    problems = []
+    for prior_type, prior_args in config_prior.items():
+        assert prior_type in registry, \
+            "Unrecognized prior type: {}.".format(prior_type)
+        prior_class = registry[prior_type]
+
+        # A bare dict describes exactly one Prior of this type.
+        arg_sets = [prior_args] if isinstance(prior_args, dict) else prior_args
+
+        for kwargs in arg_sets:
+            # Construction fails when the arguments reference a Token that
+            # is not in the Library; validate() reports degenerate setups.
+            try:
+                candidate = prior_class(library, **kwargs)
+                reason = candidate.validate()
+            except TokenNotFoundError:
+                candidate = None
+                reason = "Uses Tokens not in the Library."
+
+            if reason is None:
+                accepted.append(candidate)
+                continue
+
+            # Record why this Prior is being left out.
+            problems.append(
+                "Skipping invalid '{}' with arguments {}. Reason: {}"
+                .format(prior_class.__name__, kwargs, reason))
+
+    joint_prior = JointPrior(library, accepted)
+
+    print("-- Building prior -------------------")
+    print("\n".join("WARNING: " + text for text in problems))
+    print(joint_prior.describe())
+    print("-------------------------------------")
+
+    return joint_prior
+
+
+class JointPrior():
+    """
+    A collection of Priors whose logit adjustments are summed.
+
+    Calling the object evaluates every member Prior on the same inputs and
+    returns the element-wise sum of their outputs.
+    """
+
+    def __init__(self, library, priors):
+        """
+        Parameters
+        ----------
+        library : Library
+            The Library associated with the Priors.
+
+        priors : list of Prior
+            The individual Priors to be joined. Each must reference the
+            very same Library object.
+        """
+
+        self.library = library
+        self.L = self.library.L
+        self.priors = priors
+        assert all(prior.library is library for prior in priors), \
+            "All Libraries must be identical."
+
+        self.requires_parents_siblings = True
+
+    def initial_prior(self):
+        """
+        Sum the initial priors of all members.
+
+        Returns
+        -------
+        combined_prior : np.ndarray, shape=(L,), dtype=np.float32
+            All zeros when there are no member Priors.
+        """
+
+        combined_prior = np.zeros((self.L,), dtype=np.float32)
+        for prior in self.priors:
+            combined_prior += prior.initial_prior()
+        return combined_prior
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """
+        Sum the priors of all members for the current actions.
+
+        Returns
+        -------
+        combined_prior : np.ndarray, shape=(actions.shape[0], L),
+                         dtype=np.float32
+            All zeros when there are no member Priors.
+        """
+
+        # A single accumulator replaces one zero matrix per member; member
+        # outputs of shape (L,) broadcast across the batch dimension.
+        combined_prior = np.zeros((actions.shape[0], self.L),
+                                  dtype=np.float32)
+        for prior in self.priors:
+            combined_prior += prior(actions, parent, sibling, dangling)
+        return combined_prior
+
+    def describe(self):
+        """Return the members' descriptions, one per line."""
+
+        return "\n".join(prior.describe() for prior in self.priors)
+
+
+class Prior():
+    """
+    Base class for priors. Calling a Prior yields logit adjustments.
+
+    Subclasses are expected to override __call__; the base implementation
+    raises NotImplementedError.
+    """
+
+    def __init__(self, library):
+        self.library = library
+        self.L = library.L
+
+    def validate(self):
+        """
+        Check whether the Prior is usable as configured.
+
+        Other algorithmic parameters can make a Prior degenerate, e.g. a
+        TrigConstraint when there are no trig Tokens.
+
+        Returns
+        -------
+        message : str or None
+            Explanation when the Prior is invalid; None when it is valid.
+            The base class always reports valid.
+        """
+
+        return None
+
+    def init_zeros(self, actions):
+        """
+        Create an all-zero prior with one row per row of `actions`.
+
+        Returns
+        -------
+        prior : np.ndarray, shape=(actions.shape[0], self.L),
+                dtype=np.float32
+        """
+
+        n_rows = actions.shape[0]
+        return np.zeros((n_rows, self.L), dtype=np.float32)
+
+    def initial_prior(self):
+        """
+        Logit adjustment that applies before any action has been selected.
+
+        Returns
+        -------
+        initial_prior : np.ndarray, shape=(self.L,), dtype=np.float32
+            All zeros in the base class. It carries no batch dimension
+            because it is broadcast to the batch size later.
+        """
+
+        no_adjustment = np.zeros((self.L,), dtype=np.float32)
+        return no_adjustment
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """
+        Logit adjustment for the next action given the current actions.
+
+        Returns
+        -------
+        prior : array
+            Shape is (batch_size, self.L).
+
+        Raises
+        ------
+        NotImplementedError
+            Always, in the base class.
+        """
+
+        raise NotImplementedError
+
+    def describe(self):
+        """Return a human-readable description of the Prior."""
+
+        return "No description."
+
+
+class Constraint(Prior):
+    """A Prior that expresses hard constraints: every logit adjustment is
+    either 0.0 (allowed) or -np.inf (forbidden)."""
+
+    def __init__(self, library):
+        Prior.__init__(self, library)
+
+    def make_constraint(self, mask, tokens):
+        """
+        Build the prior that forbids `tokens` in the rows selected by
+        `mask`.
+
+        For example, with L=5 and tokens=[1,2], a constrained row of the
+        prior will be: [0.0, -np.inf, -np.inf, 0.0, 0.0]. Rows that are not
+        selected by `mask` stay all zero.
+
+        Parameters
+        ----------
+        mask : np.ndarray, shape=(?,), dtype=np.bool_
+            Boolean mask of samples to constrain.
+
+        tokens : np.ndarray, dtype=np.int32
+            Tokens to constrain.
+
+        Returns
+        -------
+        prior : np.ndarray, shape=(?, L), dtype=np.float32
+            Logit adjustment, with -np.inf at every (selected row, token)
+            position and 0.0 elsewhere.
+        """
+
+        n_rows = mask.shape[0]
+        logits = np.zeros((n_rows, self.L), dtype=np.float32)
+        # One column at a time: rows come from the mask, column from token.
+        for token in tokens:
+            logits[mask, token] = -np.inf
+        return logits
+
+
+class RelationalConstraint(Constraint):
+    """
+    Class that constrains the following:
+
+        Constrain (any of) `targets` from being the `relationship` of (any of)
+        `effectors`.
+
+    Parameters
+    ----------
+    targets : list of Tokens
+        List of Tokens, all of which will be constrained if any of effectors
+        are the given relationship.
+
+    effectors : list of Tokens
+        List of Tokens, any of which will cause all targets to be constrained
+        if they are the given relationship.
+
+    relationship : choice of ["child", "descendant", "sibling", "uchild"]
+        The type of relationship to constrain.
+    """
+
+    def __init__(self, library, targets, effectors, relationship):
+        Prior.__init__(self, library)
+        # actionize() converts the Token specifications to Library actions.
+        self.targets = library.actionize(targets)
+        self.effectors = library.actionize(effectors)
+        self.relationship = relationship
+
+    def validate(self):
+        """
+        Returns
+        -------
+        message : str or None
+            Reason the constraint is degenerate (terminal effectors for a
+            parent-like relationship, or no targets/effectors), else None.
+        """
+
+        if self.relationship in ["child", "descendant", "uchild"]:
+            if np.isin(self.effectors, self.library.terminal_tokens).any():
+                message = "{} relationship cannot have terminal effectors." \
+                          .format(self.relationship.capitalize())
+                return message
+        if len(self.targets) == 0:
+            message = "There are no target Tokens."
+            return message
+        if len(self.effectors) == 0:
+            message = "There are no effector Tokens."
+            return message
+        return None
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """
+        Returns
+        -------
+        prior : np.ndarray, shape=(batch_size, L), dtype=np.float32
+            -np.inf for constrained Tokens in constrained rows, else 0.0.
+
+        Raises
+        ------
+        ValueError
+            If self.relationship is not one of the supported choices.
+        """
+
+        if self.relationship == "descendant":
+            mask = ancestors(actions=actions,
+                             arities=self.library.arities,
+                             ancestor_tokens=self.effectors)
+            prior = self.make_constraint(mask, self.targets)
+
+        elif self.relationship == "child":
+            parents = self.effectors
+            adj_parents = self.library.parent_adjust[parents]
+            mask = np.isin(parent, adj_parents)
+            prior = self.make_constraint(mask, self.targets)
+
+        elif self.relationship == "sibling":
+            # The sibling relationship is symmetric: if A is a sibling of B,
+            # then B is also a sibling of A. Thus, we combine two priors, where
+            # targets and effectors are swapped.
+            mask = np.isin(sibling, self.effectors)
+            prior = self.make_constraint(mask, self.targets)
+            mask = np.isin(sibling, self.targets)
+            prior += self.make_constraint(mask, self.effectors)
+
+        elif self.relationship == "uchild":
+            # Case 1: parent is a unary effector
+            unary_effectors = np.intersect1d(self.effectors,
+                                             self.library.unary_tokens)
+            adj_unary_effectors = self.library.parent_adjust[unary_effectors]
+            mask = np.isin(parent, adj_unary_effectors)
+            # Case 2: sibling is a target and parent is an effector
+            adj_effectors = self.library.parent_adjust[self.effectors]
+            mask = np.logical_or(
+                mask,
+                np.logical_and(np.isin(sibling, self.targets),
+                               np.isin(parent, adj_effectors)))
+            # Pass the targets themselves (not wrapped in a list) so each
+            # target column is masked independently. The wrapped form indexed
+            # rows and targets jointly, which only broadcasts when there is
+            # exactly one target.
+            prior = self.make_constraint(mask, self.targets)
+
+        else:
+            raise ValueError("Unrecognized relationship: {}."
+                             .format(self.relationship))
+
+        return prior
+
+    def describe(self):
+        """Return a sentence naming the targets, relationship, effectors."""
+
+        targets = ", ".join([self.library.names[t] for t in self.targets])
+        effectors = ", ".join([self.library.names[t] for t in self.effectors])
+        relationship = {
+            "child" : "a child",
+            "sibling" : "a sibling",
+            "descendant" : "a descendant",
+            "uchild" : "the only unique child"
+        }[self.relationship]
+        message = "[{}] cannot be {} of [{}]." \
+                  .format(targets, relationship, effectors)
+        return message
+
+
+class TrigConstraint(RelationalConstraint):
+    """RelationalConstraint preset: trig Tokens cannot be descendants of
+    trig Tokens."""
+
+    def __init__(self, library):
+        # The Library's trig Tokens act as both targets and effectors.
+        RelationalConstraint.__init__(
+            self,
+            library,
+            targets=library.trig_tokens,
+            effectors=library.trig_tokens,
+            relationship="descendant")
+
+
+class ConstConstraint(RelationalConstraint):
+    """RelationalConstraint preset: the const Token cannot be the only
+    unique child of any unary or binary Token."""
+
+    def __init__(self, library):
+        const = library.const_token
+        # Effectors are all unary Tokens followed by all binary Tokens.
+        operators = np.concatenate([library.unary_tokens,
+                                    library.binary_tokens])
+        RelationalConstraint.__init__(
+            self,
+            library,
+            targets=const,
+            effectors=operators,
+            relationship="uchild")
+
+
+class InverseUnaryConstraint(Constraint):
+    """Constraint that forbids each unary Token from being the child of its
+    corresponding inverse unary Token.
+
+    One "child" RelationalConstraint is created per (target, effector) pair
+    found in library.inverse_tokens."""
+
+    def __init__(self, library):
+        Prior.__init__(self, library)
+        self.priors = [
+            RelationalConstraint(library,
+                                 targets=[target],
+                                 effectors=[effector],
+                                 relationship="child")
+            for target, effector in library.inverse_tokens.items()
+        ]
+
+    def validate(self):
+        """Report an error message when the Library has no inverse pairs,
+        otherwise None."""
+
+        if len(self.priors) != 0:
+            return None
+        return "There are no inverse unary Token pairs in the Library."
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """Return the sum of the per-pair constraint priors."""
+
+        per_pair = [pair_prior(actions, parent, sibling, dangling)
+                    for pair_prior in self.priors]
+        return sum(per_pair)
+
+    def describe(self):
+        """Return the per-pair descriptions, one per line."""
+
+        return "\n".join([pair_prior.describe() for pair_prior in self.priors])
+
+
+class RepeatConstraint(Constraint):
+    """Constraint on how many times a set of Tokens may appear, in total,
+    within a sequence. Only the maximum is currently supported."""
+
+    def __init__(self, library, tokens, min_=None, max_=None):
+        """
+        Parameters
+        ----------
+        tokens : Token or list of Tokens
+            Token(s) which should, in total, occur between min_ and max_ times.
+
+        min_ : int or None
+            Minimum number of times tokens should occur. Must currently be
+            None; any other value fails an assertion.
+
+        max_ : int or None
+            Maximum number of times tokens should occur.
+        """
+
+        Prior.__init__(self, library)
+        assert min_ is not None or max_ is not None, \
+            "At least one of (min_, max_) must not be None."
+        self.min = min_
+        self.max = max_
+        self.tokens = library.actionize(tokens)
+
+        assert min_ is None, "Repeat minimum constraints are not yet " \
+            "supported. This requires knowledge of length constraints."
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """Forbid the tracked Tokens in every row that has already used
+        them self.max times."""
+
+        # Per-row number of tracked Tokens among the actions taken so far.
+        occurrences = np.sum(np.isin(actions, self.tokens), axis=1)
+        prior = self.init_zeros(actions)
+        if self.min is not None:
+            raise NotImplementedError
+        if self.max is not None:
+            exhausted = occurrences >= self.max
+            prior += self.make_constraint(exhausted, self.tokens)
+        return prior
+
+    def describe(self):
+        """Return a sentence stating the allowed number of occurrences."""
+
+        names = ", ".join([self.library.names[t] for t in self.tokens])
+        if self.min is None:
+            return "[{}] cannot occur more than {} times."\
+                .format(names, self.max)
+        if self.max is None:
+            return "[{}] must occur at least {} times."\
+                .format(names, self.min)
+        return "[{}] must occur between {} and {} times."\
+            .format(names, self.min, self.max)
+
+
+class LengthConstraint(Constraint):
+    """Class that constrains the Program to fall within a minimum and/or
+    maximum length"""
+
+    def __init__(self, library, min_=None, max_=None):
+        """
+        Parameters
+        ----------
+        min_ : int or None
+            Minimum length of the Program.
+
+        max_ : int or None
+            Maximum length of the Program.
+        """
+
+        Prior.__init__(self, library)
+        self.min = min_
+        self.max = max_
+
+        assert min_ is not None or max_ is not None, \
+            "At least one of (min_, max_) must not be None."
+
+    def initial_prior(self):
+        """
+        Returns
+        -------
+        initial_prior : np.ndarray, shape=(L,), dtype=np.float32
+            -np.inf for terminal Tokens when a minimum length above 1 is
+            requested, otherwise all zeros.
+        """
+
+        prior = Prior.initial_prior(self)
+        # Choosing a terminal first completes a length-1 Program, so
+        # terminals only need to be excluded when the minimum length exceeds
+        # 1. This is the `(i + 2) < self.min` rule of __call__ evaluated
+        # before any action exists (i == -1, one dangling node).
+        if self.min is not None and self.min > 1:
+            for t in self.library.terminal_tokens:
+                prior[t] = -np.inf
+        return prior
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """
+        Returns
+        -------
+        prior : np.ndarray, shape=(batch_size, L), dtype=np.float32
+            -np.inf for Tokens whose selection would violate the length
+            bounds given the number of dangling nodes, else 0.0.
+        """
+
+        # Initialize the prior
+        prior = self.init_zeros(actions)
+        i = actions.shape[1] - 1 # Current time
+
+        # Never need to constrain max length for first half of expression
+        if self.max is not None and (i + 2) >= self.max // 2:
+            remaining = self.max - (i + 1)
+            # assert sum(dangling > remaining) == 0, (dangling, remaining)
+            mask = dangling >= remaining - 1 # Constrain binary
+            prior += self.make_constraint(mask, self.library.binary_tokens)
+            mask = dangling == remaining # Constrain unary
+            prior += self.make_constraint(mask, self.library.unary_tokens)
+
+        # Constrain terminals when dangling == 1 until selecting the
+        # (min_length)th token
+        if self.min is not None and (i + 2) < self.min:
+            mask = dangling == 1 # Constrain terminals
+            prior += self.make_constraint(mask, self.library.terminal_tokens)
+
+        return prior
+
+    def describe(self):
+        """Return one line per configured bound (minimum, then maximum)."""
+
+        message = []
+        if self.min is not None:
+            message.append("Sequences have minimum length {}.".format(self.min))
+        if self.max is not None:
+            message.append("Sequences have maximum length {}.".format(self.max))
+        message = "\n".join(message)
+        return message
+
+
+class UniformArityPrior(Prior):
+    """Fixed prior over arities: shifts the initial distribution from
+    uniform over tokens to uniform over arities."""
+
+    def __init__(self, library):
+
+        Prior.__init__(self, library)
+
+        # Every token of a given arity has log(n) subtracted from its logit,
+        # where n is the number of tokens in the library sharing that arity.
+        adjust = np.zeros((self.L,), dtype=np.float32)
+        for same_arity in self.library.tokens_of_arity.values():
+            adjust[same_arity] -= np.log(len(same_arity))
+        self.logit_adjust = adjust
+
+    def initial_prior(self):
+        """Return the fixed per-token adjustment, shape (L,)."""
+
+        return self.logit_adjust
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """Return the same fixed adjustment regardless of the inputs."""
+
+        # Shape is (L,); it is broadcast when added to the joint prior.
+        return self.logit_adjust
+
+
+class SoftLengthPrior(Prior):
+    """Class that puts a soft prior on length. Before loc, terminal
+    probabilities are scaled by exp(-(t - loc) ** 2 / (2 * scale)) where
+    dangling == 1. After loc, non-terminal probabilities are scaled by that
+    number."""
+
+    def __init__(self, library, loc, scale):
+        """
+        Parameters
+        ----------
+        loc : number
+            Time step at which the adjustment switches from terminals to
+            non-terminals.
+
+        scale : number
+            Divisor (times two) of the squared distance from loc.
+        """
+
+        Prior.__init__(self, library)
+
+        self.loc = loc
+        self.scale = scale
+
+        # np.bool_ is used because the np.bool alias was deprecated in NumPy
+        # 1.20 and removed in 1.24.
+        self.terminal_mask = np.zeros((self.L,), dtype=np.bool_)
+        self.terminal_mask[self.library.terminal_tokens] = True
+
+        self.nonterminal_mask = ~self.terminal_mask
+
+    def __call__(self, actions, parent, sibling, dangling):
+        """
+        Returns
+        -------
+        prior : np.ndarray, shape=(batch_size, L), dtype=np.float32
+            Non-positive logit adjustment as described in the class
+            docstring.
+        """
+
+        # Initialize the prior
+        prior = self.init_zeros(actions)
+        t = actions.shape[1] # Current time
+
+        # Adjustment to terminal or non-terminal logits
+        logit_adjust = -(t - self.loc) ** 2 / (2 * self.scale)
+
+        # Before loc, decrease p(terminal) where dangling == 1
+        if t < self.loc:
+            prior[dangling == 1] += self.terminal_mask * logit_adjust
+
+        # After loc, decrease p(non-terminal)
+        else:
+            prior += self.nonterminal_mask * logit_adjust
+
+        return prior
diff --git a/dsr/dsr/program.py b/dsr/dsr/program.py
new file mode 100644
index 00000000..d00f3e30
--- /dev/null
+++ b/dsr/dsr/program.py
@@ -0,0 +1,640 @@
+"""Class for symbolic expression object or program."""
+
+import array
+import os
+import warnings
+from textwrap import indent
+
+import numpy as np
+from sympy.parsing.sympy_parser import parse_expr
+from sympy import pretty
+
+from dsr.functions import PlaceholderConstant
+from dsr.const import make_const_optimizer
+from dsr.utils import cached_property
+import dsr.utils as U
+
+
+def _finish_tokens(tokens):
+    """
+    Complete the pre-order traversal.
+
+    Parameters
+    ----------
+    tokens : np.ndarray of integers
+        Integers corresponding to tokens in the library. The sequence
+        defines an expression's pre-order traversal.
+
+    Returns
+    -------
+    tokens : np.ndarray of integers
+        The traversal truncated at the point where the expression first
+        completes. "Dangling" sequences that never complete are instead
+        extended with randomly chosen input tokens, one per dangling node.
+    """
+
+    arities = np.array([Program.library.arities[t] for t in tokens])
+    # Number of unfilled child slots after each position (root counts as 1).
+    dangling = 1 + np.cumsum(arities - 1)
+
+    if 0 in dangling:
+        expr_length = 1 + np.argmax(dangling == 0)
+        tokens = tokens[:expr_length]
+    else:
+        # Fill every dangling slot with a randomly drawn input token
+        filler = np.random.choice(Program.library.input_tokens,
+                                  size=dangling[-1])
+        # np.append promotes to the wider of the two dtypes. Cast back so an
+        # extended traversal serializes to the same bytes as the identical
+        # traversal that arrived complete (the bytes are used as cache key).
+        dtype = np.asarray(tokens).dtype
+        tokens = np.append(tokens, filler).astype(dtype)
+
+    return tokens
+
+
+def from_str_tokens(str_tokens, optimize, skip_cache=False):
+    """
+    Memoized function to generate a Program from a list of str and/or float.
+    See from_tokens() for details.
+
+    Token names are matched against the library names exactly first and
+    case-insensitively (lower-cased) second; surrounding whitespace is
+    ignored. Anything else that parses as a float becomes a hard-coded
+    constant.
+
+    Parameters
+    ----------
+    str_tokens : str | list of (str | float)
+        Either a comma-separated string of tokens and/or floats, or a list of
+        str and/or floats.
+
+    optimize : bool
+        See from_tokens().
+
+    skip_cache : bool
+        See from_tokens().
+
+    Returns
+    -------
+    program : Program
+        See from_tokens().
+
+    Raises
+    ------
+    ValueError
+        If the input is neither a str nor a list, or an entry is neither a
+        known token name nor a float.
+    """
+
+    # Convert str to list of str
+    if isinstance(str_tokens, str):
+        str_tokens = str_tokens.split(",")
+
+    if not isinstance(str_tokens, list):
+        raise ValueError("Input must be list or string.")
+
+    # Ignore whitespace around names, e.g. from "add, x1, x1"
+    entries = [s.strip() if isinstance(s, str) else s for s in str_tokens]
+
+    # Convert list of str|float to list of tokens
+    names = Program.library.names
+    traversal = []
+    constants = []
+    for s in entries:
+        if s in names:
+            t = names.index(s)
+        elif isinstance(s, str) and s.lower() in names:
+            # The membership test and the lookup use the same lower-cased
+            # key, so a mixed-case spelling resolves instead of failing.
+            t = names.index(s.lower())
+        elif U.is_float(s):
+            assert "const" not in entries, "Currently does not support both placeholder and hard-coded constants."
+            assert not optimize, "Currently does not support optimization with hard-coded constants."
+            t = Program.library.const_token
+            constants.append(float(s))
+        else:
+            raise ValueError("Did not recognize token {}.".format(s))
+        traversal.append(t)
+    traversal = np.array(traversal, dtype=np.int32)
+
+    # Generate base Program (with "const" for constants)
+    p = from_tokens(traversal, optimize=optimize, skip_cache=skip_cache)
+
+    # Replace any constants
+    # NOTE(review): when p comes from the cache this modifies the shared
+    # cached Program -- confirm that is intended.
+    p.set_constants(constants)
+
+    return p
+
+
+def from_tokens(tokens, optimize, skip_cache=False):
+    """
+    Memoized function to generate a Program from a list of tokens.
+
+    Since some tokens are nonfunctional, this first computes the corresponding
+    traversal. If that traversal exists in the cache, the corresponding Program
+    is returned. Otherwise, a new Program is returned.
+
+    Parameters
+    ----------
+    tokens : np.ndarray of integers
+        Integers corresponding to tokens in the library. The sequence
+        defines an expression's pre-order traversal. It is truncated or
+        extended by _finish_tokens() before use.
+
+    optimize : bool
+        Whether to optimize the program before returning it.
+
+    skip_cache : bool
+        Whether to bypass the cache when creating the program.
+
+    Returns
+    -------
+    program : Program
+        The Program corresponding to the tokens, either pulled from memoization
+        or generated from scratch.
+    """
+
+    # Truncate expressions that complete early; extend ones that don't
+    # complete
+    tokens = _finish_tokens(tokens)
+
+    # For stochastic Tasks, there is no cache; always generate a new Program.
+    # The same applies when the caller asks to skip the cache.
+    if skip_cache or Program.task.stochastic:
+        return Program(tokens, optimize=optimize)
+
+    # For deterministic Tasks, if the Program is in the cache, return it;
+    # otherwise, create a new one and add it to the cache.
+    # tobytes() returns the same bytes as the deprecated tostring().
+    key = tokens.tobytes()
+    if key in Program.cache:
+        p = Program.cache[key]
+        p.count += 1
+    else:
+        p = Program(tokens, optimize=optimize)
+        Program.cache[key] = p
+
+    return p
+
+
+class Program(object):
+    """
+    The executable program representing the symbolic expression.
+
+    The program comprises unary/binary operators, constant placeholders
+    (to-be-optimized), input variables, and hard-coded constants.
+
+    Parameters
+    ----------
+    tokens : list of integers
+        A list of integers corresponding to tokens in the library. "Dangling"
+        programs are completed with repeated "x1" until the expression
+        completes.
+
+    optimize : bool
+        Whether to optimize the program upon initializing it.
+
+    Attributes
+    ----------
+    traversal : list
+        List of operators (type: Function) and terminals (type: int, float, or
+        str ("const")) encoding the pre-order traversal of the expression tree.
+
+    tokens : np.ndarray (dtype: int)
+        Array of integers whose values correspond to indices
+
+    const_pos : list of int
+        A list of indices of constant placeholders along the traversal.
+
+    float_pos : list of float
+        A list of indices of constants placeholders or floating-point constants
+        along the traversal.
+
+    sympy_expr : str
+        The (lazily calculated) SymPy expression corresponding to the program.
+        Used for pretty printing _only_.
+
+    base_r : float
+        The base reward (reward without penalty) of the program on the training
+        data.
+
+    complexity : float
+        The (lazily calculated) complexity of the program.
+
+    r : float
+        The (lazily calculated) reward of the program on the training data.
+
+    count : int
+        The number of times this Program has been sampled.
+
+    str : str
+        String representation of tokens. Useful as unique identifier.
+    """
+
+    # Static variables
+    task = None             # Task
+    library = None          # Library
+    const_optimizer = None  # Function to optimize constants
+    cache = {}
+
+    # Cython-related static variables
+    have_cython = None      # Do we have cython installed
+    execute = None          # Link to execute. Either cython or python
+    cyfunc = None           # Link to cyfunc lib since we do an include inline
+
+    def __init__(self, tokens, optimize):
+        """
+        Builds the Program from a list of Tokens and, when requested,
+        optimizes its constants against the reward function.
+
+        Parameters
+        ----------
+        tokens : np.ndarray of integers
+            Indices into Program.library, in pre-order.
+
+        optimize : bool
+            Whether to call self.optimize() during construction.
+        """
+
+        self.traversal = [Program.library[t] for t in tokens]
+        # Just constant placeholder positions
+        self.const_pos = [i for i, token in enumerate(self.traversal)
+                          if token.name == "const"]
+        self.len_traversal = len(self.traversal)
+
+        # Per-token flags handed to the Cython executor; single-token
+        # Programs are executed in Python (see cython_execute).
+        if self.have_cython and self.len_traversal > 1:
+            self.is_input_var = array.array('i', [t.input_var is not None for t in self.traversal])
+
+        self.invalid = False
+        # tobytes() returns the same bytes as the deprecated tostring().
+        self.str = tokens.tobytes()
+
+        if optimize:
+            _ = self.optimize()
+
+        self.count = 1
+
+    def cython_execute(self, X):
+        """Run the program on X through the Cython executor.
+
+        Programs whose traversal has a single token are delegated to
+        python_execute() instead.
+
+        Parameters
+        ----------
+        X : array-like, shape = [n_samples, n_features]
+            Input vectors, one row per sample and one column per feature.
+
+        Returns
+        -------
+        y_hats : array-like, shape = [n_samples]
+            The result of executing the program on X.
+        """
+
+        if self.len_traversal <= 1:
+            return self.python_execute(X)
+        return self.cyfunc.execute(X, self.len_traversal, self.traversal, self.is_input_var)
+
+    def python_execute(self, X):
+        """Run the program on X with the pure-Python stack evaluator.
+
+        Parameters
+        ----------
+        X : array-like, shape = [n_samples, n_features]
+            Input vectors, one row per sample and one column per feature.
+
+        Returns
+        -------
+        y_hats : array-like, shape = [n_samples]
+            The result of executing the program on X.
+        """
+
+        # Each frame is [token, arg_1, ..., arg_k]; a frame is ready to be
+        # evaluated once it holds as many arguments as the token's arity.
+        stack = []
+
+        for node in self.traversal:
+
+            stack.append([node])
+
+            while len(stack[-1]) == stack[-1][0].arity + 1:
+                frame = stack[-1]
+                token, args = frame[0], frame[1:]
+                if token.input_var is None:
+                    value = token(*args)
+                else:
+                    # Input variables read their column of X.
+                    value = X[:, token.input_var]
+                if len(stack) == 1:
+                    # The root frame has been evaluated.
+                    return value
+                # Hand the value to the parent frame as its next argument.
+                stack.pop()
+                stack[-1].append(value)
+
+        # Only reachable when the traversal never completes
+        assert False, "Function should never get here!"
+        return None
+    
+    
+    def optimize(self):
+        """
+        Fit the constant placeholders to maximize the Task's reward.
+
+        The objective handed to the constant optimizer sets the candidate
+        constants on this Program and returns the negated reward. Penalties
+        are ignored because the Program structure is fixed, so they are the
+        same for every candidate. The best constants found are left set on
+        the Program.
+
+        Returns
+        -------
+        optimized_constants : vector
+            Array of optimized constants, or an empty list when the Program
+            has no constant placeholders.
+        """
+
+        def objective(consts):
+            self.set_constants(consts)
+            reward = self.task.reward_function(self)
+
+            # A single invalid evaluation during the search must not mark
+            # the whole Program invalid, so the flag is cleared every time.
+            self.invalid = False
+
+            # The constant optimizer minimizes, hence the sign flip.
+            return -reward
+
+        assert self.execute is not None, "set_execute needs to be called first"
+
+        n_consts = len(self.const_pos)
+        if n_consts == 0:
+            # Nothing to fit
+            return []
+
+        initial_guess = np.ones(n_consts)
+        optimized_constants = Program.const_optimizer(objective, initial_guess)
+        self.set_constants(optimized_constants)
+        return optimized_constants
+
+    def set_constants(self, consts):
+        """Write the given values into the constant positions of the
+        traversal; the k-th value goes to position self.const_pos[k]."""
+
+        for k, value in enumerate(consts):
+            # A fresh PlaceholderConstant is created for every value.
+            # Updating a shared instance in place would make all Programs
+            # overwrite each other's constants.
+            replacement = PlaceholderConstant(value)
+            self.traversal[self.const_pos[k]] = replacement
+
+    @classmethod
+    def clear_cache(cls):
+        """Replace the class-level Program cache with a new empty dict."""
+
+        cls.cache = dict()
+
+    @classmethod
+    def set_task(cls, task):
+        """Register the Task on Program, together with the Task's Library.
+
+        Both are stored on the Program class itself (not on `cls`)."""
+
+        Program.task = task
+        # The Library always comes from the Task.
+        Program.library = task.library
+
+    @classmethod
+    def set_const_optimizer(cls, name, **kwargs):
+        """Build a constant optimizer with make_const_optimizer(name,
+        **kwargs) and store it on the Program class."""
+
+        Program.const_optimizer = make_const_optimizer(name, **kwargs)
+
+    @classmethod
+    def set_complexity_penalty(cls, name, weight):
+        """Install Program.complexity_penalty.
+
+        Parameters
+        ----------
+        name : None or "length"
+            None selects a zero penalty; "length" selects len(p).
+
+        weight : number
+            Multiplier applied to the selected penalty. A weight of 0
+            installs a constant-zero penalty.
+        """
+
+        penalties = {
+            # No penalty
+            None : lambda p : 0.0,
+
+            # Length of tree
+            "length" : lambda p : len(p)
+        }
+
+        assert name in penalties, "Unrecognzied complexity penalty name"
+
+        if weight == 0:
+            Program.complexity_penalty = lambda p : 0.0
+            return
+
+        unweighted = penalties[name]
+        Program.complexity_penalty = lambda p : weight * unweighted(p)
+
+    @classmethod
+    def set_execute(cls, protected):
+        """Selects the execute function and installs it as Program.execute.
+
+        The Cython implementation is used when cyfunc.c exists next to this
+        file, otherwise the Python one. If protected is falsy, the function is
+        wrapped so numpy floating-point warnings flag the program as invalid."""
+        """
+        If cython ran, we will have a 'c' file generated. The dynamic libary can be 
+        given different names, so it's not reliable for testing if cython ran.
+        """
+        cpath = os.path.join(os.path.dirname(__file__),'cyfunc.c')
+        if os.path.isfile(cpath):
+            from .                  import cyfunc
+            Program.cyfunc          = cyfunc
+            execute_function        = Program.cython_execute
+            Program.have_cython     = True
+        else:
+            execute_function        = Program.python_execute
+            Program.have_cython     = False
+
+        if protected:
+            Program.execute = execute_function
+        else:
+            class InvalidLog():
+                """Log object handed to np.seterrcall; records the first warning per call"""
+
+                def __init__(self):
+                    self.error_type = None # One of ['divide', 'overflow', 'underflow', 'invalid']
+                    self.error_node = None # E.g. 'exp', 'log', 'true_divide'
+                    self.new_entry = False # Flag for whether a warning has been encountered during a call to Program.execute()
+
+                def write(self, message):
+                    """This is called by numpy when encountering a warning"""
+
+                    if not self.new_entry: # Only record the first warning encounter
+                        message = message.strip().split(' ') # NOTE(review): assumes word 1 is the error type and the last word the node -- confirm numpy's message format
+                        self.error_type = message[1]
+                        self.error_node = message[-1]
+                    self.new_entry = True
+
+                def update(self, p):
+                    """If a floating-point error was encountered, set p.invalid to
+                    True, copy the error type and node onto p, and reset the flag."""
+
+                    if self.new_entry:
+                        p.invalid = True
+                        p.error_type = self.error_type
+                        p.error_node = self.error_node
+                        self.new_entry = False
+
+            invalid_log = InvalidLog()
+            np.seterrcall(invalid_log) # Tells numpy to call InvalidLog.write() when encountering a warning
+
+            # Define closure for execute function (captures execute_function and invalid_log)
+            def unsafe_execute(p, X):
+                """This is a wrapper for execute_function. If a floating-point error
+                would be hit, a warning is logged instead, p.invalid is set to True,
+                and the appropriate nan/inf value is returned. It's up to the task's
+                reward function to decide how to handle nans/infs."""
+
+                with np.errstate(all='log'):
+                    y = execute_function(p, X)
+                    invalid_log.update(p)
+                    return y
+
+            Program.execute = unsafe_execute
+
+
+    @cached_property
+    def complexity(self):
+        """Complexity penalty of this program, computed from its traversal."""
+        penalty_fn = Program.complexity_penalty  # Looked up on the class
+        return penalty_fn(self.traversal)
+
+
+    @cached_property
+    def base_r(self):
+        """Base (unpenalized) reward from the task's reward function; warnings
+        raised during the computation are silenced."""
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            reward = self.task.reward_function(self)
+        return reward
+
+    @cached_property
+    def r(self):
+        """Final reward: the base reward minus the complexity penalty; warnings
+        raised during the computation are silenced."""
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            penalized = self.base_r - self.complexity
+        return penalized
+
+
+    @cached_property
+    def evaluate(self):
+        """Result of the task's evaluate() for this program, warnings muted."""
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            metrics = self.task.evaluate(self)
+        return metrics
+    
+    @cached_property
+    def complexity_eureqa(self):
+        """Total of the per-token `complexity` values along the traversal.
+
+        The token values are presumably Eureqa's measures -- TODO confirm."""
+        return sum(token.complexity for token in self.traversal)
+
+
+    @cached_property
+    def sympy_expr(self):
+        """
+        Returns the program as a SymPy expression, or "N/A" if parsing fails.
+
+        Conversion path: traversal --> tree --> serialized tree --> SymPy
+        expression.
+        """
+
+        # build_tree consumes the list it is given, so hand it a copy
+        tree = convert_to_sympy(build_tree(self.traversal.copy()))
+        try:
+            expr = parse_expr(repr(tree)) # SymPy expression
+        except Exception:
+            # Not every tree is parseable by SymPy. Catch Exception rather than
+            # using a bare except so Ctrl-C / SystemExit still propagate.
+            expr = "N/A"
+        return expr
+
+
+    def pretty(self):
+        """Returns sympy_expr rendered by the module-level pretty() function"""
+        return pretty(self.sympy_expr)
+
+
+    def print_stats(self):
+        """Prints reward, base reward, count, validity, traversal and expression"""
+        # getattr inside the loop keeps the evaluate-then-print order per field
+        for template, attr in (("\tReward: {}", "r"), ("\tBase reward: {}", "base_r"),
+                               ("\tCount: {}", "count"), ("\tInvalid: {}", "invalid")):
+            print(template.format(getattr(self, attr)))
+        print("\tTraversal: {}".format(self))
+        print("\tExpression:")
+        print("{}\n".format(indent(self.pretty(), '\t  ')))
+
+
+    def __repr__(self):
+        """Comma-separated repr() of every token in the traversal"""
+        token_reprs = map(repr, self.traversal)
+        return ','.join(token_reprs)
+
+
+###############################################################################
+# Everything below this line is currently only being used for pretty printing #
+###############################################################################
+
+
+# Token names that build_tree() capitalizes (e.g. "add" -> "Add") for SymPy
+capital = ["add", "mul", "pow"]
+
+
+class Node(object):
+    """Minimal n-ary tree node whose repr is a nested call string"""
+
+    def __init__(self, val):
+        self.children = []
+        self.val = val
+
+    def __repr__(self):
+        if not self.children:
+            return self.val # Leaf: bare value, no empty parentheses like x1()
+        args = ",".join(map(repr, self.children))
+        return "{}({})".format(self.val, args)
+
+
+def build_tree(traversal):
+    """Recursively builds a Node tree from a pre-order token list.
+
+    The list is consumed in place: each call pops the tokens it uses, so
+    pass a copy if the caller still needs the traversal afterwards.
+    """
+    token = traversal.pop(0)
+    label = repr(token)
+
+    # Names listed in `capital` get a leading capital (add -> Add)
+    node = Node(label.capitalize() if label in capital else label)
+    # Each child subtree starts wherever the previous one stopped consuming
+    node.children.extend(build_tree(traversal) for _ in range(token.arity))
+
+    return node
+
+
+def convert_to_sympy(node):
+    """Rewrites a Node tree in place so it only uses names SymPy understands.
+
+    Rewrites applied to a node (its children are then converted recursively):
+        div(a, b) -> Mul(a, Pow(b, -1))
+        sub(a, b) -> Add(a, Mul(b, -1))
+        inv(a)    -> Pow(a, -1)
+        neg(a)    -> Mul(a, -1)
+        n2(a), n3(a), n4(a) -> Pow(a, 2), Pow(a, 3), Pow(a, 4)
+
+    Returns the same, mutated, node.
+    """
+
+    # Binary tokens: val -> (replacement val, operator wrapping the right child)
+    binary_rewrites = {"div" : ("Mul", "Pow"), "sub" : ("Add", "Mul")}
+    # Unary tokens: val -> (replacement val, constant appended as 2nd child)
+    unary_rewrites = {
+        "inv" : ("Pow", "-1"),
+        "neg" : ("Mul", "-1"),
+        "n2" : ("Pow", "2"),
+        "n3" : ("Pow", "3"),
+        "n4" : ("Pow", "4"),
+    }
+
+    if node.val in binary_rewrites:
+        outer, inner = binary_rewrites[node.val]
+        node.val = outer
+        new_right = Node(inner)
+        new_right.children.append(node.children[1])
+        new_right.children.append(Node("-1"))
+        node.children[1] = new_right
+
+    elif node.val in unary_rewrites:
+        # val must stay a plain string (never a Node): it is compared against
+        # strings above and returned as-is by Node.__repr__ for leaves.
+        operator, constant = unary_rewrites[node.val]
+        node.val = operator
+        node.children.append(Node(constant))
+
+    for child in node.children:
+        convert_to_sympy(child)
+
+    return node
diff --git a/dsr/dsr/run.py b/dsr/dsr/run.py
new file mode 100644
index 00000000..94e2b65b
--- /dev/null
+++ b/dsr/dsr/run.py
@@ -0,0 +1,224 @@
+"""Parallelized, single-point launch script to run DSR or GP on a set of benchmarks."""
+
+import warnings
+warnings.filterwarnings('ignore', category=DeprecationWarning)
+warnings.filterwarnings('ignore', category=FutureWarning)
+
+import os
+import sys
+import json
+import time
+from datetime import datetime
+import multiprocessing
+from functools import partial
+from pkg_resources import resource_filename
+import zlib
+
+import click
+import numpy as np
+import pandas as pd
+from sympy.parsing.sympy_parser import parse_expr
+from sympy import srepr
+
+from dsr import DeepSymbolicOptimizer
+from dsr.program import Program
+from dsr.task.regression.dataset import BenchmarkDataset
+from dsr.baselines import gpsr
+
+
+def train_dsr(name_and_seed, config):
+    """Trains DSR on one benchmark and returns a dict describing the run
+
+    name_and_seed is a (name, seed) pair. config is modified in place (task
+    name and output file are overridden). The result holds "name", "seed",
+    everything model.train() returns except "program", and wall time "t"."""
+    # Override the benchmark name and output file
+    name, seed = name_and_seed
+    config["task"]["name"] = name
+    config["training"]["output_file"] = "dsr_{}_{}.csv".format(name, seed)
+
+    # Try importing TensorFlow (with suppressed warnings), Controller, and learn
+    # When parallelizing across tasks, these will already be imported, hence try/except
+    try:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+        import tensorflow as tf
+        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+        from dsr.controller import Controller
+        from dsr.train import learn
+    except ModuleNotFoundError: # Subclass of ImportError, so it must be handled first
+        print("One or more libraries not found")
+        raise # Bare raise keeps the message naming the missing module
+    except ImportError:
+        # Have we already imported tf? If so, this is the error we want to dodge.
+        if 'tf' not in globals():
+            raise # Re-raise the original error with its message and traceback
+
+    # Train the model
+    model = DeepSymbolicOptimizer(config)
+    start = time.time()
+    result = {"name" : name, "seed" : seed} # Name and seed are listed first
+    result.update(model.train(seed=seed))
+    result["t"] = time.time() - start
+    result.pop("program")
+    return result
+
+
+def train_gp(name_and_seed, logdir, config_task, config_gp):
+    """Trains GP on one benchmark and returns a dict describing the result
+
+    name_and_seed is a (name, seed) pair. config_gp["seed"] and
+    config_task["dataset"]["name"] are overwritten in place, and the run
+    statistics are written to <logdir>/gp_<name>_<seed>.csv."""
+    name, seed = name_and_seed
+    config_gp["seed"] = seed + zlib.adler32(name.encode("utf-8"))
+    start = time.time()
+
+    # Load the dataset
+    config_dataset = config_task["dataset"]
+    config_dataset["name"] = name
+    dataset = BenchmarkDataset(**config_dataset)
+
+    # Fit the GP
+    gp = gpsr.GP(dataset=dataset, **config_gp)
+    p, logbook = gp.train()
+
+    # Retrieve results
+    r = base_r = p.fitness.values[0]
+    str_p = str(p)
+    nmse_test = gp.nmse_test(p)[0]
+    nmse_test_noiseless = gp.nmse_test_noiseless(p)[0]
+    success = gp.success(p)
+
+    # Many failure cases right now for converting to SymPy expression
+    try:
+        expression = repr(parse_expr(str_p.replace("X", "x").replace("add", "Add").replace("mul", "Mul")))
+    except Exception: # Not a bare except: KeyboardInterrupt/SystemExit must propagate
+        expression = "N/A"
+
+    # Save run details
+    drop = ["gen", "nevals"]
+    df_fitness = pd.DataFrame(logbook.chapters["fitness"]).drop(drop, axis=1)
+    df_fitness = df_fitness.rename({"avg" : "fit_avg", "min" : "fit_min"}, axis=1)
+    df_fitness["fit_best"] = df_fitness["fit_min"].cummin()
+    df_len = pd.DataFrame(logbook.chapters["size"]).drop(drop, axis=1)
+    df_len = df_len.rename({"avg" : "l_avg"}, axis=1)
+    df = pd.concat([df_fitness, df_len], axis=1, sort=False)
+    df.to_csv(os.path.join(logdir, "gp_{}_{}.csv".format(name, seed)), index=False)
+
+    return {
+        "name" : name,
+        "seed" : seed,
+        "r" : r,
+        "base_r" : base_r,
+        "nmse_test" : nmse_test,
+        "nmse_test_noiseless" : nmse_test_noiseless,
+        "success" : success,
+        "expression" : expression,
+        "traversal" : str_p,
+        "t" : time.time() - start
+    }
+
+
+@click.command()
+@click.argument('config_template', default="config.json")
+@click.option('--method', default="dsr", type=click.Choice(["dsr", "gp"]), help="Symbolic regression method")
+@click.option('--mc', default=1, type=int, help="Number of Monte Carlo trials for each benchmark")
+@click.option('--output_filename', default=None, help="Filename to write results")
+@click.option('--n_cores_task', '--n', default=1, help="Number of cores to spread out across tasks")
+@click.option('--seed_shift', default=0, type=int, help="Integer to add to each seed (i.e. to combine multiple runs)")
+@click.option('--b', multiple=True, type=str, help="Name of benchmark or benchmark prefix")
+def main(config_template, method, mc, output_filename, n_cores_task, seed_shift, b):
+    """Runs DSR or GP on multiple benchmarks using multiprocessing."""
+    # NOTE: click shows the docstring above as the command's --help text
+    # Load the config file
+    with open(config_template, encoding='utf-8') as f:
+        config = json.load(f)
+
+    # Required configs
+    config_task = config["task"]            # Task specification parameters
+    config_training = config["training"]    # Training hyperparameters
+
+    # Optional configs
+    config_controller = config.get("controller")                        # Controller hyperparameters
+    config_language_model_prior = config.get("language_model_prior")    # Language model hyperparameters
+    config_gp = config.get("gp")                                        # GP hyperparameters
+
+    # Create output directories (one timestamped log directory per invocation)
+    if output_filename is None:
+        output_filename = "benchmark_{}.csv".format(method)
+    config_training["logdir"] = os.path.join(
+        config_training["logdir"],
+        "log_{}".format(datetime.now().strftime("%Y-%m-%d-%H%M%S")))
+    logdir = config_training["logdir"]
+    if "dataset" in config_task and "backup" in config_task["dataset"] and config_task["dataset"]["backup"]:
+        config_task["dataset"]["logdir"] = logdir
+    os.makedirs(logdir, exist_ok=True)
+    output_filename = os.path.join(logdir, output_filename)
+    # Use benchmark name from config if not specified as command-line arg
+    if len(b) == 0:
+        if isinstance(config_task["name"], str):
+            b = (config_task["name"],)
+        elif isinstance(config_task["name"], list):
+            b = tuple(config_task["name"])
+
+    # Shortcut to run all Nguyen benchmarks
+    benchmarks = list(b)
+    if "Nguyen" in benchmarks:
+        benchmarks.remove("Nguyen")
+        benchmarks += ["Nguyen-{}".format(i+1) for i in range(12)]
+
+    # Generate benchmark-seed pairs for each MC. When passed to the TF RNG,
+    # seeds will be added to checksums on the benchmark names
+    unique_benchmarks = benchmarks.copy()
+    benchmarks *= mc
+    seeds = (np.arange(mc) + seed_shift).repeat(len(unique_benchmarks)).tolist()
+    names_and_seeds = list(zip(benchmarks, seeds))
+
+    # Edit n_cores_task and/or n_cores_batch
+    if n_cores_task == -1: # -1 requests one worker per CPU core
+        n_cores_task = multiprocessing.cpu_count()
+    if n_cores_task > len(benchmarks):
+        print("Setting 'n_cores_task' to {} for batch because there are only {} benchmarks.".format(len(benchmarks), len(benchmarks)))
+        n_cores_task = len(benchmarks)
+    if method == "dsr":
+        if config_training["verbose"] and n_cores_task > 1:
+            print("Setting 'verbose' to False for parallelized run.")
+            config_training["verbose"] = False
+        if config_training["n_cores_batch"] != 1 and n_cores_task > 1:
+            print("Setting 'n_cores_batch' to 1 to avoid nested child processes.")
+            config_training["n_cores_batch"] = 1
+    print("Running {} for n={} on benchmarks {}".format(method, mc, unique_benchmarks))
+
+    # Write terminal command and config.json into log directory
+    cmd_filename = os.path.join(logdir, "cmd.out")
+    with open(cmd_filename, 'w') as f:
+        print(" ".join(sys.argv), file=f)
+    config_filename = os.path.join(logdir, "config.json")
+    with open(config_filename, 'w') as f:
+        json.dump(config, f, indent=4)
+
+    # Define the work
+    if method == "dsr":
+        work = partial(train_dsr, config=config)
+    elif method == "gp":
+        work = partial(train_gp, logdir=logdir, config_task=config_task, config_gp=config_gp)
+
+    # Farm out the work; each result row is appended to the CSV as it arrives
+    write_header = True
+    if n_cores_task > 1:
+        with multiprocessing.Pool(n_cores_task) as pool: # Workers are shut down on exit, even if a task raises
+            for result in pool.imap_unordered(work, names_and_seeds):
+                pd.DataFrame(result, index=[0]).to_csv(output_filename, header=write_header, mode='a', index=False)
+                print("Completed {} ({} of {}) in {:.0f} s".format(result["name"], result["seed"]+1-seed_shift, mc, result["t"]))
+                write_header = False
+    else:
+        for name_and_seed in names_and_seeds:
+            result = work(name_and_seed)
+            pd.DataFrame(result, index=[0]).to_csv(output_filename, header=write_header, mode='a', index=False)
+            write_header = False
+
+    print("Results saved to: {}".format(output_filename))
+
+
+if __name__ == "__main__":
+    main() # click parses the command line and supplies main's arguments
diff --git a/dsr/dsr/subroutines.py b/dsr/dsr/subroutines.py
new file mode 100644
index 00000000..fbe4221a
--- /dev/null
+++ b/dsr/dsr/subroutines.py
@@ -0,0 +1,120 @@
+"""Numba-compiled subroutines used for deep symbolic optimization."""
+
+from numba import jit, prange
+import numpy as np
+
+
+@jit(nopython=True, parallel=True)
+def parents_siblings(tokens, arities, parent_adjust):
+    """
+    Given a batch of action sequences, computes and returns the parents and
+    siblings of the next element of the sequence.
+
+    The batch has shape (N, L), where N is the number of sequences (i.e. batch
+    size) and L is the length of each sequence. In some cases, expressions may
+    already be complete; in these cases, this function sees the start of a new
+    expression, even though the return value for these elements won't matter
+    because their gradients will be zero because of sequence_length.
+
+    Parameters
+    ----------
+    tokens : np.ndarray, shape=(N, L), dtype=np.int32
+        Batch of action sequences. Values correspond to library indices.
+
+    arities : np.ndarray, dtype=np.int32
+        Array of arities corresponding to library indices.
+
+    parent_adjust : np.ndarray, dtype=np.int32
+        Array of parent sub-library index corresponding to library indices.
+
+    Returns
+    -------
+    adj_parents : np.ndarray, shape=(N,), dtype=np.int32
+        Adjusted parents of the next element of each action sequence.
+
+    siblings : np.ndarray, shape=(N,), dtype=np.int32
+        Siblings of the next element of each action sequence.
+    """
+    N, L = tokens.shape
+
+    empty_parent = np.max(parent_adjust) + 1 # Empty token is after all non-empty tokens
+    empty_sibling = len(arities) # Empty token is after all non-empty tokens
+    adj_parents = np.full(shape=(N,), fill_value=empty_parent, dtype=np.int32)
+    siblings = np.full(shape=(N,), fill_value=empty_sibling, dtype=np.int32)
+    # Parallelized loop over action sequences
+    for r in prange(N):
+        arity = arities[tokens[r, -1]]
+        if arity > 0: # Parent is the previous element; no sibling
+            adj_parents[r] = parent_adjust[tokens[r, -1]]
+            continue
+        # The last token has arity 0: scan the sequence backwards, adding
+        # (arity - 1) per token, until the running count returns to 0.
+        dangling = 0
+        # Loop over elements in an action sequence (last to first)
+        for c in range(L):
+            arity = arities[tokens[r, L - c - 1]]
+            dangling += arity - 1
+            if dangling == 0: # Parent is L-c-1, sibling is the next
+                adj_parents[r] = parent_adjust[tokens[r, L - c - 1]]
+                siblings[r] = tokens[r, L - c]
+                break
+        # If the count never returns to 0, the empty defaults set above remain
+    return adj_parents, siblings
+
+
+@jit(nopython=True, parallel=True)
+def ancestors(actions, arities, ancestor_tokens):
+    """
+    Given a batch of action sequences, determines whether the next element of
+    the sequence has an ancestor in ancestor_tokens.
+
+    The batch has shape (N, L), where N is the number of sequences (i.e. batch
+    size) and L is the length of each sequence. In some cases, expressions may
+    already be complete; in these cases, this function sees the start of a new
+    expression, even though the return value for these elements won't matter
+    because their gradients will be zero because of sequence_length.
+
+    Parameters
+    ----------
+    actions : np.ndarray, shape=(N, L), dtype=np.int32
+        Batch of action sequences. Values correspond to library indices.
+
+    arities : np.ndarray, dtype=np.int32
+        Array of arities corresponding to library indices.
+
+    ancestor_tokens : np.ndarray, dtype=np.int32
+        Array of ancestor library indices to check.
+
+    Returns
+    -------
+    mask : np.ndarray, shape=(N,), dtype=np.bool_
+        Mask of whether the next element of each sequence has an ancestor in
+        ancestor_tokens.
+    """
+
+    N, L = actions.shape
+    mask = np.zeros(shape=(N,), dtype=np.bool_)
+    # Parallelized loop over action sequences
+    for r in prange(N):
+        dangling = 0
+        threshold = None # If None, current branch has no ancestor from ancestor_tokens
+        for c in range(L):
+            arity = arities[actions[r, c]]
+            dangling += arity - 1
+            # Turn "on" if a token from ancestor_tokens is found
+            # Remain "on" until branch completes
+            if threshold is None:
+                for trig_token in ancestor_tokens:
+                    if actions[r, c] == trig_token:
+                        # NOTE(review): dangling - 1 is reached once ONE child subtree is
+                        # done; for tokens with arity > 1 this switches off early -- confirm
+                        threshold = dangling - 1
+                        break
+            # Turn "off" once the branch completes
+            else:
+                if dangling == threshold:
+                    threshold = None
+        # If the sequence ended "on", the next element has such an ancestor
+        if threshold is not None:
+            mask[r] = True
+    return mask
diff --git a/dsr/dsr/task/__init__.py b/dsr/dsr/task/__init__.py
new file mode 100644
index 00000000..8dc70998
--- /dev/null
+++ b/dsr/dsr/task/__init__.py
@@ -0,0 +1 @@
+from dsr.task.task import make_task, set_task, Task
diff --git a/dsr/dsr/task/regression/__init__.py b/dsr/dsr/task/regression/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsr/dsr/task/regression/benchmarks.csv b/dsr/dsr/task/regression/benchmarks.csv
new file mode 100644
index 00000000..3a2ce53f
--- /dev/null
+++ b/dsr/dsr/task/regression/benchmarks.csv
@@ -0,0 +1,38 @@
+name,variables,expression,train_spec,test_spec,function_set
+Nguyen-1,1,"pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-2,1,"pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-3,1,"pow(x1,5)+pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-4,1,"pow(x1,6)+pow(x1,5)+pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-5,1,"sin(pow(x1,2))*cos(x1)-1","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-6,1,"sin(x1)+sin(x1+pow(x1,2))","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-7,1,"log(x1+1)+log(pow(x1,2)+1)","{""all"":{""U"":[0,2,20]}}",None,Koza
+Nguyen-8,1,sqrt(x1),"{""all"":{""U"":[0,4,20]}}",None,Koza
+Nguyen-9,2,"sin(x1)+sin(pow(x2,2))","{""all"":{""U"":[0,1,20]}}",None,Koza
+Nguyen-10,2,2*sin(x1)*cos(x2),"{""all"":{""U"":[0,1,20]}}",None,Koza
+Nguyen-11,2,"pow(x1,x2)","{""all"":{""U"":[0,1,20]}}",None,Koza
+Nguyen-12,2,"pow(x1,4)-pow(x1,3)+div(pow(x2,2),2)-x2","{""all"":{""U"":[0,1,20]}}",None,Koza
+Nguyen-2a,1,"4*pow(x1,4)+3*pow(x1,3)+2*pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-5a,1,"sin(pow(x1,2))*cos(x1)-2","{""all"":{""U"":[-1,1,20]}}",None,Koza
+Nguyen-8a,1,"pow(x1,1/3)","{""all"":{""U"":[0,4,20]}}",None,Koza
+Nguyen-8aa,1,"pow(x1,2/3)","{""all"":{""U"":[0,4,20]}}",None,Koza
+Nguyen-1c,1,"3.39*pow(x1,3)+2.12*pow(x1,2)+1.78*x1","{""all"":{""U"":[-1,1,20]}}",None,CKoza
+Nguyen-5c,1,"sin(pow(x1,2))*cos(x1)-0.75","{""all"":{""U"":[-1,1,20]}}",None,CKoza
+Nguyen-7c,1,"log(x1+1.4)+log(pow(x1,2)+1.3)","{""all"":{""U"":[0,2,20]}}",None,CKoza
+Nguyen-8c,1,sqrt(1.23*x1),"{""all"":{""U"":[0,4,20]}}",None,CKoza
+Nguyen-10c,2,sin(1.5*x1)*cos(0.5*x2),"{""all"":{""U"":[0,1,20]}}",None,CKoza
+GrammarVAE-1,1,"1./3+x1+sin(pow(x1,2))","{""all"":{""E"":[-10,10,1000]}}",None,GrammarVAE
+Jin-1,2,"2.5*pow(x1,4)-1.3*pow(x1,3)+0.5*pow(x2,2)-1.7*x2","{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin
+Jin-2,2,"8.0*pow(x1,2)+8.0*pow(x2,3)-15.0","{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin
+Jin-3,2,"0.2*pow(x1,3)+0.5*pow(x2,3)-1.2*x2-0.5*x1","{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin
+Jin-4,2,1.5*exp(x1)+5.0*cos(x2),"{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin
+Jin-5,2,6.0*sin(x1)*cos(x2),"{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin
+Jin-6,2,1.35*x1*x2+5.5*sin((x1-1.0)*(x2-1.0)),"{""all"":{""U"":[-3.0,3.0,100]}}","{""all"":{""U"":[-3.0,3.0,30]}}",Jin
+Neat-1,1,"pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,KozaPlus1
+Neat-2,1,"pow(x1,5)+pow(x1,4)+pow(x1,3)+pow(x1,2)+x1","{""all"":{""U"":[-1,1,20]}}",None,KozaPlus1
+Neat-3,1,"sin(pow(x1,2))*cos(x1)-1","{""all"":{""U"":[-1,1,20]}}",None,KozaPlus1
+Neat-4,1,"log(x1+1)+log(pow(x1,2)+1)","{""all"":{""U"":[0,2,20]}}",None,KozaPlus1
+Neat-5,2,2*sin(x1)*cos(x2),"{""all"":{""U"":[-1,1,100]}}",None,Koza
+Neat-6,1,harmonic(x1),"{""all"":{""E"":[1,50,1]}}","{""all"":{""E"":[1,120,1]}}",KeijzerPlus1
+Neat-7,2,2-2.1*cos(9.8*x1)*sin(1.3*x2),"{""all"":{""U"":[-50,50,10000]}}",None,Korns
+Neat-8,2,"div(exp(-pow(x1-1,2)),(1.2+pow((x2-2.5),2)))","{""all"":{""U"":[0.3,4,100]}}",None,Vladislavleva-B
+Neat-9,2,"div(1,(1+pow(x1,-4)))+div(1,(1+pow(x2,-4)))","{""all"":{""E"":[-5,5,0.4]}}",None,Koza
diff --git a/dsr/dsr/task/regression/dataset.py b/dsr/dsr/task/regression/dataset.py
new file mode 100644
index 00000000..a6c9dfe6
--- /dev/null
+++ b/dsr/dsr/task/regression/dataset.py
@@ -0,0 +1,274 @@
+"""Class for deterministically generating a benchmark dataset from benchmark specifications."""
+
+import os
+import ast
+import itertools
+from pkg_resources import resource_filename
+import zlib
+
+import click
+import pandas as pd
+import numpy as np
+
+from dsr.functions import function_map
+
+
+class BenchmarkDataset(object):
+    """
+    Class used to generate (X, y) data from a named benchmark expression.
+
+    Parameters
+    ----------
+    name : str
+        Name of benchmark expression.
+
+    benchmark_source : str, optional
+        Filename of CSV describing benchmark expressions.
+
+    root : str, optional
+        Directory containing benchmark_source and function_sets.csv.
+
+    noise : float, optional
+        If not None, Gaussian noise is added to the y values with standard
+        deviation = noise * RMS of the noiseless y training values.
+
+    dataset_size_multiplier : float, optional
+        Multiplier for size of the dataset.
+
+    seed : int, optional
+        Random number seed used to generate data. Checksum on name is added to
+        seed.
+
+    logdir : str, optional
+        Directory where experiment logfiles are saved.
+
+    backup : bool, optional
+        Save generated dataset in logdir if logdir is provided.
+    """
+
+    def __init__(self, name, benchmark_source="benchmarks.csv", root=None, noise=0.0,
+                 dataset_size_multiplier=1.0, seed=0, logdir=None,
+                 backup=False):
+        """Loads the benchmark row for name and generates train/test data from it."""
+        self.name = name
+        self.seed = seed
+        self.noise = noise if noise is not None else 0.0
+        self.dataset_size_multiplier = dataset_size_multiplier if dataset_size_multiplier is not None else 1.0
+
+        # Set random number generator used for sampling X values
+        seed += zlib.adler32(name.encode("utf-8")) # Different seed for each name, otherwise two benchmarks with the same domain will always have the same X values
+        self.rng = np.random.RandomState(seed)
+
+        # Load benchmark data (defaults to the CSV packaged under dsr.task/regression)
+        if root is None:
+            root = resource_filename("dsr.task", "regression")
+        benchmark_path = os.path.join(root, benchmark_source)
+        benchmark_df = pd.read_csv(benchmark_path, index_col=0, encoding="ISO-8859-1")
+        row = benchmark_df.loc[name]
+        self.n_input_var = row["variables"]
+
+        # Compile the benchmark expression into a callable of X
+        self.numpy_expr = self.make_numpy_expr(row["expression"])
+
+        # Create X values
+        train_spec = ast.literal_eval(row["train_spec"])
+        test_spec = ast.literal_eval(row["test_spec"]) # The string "None" evaluates to None
+        if test_spec is None:
+            test_spec = train_spec
+        self.X_train = self.make_X(train_spec) # Order matters: train is drawn before test from the same RNG
+        self.X_test = self.make_X(test_spec)
+        self.train_spec = train_spec
+        self.test_spec = test_spec
+
+        # Compute y values
+        self.y_train = self.numpy_expr(self.X_train)
+        self.y_test = self.numpy_expr(self.X_test)
+        self.y_train_noiseless = self.y_train.copy() # Copies taken before any noise is added
+        self.y_test_noiseless = self.y_test.copy()
+
+        # Add Gaussian noise
+        if self.noise > 0:
+            y_rms = np.sqrt(np.mean(self.y_train**2)) # Scale comes from the training targets, also for the test set
+            scale = self.noise * y_rms
+            self.y_train += self.rng.normal(loc=0, scale=scale, size=self.y_train.shape)
+            self.y_test += self.rng.normal(loc=0, scale=scale, size=self.y_test.shape)
+        elif self.noise < 0:
+            print('WARNING: Ignoring negative noise value: {}'.format(self.noise))
+
+        # Load default function set
+        function_set_path = os.path.join(root, "function_sets.csv")
+        function_set_df = pd.read_csv(function_set_path, index_col=0)
+        function_set_name = row["function_set"]
+        self.function_set = function_set_df.loc[function_set_name].tolist()[0].strip().split(',')
+
+        # Prepare status output
+        output_message = '\n-- Building dataset -----------------\n'
+        output_message += 'Benchmark path                 : {}\n'.format(benchmark_path)
+        output_message += 'Generated data for benchmark   : {}\n'.format(name)
+        output_message += 'Function set path              : {}\n'.format(function_set_path)
+        output_message += 'Function set                   : {} --> {}\n'.format(function_set_name, self.function_set)
+        if backup and logdir is not None:
+            output_message += self.save(logdir)
+        output_message += '-------------------------------------\n\n'
+        print(output_message)
+
+    def make_X(self, spec):
+        """Builds the input matrix X described by a sampling specification.
+
+        spec maps "all" or "x<i>" to {"U": [low, high, n]} (n uniform random
+        draws) or {"E": [start, stop, step]} (evenly spaced points). Several
+        "E" features are combined as a Cartesian product; otherwise features
+        become the columns of X. Raises ValueError on an unknown spec type.
+        """
+        columns = []
+        for idx in range(self.n_input_var):
+            # Lookup order: "all" first, then this variable, then "x1"
+            key = "x{}".format(idx + 1)
+            if "all" in spec:
+                key = "all"
+            elif key not in spec:
+                key = "x1"
+            var_spec = spec[key]
+
+            if "U" in var_spec:
+                low, high, n = var_spec["U"]
+                n = int(n * self.dataset_size_multiplier)
+                column = self.rng.uniform(low=low, high=high, size=n)
+            elif "E" in var_spec:
+                start, stop, step = var_spec["E"]
+                # A step wider than the interval is read as a point count
+                n = step if step > stop - start else int((stop - start)/step) + 1
+                n = int(n * self.dataset_size_multiplier)
+                column = np.linspace(start=start, stop=stop, num=n, endpoint=True)
+            else:
+                raise ValueError("Did not recognize specification for {}: {}.".format(key, var_spec))
+            columns.append(column)
+
+        # Grid ("E") specs in several variables expand to every combination
+        if "E" in spec[key] and self.n_input_var > 1:
+            return np.array(list(itertools.product(*columns)))
+        return np.column_stack(columns)
+
+    def make_numpy_expr(self, s):
+        # Turns the expression string s into a callable taking an array x and
+        # returning the result of evaluating the rewritten expression.
+        #
+        # This isn't pretty, but unlike sympy's lambdify, this ensures we use
+        # our protected functions. Otherwise, some expressions may have large
+        # error even if the functional form is correct due to the training set
+        # not using protected functions.
+        #
+        # NOTE(review): the replacements below are plain substring
+        # substitutions applied in sequence, so their order matters and any
+        # name that contains another as a substring may be rewritten too.
+
+        # Replace function names
+        s = s.replace("ln(", "log(")
+        s = s.replace("pi", "np.pi")
+        s = s.replace("pow", "np.power")
+        # Route every "<key>(" call through function_map[<key>].function
+        # (function_map is defined outside this method).
+        for k in function_map.keys():
+            s = s.replace(k + '(', "function_map['{}'].function(".format(k))
+
+        # Replace variable names
+        # Highest index first, so that e.g. "x10" is rewritten before "x1"
+        # could match its prefix. Variable x{i+1} becomes column i of x.
+        for i in reversed(range(self.n_input_var)):
+            old = "x{}".format(i+1)
+            new = "x[:, {}]".format(i)
+            s = s.replace(old, new)
+
+        # NOTE(review): eval() executes the rewritten string as Python code
+        # each time the returned function is called; only use this with
+        # trusted benchmark expressions.
+        numpy_expr = lambda x : eval(s)
+
+        return numpy_expr
+
+    def save(self, logdir='./'):
+        """
+        Write the train and test data to a CSV file inside logdir.
+
+        Each row is the input values followed by the target value; training
+        rows come first, then test rows. Saving is best-effort: a failure is
+        reported as a printed warning instead of an exception.
+
+        Parameters
+        ----------
+        logdir : str
+            Output directory. It is created if it does not exist.
+
+        Returns
+        -------
+        message : str
+            Status line naming the saved file, or an empty string if the
+            dataset could not be saved. A string is always returned so callers
+            can append the result to a status message.
+        """
+        save_path = os.path.join(logdir,'data_{}_n{:.2f}_d{:.0f}_s{}.csv'.format(
+                self.name, self.noise, self.dataset_size_multiplier, self.seed))
+        try:
+            os.makedirs(logdir, exist_ok=True)
+            train_rows = np.hstack((self.X_train, self.y_train[..., np.newaxis]))
+            test_rows = np.hstack((self.X_test, self.y_test[..., np.newaxis]))
+            np.savetxt(
+                save_path,
+                np.concatenate((train_rows, test_rows), axis=0),
+                delimiter=',', fmt='%1.5f'
+            )
+        except Exception as e:
+            # Catch Exception (not a bare except) so KeyboardInterrupt and
+            # SystemExit still propagate.
+            print("WARNING: Could not save dataset: {}: {}".format(type(e).__name__, e))
+            return ''
+        return 'Saved dataset to               : {}\n'.format(save_path)
+
+    def plot(self, logdir='./'):
+        """
+        Plot the training points together with the ground-truth expression.
+
+        Only datasets with a single input variable are plotted; for anything
+        else a warning is printed and nothing is written. The figure is saved
+        as a PNG inside logdir (created if needed). Saving is best-effort: a
+        failure is reported as a printed warning.
+
+        Parameters
+        ----------
+        logdir : str
+            Output directory for the PNG file.
+        """
+        if self.X_train.shape[1] != 1:
+            print("WARNING: Plotting only supported for 2D datasets.")
+            return
+
+        # Imported lazily so matplotlib is only needed when plotting
+        from matplotlib import pyplot as plt
+        save_path = os.path.join(logdir,'plot_{}_n{:.2f}_d{:.0f}_s{}.png'.format(
+                self.name, self.noise, self.dataset_size_multiplier, self.seed))
+
+        # Draw ground truth expression over the bounds given by the first two
+        # values of the first entry of the training specification
+        bounds = list(list(self.train_spec.values())[0].values())[0][:2]
+        x = np.linspace(bounds[0], bounds[1], endpoint=True, num=100)
+        y = self.numpy_expr(x[:, None])
+        plt.plot(x, y, color='red', linestyle='dashed')
+        # Draw the actual points
+        plt.scatter(self.X_train, self.y_train)
+        # Add a title
+        plt.title(
+            "{} N:{} M:{} S:{}".format(
+                self.name, self.noise, self.dataset_size_multiplier, self.seed),
+            fontsize=7)
+        try:
+            os.makedirs(logdir, exist_ok=True)
+            plt.savefig(save_path)
+            print('Saved plot to                  : {}'.format(save_path))
+        except Exception as e:
+            # Catch Exception (not a bare except) so KeyboardInterrupt and
+            # SystemExit still propagate.
+            print("WARNING: Could not plot dataset: {}: {}".format(type(e).__name__, e))
+        finally:
+            # Always release the figure, even if saving was interrupted
+            plt.close()
+
+
+@click.command()
+@click.argument("benchmark_source", default="benchmarks.csv")
+@click.option('--plot', is_flag=True)
+@click.option('--save_csv', is_flag=True)
+@click.option('--sweep', is_flag=True)
+def main(benchmark_source, plot, save_csv, sweep):
+    """Plots all benchmark expressions."""
+
+    # Benchmarks are read from, and outputs written under, the packaged
+    # regression directory
+    regression_path = resource_filename("dsr.task", "regression/")
+    benchmark_path = os.path.join(regression_path, benchmark_source)
+    save_dir = os.path.join(regression_path, 'log')
+    df = pd.read_csv(benchmark_path, encoding="ISO-8859-1")
+
+    # Only these benchmark families are processed
+    supported_prefixes = ("Nguyen", "Constant", "Custom")
+
+    for name in df["name"].to_list():
+
+        if not name.startswith(supported_prefixes):
+            continue
+
+        # Noiseless dataset with default settings comes first
+        datasets = [BenchmarkDataset(
+            name=name,
+            benchmark_source=benchmark_source)]
+
+        # Generate all combinations of noise levels and dataset size multipliers
+        if sweep and name.startswith("Nguyen"):
+            noises = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
+            dataset_size_multipliers = [1.0, 10.0]
+            for noise, dataset_size_multiplier in itertools.product(
+                    noises, dataset_size_multipliers):
+                datasets.append(BenchmarkDataset(
+                    name=name,
+                    benchmark_source=benchmark_source,
+                    noise=noise,
+                    dataset_size_multiplier=dataset_size_multiplier,
+                    backup=save_csv,
+                    logdir=save_dir))
+
+        # Plot datasets that have a single input variable
+        if plot:
+            for dataset in datasets:
+                if dataset.X_train.shape[1] == 1:
+                    dataset.plot(save_dir)
+
+if __name__ == "__main__":
+    main()
diff --git a/dsr/dsr/task/regression/function_sets.csv b/dsr/dsr/task/regression/function_sets.csv
new file mode 100644
index 00000000..8c23cdd3
--- /dev/null
+++ b/dsr/dsr/task/regression/function_sets.csv
@@ -0,0 +1,13 @@
+name,function_set
+Koza,"add,sub,mul,div,sin,cos,exp,log"
+CKoza,"add,sub,mul,div,sin,cos,exp,log,const"
+KozaPlus1,"add,sub,mul,div,sin,cos,exp,log,1.0"
+Korns,"add,sub,mul,div,sin,cos,exp,log,n2,n3,sqrt,tan,tanh,const"
+Keijzer,"add,mul,inv,neg,sqrt,const"
+KeijzerPlus1,"add,mul,inv,neg,sqrt,1.0,const"
+Vladislavleva-A,"add,sub,mul,div,n2"
+Vladislavleva-B,"add,sub,mul,div,n2,exp,expneg"
+Vladislavleva-C,"add,sub,mul,div,n2,exp,expneg,sin,cos"
+None,"add,sub,mul,div,sin,cos,exp,log"
+Jin,"add,sub,mul,div,sin,cos,exp,n2,n3,const"
+GrammarVAE,"add,mul,div,sin,exp,1.0,2.0,3.0"
diff --git a/dsr/dsr/task/regression/regression.py b/dsr/dsr/task/regression/regression.py
new file mode 100644
index 00000000..0dbab848
--- /dev/null
+++ b/dsr/dsr/task/regression/regression.py
@@ -0,0 +1,352 @@
+import numpy as np
+import pandas as pd
+
+import dsr
+from dsr.library import Library
+from dsr.functions import create_tokens
+from dsr.task.regression.dataset import BenchmarkDataset
+
+
+def make_regression_task(name, function_set, dataset, metric="inv_nrmse",
+    metric_params=(1.0,), extra_metric_test=None, extra_metric_test_params=(),
+    reward_noise=0.0, reward_noise_type="r", threshold=1e-12,
+    normalize_variance=False, protected=False):
+    """
+    Factory function for regression rewards. This includes closures for a
+    dataset and regression metric (e.g. inverse NRMSE). Also sets regression-
+    specific metrics to be used by Programs.
+
+    Parameters
+    ----------
+    name : str or None
+        Name of regression benchmark, if using benchmark dataset.
+
+    function_set : list or None
+        List of allowable functions. If None, uses function_set according to
+        benchmark dataset, or a default set if there is no benchmark.
+
+    dataset : dict, str, or tuple
+        If dict: .dataset.BenchmarkDataset kwargs ("name" is filled in from
+        the name argument; the dict itself is not modified).
+        If str: filename of a header-less CSV whose last column is y.
+        If tuple: (X, y) data
+
+    metric : str
+        Name of reward function metric to use.
+
+    metric_params : list
+        List of metric-specific parameters.
+
+    extra_metric_test : str
+        Name of extra function metric to use for testing.
+
+    extra_metric_test_params : list
+        List of metric-specific parameters for extra test metric.
+
+    reward_noise : float
+        Noise level to use when computing reward.
+
+    reward_noise_type : "y_hat" or "r"
+        "y_hat" : N(0, reward_noise * y_rms_train) is added to y_hat values.
+        "r" : N(0, reward_noise) is added to r.
+
+    threshold : float
+        Threshold of NMSE on noiseless data used to determine success.
+
+    normalize_variance : bool
+        If True and reward_noise_type=="r", reward is multiplied by
+        1 / sqrt(1 + 12*reward_noise**2) (We assume r is U[0,1]).
+
+    protected : bool
+        Whether to use protected functions.
+
+    Returns
+    -------
+
+    task : Task
+        Dynamically created Task object whose methods contains closures.
+
+    Raises
+    ------
+
+    TypeError
+        If dataset is not a dict, str, or tuple.
+    """
+
+    X_test = y_test = y_test_noiseless = None
+
+    # Benchmark dataset config
+    if isinstance(dataset, dict):
+        # Add the name on a copy so the caller's config dict is not mutated
+        benchmark = BenchmarkDataset(**dict(dataset, name=name))
+        X_train = benchmark.X_train
+        y_train = benchmark.y_train
+        X_test = benchmark.X_test
+        y_test = benchmark.y_test
+        y_test_noiseless = benchmark.y_test_noiseless
+
+        # Unless specified, use the benchmark's default function_set
+        if function_set is None:
+            function_set = benchmark.function_set
+
+    # Dataset filename
+    elif isinstance(dataset, str):
+        df = pd.read_csv(dataset, header=None) # Assuming data file does not have header rows
+        X_train = df.values[:, :-1]
+        y_train = df.values[:, -1]
+
+    # sklearn-like (X, y) data
+    elif isinstance(dataset, tuple):
+        X_train = dataset[0]
+        y_train = dataset[1]
+
+    else:
+        # Fail with a clear message instead of an unbound-variable error below
+        raise TypeError(
+            "dataset must be a dict, str, or tuple; received {}.".format(
+                type(dataset).__name__))
+
+    # Without separate test data, evaluate on the training data
+    if X_test is None:
+        X_test = X_train
+        y_test = y_train
+        y_test_noiseless = y_test
+
+    if function_set is None:
+        print("WARNING: Function set not provided. Using default set.")
+        function_set = ["add", "sub", "mul", "div", "sin", "cos", "exp", "log"]
+
+    # Save time by only computing these once
+    var_y_test = np.var(y_test)
+    var_y_test_noiseless = np.var(y_test_noiseless)
+
+    # Define closures for metric
+    metric, invalid_reward, max_reward = make_regression_metric(metric, y_train, *metric_params)
+    if extra_metric_test is not None:
+        print("Setting extra test metric to {}.".format(extra_metric_test))
+        metric_test, _, _ = make_regression_metric(extra_metric_test, y_test, *extra_metric_test_params)
+    assert reward_noise >= 0.0, "Reward noise must be non-negative."
+    if reward_noise:
+        assert reward_noise_type in ["y_hat", "r"], "Reward noise type not recognized."
+        # Fixed seed so the injected noise sequence is reproducible
+        rng = np.random.RandomState(0)
+        y_rms_train = np.sqrt(np.mean(y_train ** 2))
+        if reward_noise_type == "y_hat":
+            scale = reward_noise * y_rms_train
+        elif reward_noise_type == "r":
+            scale = reward_noise
+
+    def reward(p):
+
+        # Compute estimated values
+        y_hat = p.execute(X_train)
+
+        # For invalid expressions, return invalid_reward
+        if p.invalid:
+            return invalid_reward
+
+        ### Observation noise
+        # For reward_noise_type == "y_hat", success must always be checked to
+        # ensure success cases aren't overlooked due to noise. If successful,
+        # return max_reward.
+        if reward_noise and reward_noise_type == "y_hat":
+            if p.evaluate.get("success"):
+                return max_reward
+            # Add out-of-place so the array returned by p.execute is untouched
+            y_hat = y_hat + rng.normal(loc=0, scale=scale, size=y_hat.shape)
+
+        # Compute metric
+        r = metric(y_train, y_hat)
+
+        ### Direct reward noise
+        # For reward_noise_type == "r", success can for ~max_reward metrics be
+        # confirmed before adding noise. If successful, must return np.inf to
+        # avoid overlooking success cases.
+        if reward_noise and reward_noise_type == "r":
+            if r >= max_reward - 1e-5 and p.evaluate.get("success"):
+                return np.inf
+            r += rng.normal(loc=0, scale=scale)
+            if normalize_variance:
+                r /= np.sqrt(1 + 12*scale**2)
+
+        return r
+
+
+    def evaluate(p):
+
+        # Compute predictions on test data
+        y_hat = p.execute(X_test)
+        if p.invalid:
+            nmse_test = None
+            nmse_test_noiseless = None
+            success = False
+
+        else:
+            # NMSE on test data (used to report final error)
+            nmse_test = np.mean((y_test - y_hat)**2) / var_y_test
+
+            # NMSE on noiseless test data (used to determine recovery)
+            nmse_test_noiseless = np.mean((y_test_noiseless - y_hat)**2) / var_y_test_noiseless
+
+            # Success is defined by NMSE on noiseless test data below a threshold
+            success = nmse_test_noiseless < threshold
+
+        info = {
+            "nmse_test" : nmse_test,
+            "nmse_test_noiseless" : nmse_test_noiseless,
+            "success" : success
+        }
+
+        if extra_metric_test is not None:
+            if p.invalid:
+                m_test = None
+                m_test_noiseless = None
+            else:
+                m_test = metric_test(y_test, y_hat)
+                m_test_noiseless = metric_test(y_test_noiseless, y_hat)
+
+            info.update(
+                {
+                extra_metric_test : m_test,
+                extra_metric_test + '_noiseless' : m_test_noiseless
+                }
+            )
+
+        return info
+
+    tokens = create_tokens(n_input_var=X_train.shape[1],
+                           function_set=function_set,
+                           protected=protected)
+    library = Library(tokens)
+
+    stochastic = reward_noise > 0.0
+
+    extra_info = {}
+
+    task = dsr.task.Task(reward_function=reward,
+                evaluate=evaluate,
+                library=library,
+                stochastic=stochastic,
+                extra_info=extra_info)
+
+    return task
+
+
+def make_regression_metric(name, y_train, *args):
+    """
+    Factory function for a regression metric. This includes a closures for
+    metric parameters and the variance of the training data.
+
+    Parameters
+    ----------
+
+    name : str
+        Name of metric. See all_metrics for supported metrics.
+
+    y_train : array-like
+        Target values whose variance is used by the normalized metrics and by
+        the invalid rewards of the negative MSE-based metrics.
+
+    args : args
+        Metric-specific parameters
+
+    Returns
+    -------
+
+    metric : function
+        Regression metric mapping true and estimated values to a scalar.
+
+    invalid_reward: float or None
+        Reward value to use for invalid expression. If None, the training
+        algorithm must handle it, e.g. by rejecting the sample.
+
+    max_reward: float
+        Maximum possible reward under this metric.
+
+    Raises
+    ------
+
+    AssertionError
+        If name is not a supported metric or the number of args does not match
+        what the metric expects.
+    """
+
+    var_y = np.var(y_train)
+
+    # scipy is not imported at module level, so the correlation metrics import
+    # it themselves. Importing lazily keeps scipy optional for other metrics.
+    def _pearson(y, y_hat):
+        import scipy.stats
+        return scipy.stats.pearsonr(y, y_hat)[0]
+
+    def _spearman(y, y_hat):
+        import scipy.stats
+        return scipy.stats.spearmanr(y, y_hat)[0]
+
+    # Maps metric name to (metric function, number of expected parameters)
+    all_metrics = {
+
+        # Negative mean squared error
+        # Range: [-inf, 0]
+        # Value = -var(y) when y_hat == mean(y)
+        "neg_mse" :     (lambda y, y_hat : -np.mean((y - y_hat)**2),
+                        0),
+
+        # Negative root mean squared error
+        # Range: [-inf, 0]
+        # Value = -sqrt(var(y)) when y_hat == mean(y)
+        "neg_rmse" :     (lambda y, y_hat : -np.sqrt(np.mean((y - y_hat)**2)),
+                        0),
+
+        # Negative normalized mean squared error
+        # Range: [-inf, 0]
+        # Value = -1 when y_hat == mean(y)
+        "neg_nmse" :    (lambda y, y_hat : -np.mean((y - y_hat)**2)/var_y,
+                        0),
+
+        # Negative normalized root mean squared error
+        # Range: [-inf, 0]
+        # Value = -1 when y_hat == mean(y)
+        "neg_nrmse" :   (lambda y, y_hat : -np.sqrt(np.mean((y - y_hat)**2)/var_y),
+                        0),
+
+        # (Protected) negative log mean squared error
+        # Range: [-inf, 0]
+        # Value = -log(1 + var(y)) when y_hat == mean(y)
+        "neglog_mse" : (lambda y, y_hat : -np.log(1 + np.mean((y - y_hat)**2)),
+                        0),
+
+        # (Protected) inverse mean squared error
+        # Range: [0, 1]
+        # Value = 1/(1 + args[0]*var(y)) when y_hat == mean(y)
+        "inv_mse" : (lambda y, y_hat : 1/(1 + args[0]*np.mean((y - y_hat)**2)),
+                        1),
+
+        # (Protected) inverse normalized mean squared error
+        # Range: [0, 1]
+        # Value = 1/(1 + args[0]) when y_hat == mean(y)
+        "inv_nmse" :    (lambda y, y_hat : 1/(1 + args[0]*np.mean((y - y_hat)**2)/var_y),
+                        1),
+
+        # (Protected) inverse normalized root mean squared error
+        # Range: [0, 1]
+        # Value = 1/(1 + args[0]) when y_hat == mean(y)
+        "inv_nrmse" :    (lambda y, y_hat : 1/(1 + args[0]*np.sqrt(np.mean((y - y_hat)**2)/var_y)),
+                        1),
+
+        # Fraction of predicted points within p0*abs(y) + p1 band of the true value
+        # Range: [0, 1]
+        "fraction" :    (lambda y, y_hat : np.mean(abs(y - y_hat) < args[0]*abs(y) + args[1]),
+                        2),
+
+        # Pearson correlation coefficient
+        # Range: [-1, 1]
+        "pearson" :     (_pearson,
+                        0),
+
+        # Spearman correlation coefficient
+        # Range: [-1, 1]
+        "spearman" :    (_spearman,
+                        0)
+    }
+
+    assert name in all_metrics, "Unrecognized reward function name."
+    assert len(args) == all_metrics[name][1], "For {}, expected {} reward function parameters; received {}.".format(name,all_metrics[name][1], len(args))
+    metric = all_metrics[name][0]
+
+    # For negative MSE-based rewards, invalid reward is the value of the reward function when y_hat = mean(y)
+    # For inverse MSE-based rewards, invalid reward is 0.0
+    # For the remaining rewards (fraction, pearson, spearman), invalid reward is 0.0
+    all_invalid_rewards = {
+        "neg_mse" : -var_y,
+        "neg_rmse" : -np.sqrt(var_y),
+        "neg_nmse" : -1.0,
+        "neg_nrmse" : -1.0,
+        "neglog_mse" : -np.log(1 + var_y),
+        "inv_mse" : 0.0, #1/(1 + args[0]*var_y),
+        "inv_nmse" : 0.0, #1/(1 + args[0]),
+        "inv_nrmse" : 0.0, #1/(1 + args[0]),
+        "fraction" : 0.0,
+        "pearson" : 0.0,
+        "spearman" : 0.0
+    }
+    invalid_reward = all_invalid_rewards[name]
+
+    all_max_rewards = {
+        "neg_mse" : 0.0,
+        "neg_rmse" : 0.0,
+        "neg_nmse" : 0.0,
+        "neg_nrmse" : 0.0,
+        "neglog_mse" : 0.0,
+        "inv_mse" : 1.0,
+        "inv_nmse" : 1.0,
+        "inv_nrmse" : 1.0,
+        "fraction" : 1.0,
+        "pearson" : 1.0,
+        "spearman" : 1.0
+    }
+    max_reward = all_max_rewards[name]
+
+    return metric, invalid_reward, max_reward
diff --git a/dsr/dsr/task/regression/sklearn.py b/dsr/dsr/task/regression/sklearn.py
new file mode 100644
index 00000000..c3777a30
--- /dev/null
+++ b/dsr/dsr/task/regression/sklearn.py
@@ -0,0 +1,35 @@
+from copy import deepcopy
+
+from sklearn.base import BaseEstimator, RegressorMixin
+from sklearn.utils.validation import check_is_fitted
+
+from dsr import DeepSymbolicOptimizer
+
+
+class DeepSymbolicRegressor(DeepSymbolicOptimizer,
+                            BaseEstimator, RegressorMixin):
+    """
+    Scikit-learn style estimator that runs DeepSymbolicOptimizer on a
+    regression task built from the (X, y) data passed to fit.
+    """
+
+    def __init__(self, config=None):
+        DeepSymbolicOptimizer.__init__(self, config)
+
+    def fit(self, X, y):
+        """Train on (X, y), store the resulting program, and return self."""
+
+        # Point a copy of the config at a regression task over this data
+        fit_config = deepcopy(self.config)
+        task_config = fit_config["task"]
+        task_config["task_type"] = "regression"
+        task_config["dataset"] = (X, y)
+        self.update_config(fit_config)
+
+        # Trailing underscore marks an attribute that exists only after fit
+        self.program_ = self.train()["program"]
+
+        return self
+
+    def predict(self, X):
+        """Execute the fitted program on X; requires a prior call to fit."""
+
+        check_is_fitted(self, "program_")
+
+        fitted_program = self.program_
+        return fitted_program.execute(X)
diff --git a/dsr/dsr/task/regression/test_sklearn.py b/dsr/dsr/task/regression/test_sklearn.py
new file mode 100644
index 00000000..193bf6c9
--- /dev/null
+++ b/dsr/dsr/task/regression/test_sklearn.py
@@ -0,0 +1,24 @@
+"""Tests for sklearn interface."""
+
+import pytest
+import numpy as np
+
+from dsr import DeepSymbolicRegressor
+from dsr.test.generate_test_data import CONFIG_TRAINING_OVERRIDE
+
+
+@pytest.fixture
+def model():
+    """Fresh DeepSymbolicRegressor built from config.json for each test."""
+    regressor = DeepSymbolicRegressor("config.json")
+    return regressor
+
+
+def test_task(model):
+    """Fit the sklearn-style regressor on small random data as a smoke test."""
+
+    # Seed the global NumPy RNG, then draw the inputs followed by the targets
+    np.random.seed(0)
+    n_rows, n_features = 10, 3
+    X = np.random.random(size=(n_rows, n_features))
+    y = np.random.random(size=(n_rows,))
+
+    # Shorten training before fitting
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.fit(X, y)
diff --git a/dsr/dsr/task/task.py b/dsr/dsr/task/task.py
new file mode 100644
index 00000000..8574cb08
--- /dev/null
+++ b/dsr/dsr/task/task.py
@@ -0,0 +1,86 @@
+"""Factory functions for generating symbolic search tasks."""
+
+from dataclasses import dataclass
+from typing import Callable, List, Dict, Any
+
+from dsr.task.regression.regression import make_regression_task
+from dsr.program import Program
+from dsr.library import Library
+
+
+@dataclass(frozen=True)
+class Task:
+    """
+    Data object specifying a symbolic search task.
+
+    Attributes
+    ----------
+    reward_function : function
+        Reward function mapping program.Program object to scalar.
+
+    evaluate : function
+        Evaluation function mapping program.Program object to a dict of task-
+        specific evaluation metrics (primitives). Special optional key "success"
+        is used for determining early stopping during training.
+
+    library : Library
+        Library of Tokens.
+
+    stochastic : bool
+        Whether the reward function of the task is stochastic.
+
+    extra_info : dict
+        Extra task-specific info, e.g. reference to symbolic policies for
+        control task.
+    """
+
+    reward_function: Callable[[Program], float]
+    # Returns a dict of metrics, not a scalar
+    evaluate: Callable[[Program], Dict[str, Any]]
+    library: Library
+    stochastic: bool
+    extra_info: Dict[str, Any]
+
+
+def make_task(task_type, **config_task):
+    """
+    Build a Task object of the requested type.
+
+    Parameters
+    ----------
+
+    task_type : str
+        Type of task:
+        "regression" : Symbolic regression task.
+
+    config_task : kwargs
+        Task-specific arguments, forwarded unchanged to the factory function
+        registered for task_type. Special key "name" is required, which
+        defines the benchmark (i.e. dataset for regression).
+
+    Returns
+    -------
+
+    task : Task
+        Task object.
+    """
+
+    # Registry of task type --> factory function
+    factories = {
+        "regression" : make_regression_task,
+    }
+
+    factory = factories[task_type]
+    return factory(**config_task)
+
+
+def set_task(config_task):
+    """Helper function to set the Program class's execute function and Task
+    from the task config."""
+
+    # Use of protected functions is the same for all tasks, so it's handled
+    # separately; it defaults to False when the config does not mention it
+    use_protected = config_task.get("protected", False)
+    Program.set_execute(use_protected)
+
+    Program.set_task(make_task(**config_task))
diff --git a/dsr/dsr/test/__init__.py b/dsr/dsr/test/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsr/dsr/test/data/test_model.data-00000-of-00001 b/dsr/dsr/test/data/test_model.data-00000-of-00001
new file mode 100644
index 00000000..ceaaf9ba
Binary files /dev/null and b/dsr/dsr/test/data/test_model.data-00000-of-00001 differ
diff --git a/dsr/dsr/test/data/test_model.index b/dsr/dsr/test/data/test_model.index
new file mode 100644
index 00000000..9cc628d1
Binary files /dev/null and b/dsr/dsr/test/data/test_model.index differ
diff --git a/dsr/dsr/test/data/test_model.meta b/dsr/dsr/test/data/test_model.meta
new file mode 100644
index 00000000..bb26087f
Binary files /dev/null and b/dsr/dsr/test/data/test_model.meta differ
diff --git a/dsr/dsr/test/generate_test_data.py b/dsr/dsr/test/generate_test_data.py
new file mode 100644
index 00000000..b0b68005
--- /dev/null
+++ b/dsr/dsr/test/generate_test_data.py
@@ -0,0 +1,28 @@
+"""Generate model parity test case data for DeepSymbolicOptimizer."""
+
+from pkg_resources import resource_filename
+
+from dsr import DeepSymbolicOptimizer
+
+
+# Shorter config run for parity test: these values are merged into the
+# model's config_training (see main() below) before training, and the
+# constant is imported by the test modules for the same purpose.
+CONFIG_TRAINING_OVERRIDE = {
+    "n_samples" : 1000,
+    "batch_size" : 100
+}
+
+
+def main():
+    """Train a model with the shortened config and save it as test data."""
+
+    # Train the model
+    optimizer = DeepSymbolicOptimizer("config.json")
+    optimizer.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    optimizer.train()
+
+    # Save the model into the packaged test data directory
+    optimizer.save(resource_filename("dsr.test", "data/test_model"))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/dsr/dsr/test/test_core.py b/dsr/dsr/test/test_core.py
new file mode 100644
index 00000000..ebdd55ca
--- /dev/null
+++ b/dsr/dsr/test/test_core.py
@@ -0,0 +1,47 @@
+"""Test cases for DeepSymbolicOptimizer on each Task."""
+
+from pkg_resources import resource_filename
+
+import pytest
+import tensorflow as tf
+import numpy as np
+
+from dsr import DeepSymbolicOptimizer
+from dsr.test.generate_test_data import CONFIG_TRAINING_OVERRIDE
+
+
+@pytest.fixture
+def model():
+    """Fresh DeepSymbolicOptimizer built from config.json for each test."""
+    optimizer = DeepSymbolicOptimizer("config.json")
+    return optimizer
+
+
+@pytest.fixture
+def cached_results(model):
+    """Values of the trainable variables after loading the saved test model."""
+    checkpoint_path = resource_filename("dsr.test", "data/test_model")
+    model.load(checkpoint_path)
+
+    return model.sess.run(tf.trainable_variables())
+
+
+@pytest.mark.parametrize("config", ["config.json"])
+def test_task(model, config):
+    """Test that Tasks do not crash for various configs."""
+
+    model.update_config(config)
+
+    # Keep the run tiny: this only checks that training completes
+    tiny_run = {"n_samples" : 10,
+                "batch_size" : 5
+                }
+    model.config_training.update(tiny_run)
+    model.train()
+
+
+def test_model_parity(model, cached_results):
+    """Train with the shortened config and compare the trainable variables
+    against those of the saved test model."""
+
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+    trained = model.sess.run(tf.trainable_variables())
+
+    # Flatten every variable into one vector so a single comparison suffices
+    expected = np.concatenate([var.flatten() for var in cached_results])
+    actual = np.concatenate([var.flatten() for var in trained])
+    np.testing.assert_array_almost_equal(actual, expected)
diff --git a/dsr/dsr/test/test_prior.py b/dsr/dsr/test/test_prior.py
new file mode 100644
index 00000000..b94293fc
--- /dev/null
+++ b/dsr/dsr/test/test_prior.py
@@ -0,0 +1,426 @@
+"""Tests for various Priors."""
+
+import pytest
+
+from dsr.core import DeepSymbolicOptimizer
+from dsr.test.generate_test_data import CONFIG_TRAINING_OVERRIDE
+from dsr.program import from_tokens, Program
+from dsr.memory import Batch
+from dsr.controller import parents_siblings
+
+import numpy as np
+
+
+BATCH_SIZE = 1000
+
+
+@pytest.fixture
+def model():  # Fixture: a DeepSymbolicOptimizer constructed from "config.json"
+    return DeepSymbolicOptimizer("config.json")
+
+
+def assert_invalid(model, cases):
+    """Assert that the controller assigns log-probability -inf to every case."""
+    cases = [Program.library.actionize(case) for case in cases]
+    batch = make_batch(model, cases)
+    logp = model.controller.compute_probs(batch, log=True)
+    assert all(np.isneginf(logp)), \
+        "Found invalid case with probability > 0."
+
+
+def assert_valid(model, cases):
+    """Assert that the controller assigns log-probability > -inf to every case."""
+    actions = [Program.library.actionize(c) for c in cases]
+    logp = model.controller.compute_probs(make_batch(model, actions), log=True)
+    assert all(logp > -np.inf), \
+        "Found valid case with probability 0."
+
+
+def make_sequence(model, L):
+    """Build a length-L token sequence, truncated to the controller's max_length."""
+    library = Program.library
+    terminal = library.input_tokens[0]
+    unary = library.unary_tokens[0]
+    binary = library.binary_tokens[0]
+    # Binary prefix, one unary only when L is even, then the terminals.
+    n_binary = (L - 1) // 2
+    n_unary = 1 if L % 2 == 0 else 0
+    case = [binary] * n_binary + [unary] * n_unary + [terminal] * (n_binary + 1)
+    assert len(case) == L
+    return case[:model.controller.max_length]
+
+
+def make_batch(model, actions):
+    """
+    Utility function to build a Batch from prescribed (possibly unfinished)
+    action sequences.
+
+    Follows essentially the same logic as controller.py's loop_fn, except the
+    actions are prescribed instead of sampled. Is there a way to refactor the
+    two so that less code is duplicated?
+    """
+
+    n_cases = len(actions)
+    max_len = model.controller.max_length
+    arities = Program.library.arities
+    parent_adjust = Program.library.parent_adjust
+
+    # Right-pad each action sequence with zeros up to the maximum length
+    actions = np.array([np.pad(a, (0, max_len - len(a)), "constant")
+                        for a in actions], dtype=np.int32)
+
+    # Observation arrays, filled in one column per step
+    prev_actions = np.zeros_like(actions)
+    parents = np.zeros_like(actions)
+    siblings = np.zeros_like(actions)
+
+    # Placeholder values observed before the first token is chosen
+    empty_parent = np.max(parent_adjust) + 1
+    empty_sibling = len(arities)
+    action = empty_sibling
+    parent, sibling = empty_parent, empty_sibling
+    prior = np.array([model.prior.initial_prior()] * n_cases)
+
+    # Per-sequence bookkeeping
+    priors = []
+    lengths = np.zeros(n_cases, dtype=np.int32)
+    dangling = np.ones(n_cases, dtype=np.int32)
+    for step in range(max_len):
+        prefix = actions[:, :(step + 1)]
+
+        # Record the obs and prior that were in effect for this action
+        prev_actions[:, step] = action
+        parents[:, step] = parent
+        siblings[:, step] = sibling
+        priors.append(prior)
+
+        # Advance: compute the obs and prior for the next action
+        action = actions[:, step]
+        parent, sibling = parents_siblings(tokens=prefix,
+                                           arities=arities,
+                                           parent_adjust=parent_adjust)
+        # Each token uses up one dangling slot and adds `arity` new ones
+        dangling += arities[action] - 1
+        prior = model.prior(prefix, parent, sibling, dangling)
+
+        # Record the length the first time a sequence has nothing dangling
+        just_finished = np.logical_and(dangling == 0, lengths == 0)
+        lengths = np.where(just_finished, step + 1, lengths)
+
+    # Sequences that never finished get the maximum length
+    lengths = np.where(lengths == 0, max_len, lengths)
+    obs = [prev_actions, parents, siblings]
+    priors = np.array(priors).swapaxes(0, 1)
+    rewards = np.zeros(n_cases, dtype=np.float32)
+    batch = Batch(actions, obs, priors, lengths, rewards)
+    return batch
+
+
+def test_repeat(model):
+    """Test cases for RepeatConstraint."""
+
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["repeat"] = {
+        "tokens" : ["sin", "cos"],
+        "min_" : None, # Not yet supported
+        "max_" : 2
+    }
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # Sequences with more than max_ sin/cos Tokens in total must be impossible
+    invalid_cases = [["sin"] * 3,
+                     ["cos"] * 3,
+                     ["sin", "cos", "sin"],
+                     ["mul", "sin"] * 3,
+                     ["mul", "sin", "x1", "sin", "mul", "cos"]]
+    assert_invalid(model, invalid_cases)
+
+    # Sequences with exactly max_ sin/cos Tokens in total must stay possible
+    valid_cases = [["mul"] + ["sin"] * 2 + ["log"] * 2,
+                   ["sin"] + ["mul", "exp"] * 4 + ["cos"]]
+    assert_valid(model, valid_cases)
+
+
+def test_descendant(model):
+    """Test cases for descendant RelationalConstraint."""
+
+    descendant_names = "add,mul"
+    ancestor_names = "exp,log"
+
+    library = Program.library
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["relational"] = {
+        "targets" : descendant_names,
+        "effectors" : ancestor_names,
+        "relationship" : "descendant"
+    }
+
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    descendants = library.actionize(descendant_names)
+    ancestors = library.actionize(ancestor_names)
+
+    U = [tok for tok in library.unary_tokens
+         if tok not in ancestors and tok not in descendants][0]
+    B = [tok for tok in library.binary_tokens
+         if tok not in ancestors and tok not in descendants][0]
+
+    # For each D-A combination, generate invalid cases where A is an ancestor
+    # of D
+    invalid_cases = []
+    for A in ancestors:
+        for D in descendants:
+            invalid_cases += [[A, D],
+                              [A] * 10 + [D],
+                              [A] + [U, B] * 5 + [D]]
+    assert_invalid(model, invalid_cases)
+
+    # For each D-A combination, generate valid cases where A is not an ancestor
+    # of D
+    valid_cases = []
+    for A in ancestors:
+        for D in descendants:
+            valid_cases += [[U, D],
+                            [D] + [U] * 10 + [A]]
+    assert_valid(model, valid_cases)
+
+
+def test_trig(model):
+    """Test cases for TrigConstraint."""
+
+    library = Program.library
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["trig"] = {}
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    X = library.input_tokens[0]
+    U = [tok for tok in library.unary_tokens
+         if tok not in library.trig_tokens][0]
+    B = library.binary_tokens[0]
+
+    # For each trig-trig combination, generate invalid cases where one Token is
+    # a descendant of the other
+    trig_tokens = library.trig_tokens
+    trig_pairs = [(t1, t2) for t1 in trig_tokens for t2 in trig_tokens]
+    invalid_cases = []
+    for t1, t2 in trig_pairs:
+        invalid_cases.append([t1, t2, X]) # E.g. sin(cos(x))
+        invalid_cases.append([t1, B, X, t2, X]) # E.g. sin(x + cos(x))
+        invalid_cases.append([t1] + [U] * 10 + [t2, X])
+    assert_invalid(model, invalid_cases)
+
+    # For each trig-trig pair, generate valid cases where one Token is the
+    # sibling of the other
+    valid_cases = []
+    for t1, t2 in trig_pairs:
+        valid_cases.append([B, U, t1, X, t2, X]) # E.g. log(sin(x)) + cos(x)
+        valid_cases.append([B, t1, X, t2, X]) # E.g. sin(x) + cos(x)
+        # E.g. log(sin(x) + cos(x))
+        valid_cases.append([U, B, t1, X, t2, X])
+    assert_valid(model, valid_cases)
+
+
+def test_child(model):
+    """Test cases for child RelationalConstraint."""
+
+    library = Program.library
+    parents = library.actionize("log,exp,mul")
+    children = library.actionize("exp,log,sin")
+
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["relational"] = {
+        "targets" : children,
+        "effectors" : parents,
+        "relationship" : "child"
+    }
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # For each parent-child pair, generate invalid cases where child is one of
+    # parent's children.
+    X = library.input_tokens[0]
+    assert X not in children, \
+        "Error in test case specification. Do not include x1 in children."
+    invalid_cases = []
+    for p, c in zip(parents, children):
+        n_args = library.tokenize(p)[0].arity
+        # Put the child in each argument slot, filling the other slots with X
+        for slot in range(n_args):
+            leading = [X] * slot
+            trailing = [X] * (n_args - slot - 1)
+            invalid_cases.append([p] + leading + [c] + trailing)
+    assert_invalid(model, invalid_cases)
+
+
+def test_uchild(model):
+    """Test cases for uchild RelationalConstraint."""
+
+    library = Program.library
+    targets = library.actionize("x1")
+    effectors = library.actionize("sub,div") # i.e. no x1 - x1 or x1 / x1
+
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["relational"] = {
+        "targets" : targets,
+        "effectors" : effectors,
+        "relationship" : "uchild"
+    }
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # Generate valid test cases: no sub has x1 as both of its children (x1 as
+    # both children of mul is not constrained)
+    valid_cases = ["mul,x1,x1",
+                   "sub,x1,sub,x1,sub,x1,sin,x1",
+                   "sub,sub,sub,x1,sin,x1,x1",
+                   "sub,sin,x1,sin,x1"]
+    assert_valid(model, valid_cases)
+
+    # Generate invalid test cases: each contains a sub whose two children are
+    # both x1
+    invalid_cases = ["add,sub,x1,x1,sin,x1",
+                     "sin,sub,x1,x1",
+                     "sub,sub,sub,x1,x1,x1"]
+    assert_invalid(model, invalid_cases)
+
+
+def test_const(model):
+    """Test cases for ConstConstraint."""
+
+    # This test case needs the const Token before creating the model
+    model.config["task"]["name"] = "Nguyen-1c"
+    model.pool = model.make_pool() # Resets Program.task with new Task
+
+    library = Program.library
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["const"] = {}
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # Generate valid test cases: every operator keeps at least one child that
+    # is not const
+    valid_cases = ["mul,const,x1",
+                   "sub,const,sub,const,x1"]
+    assert_valid(model, valid_cases)
+
+    # Generate invalid test cases: each contains an operator whose children
+    # are all const
+    invalid_cases = ["sin,const",
+                     "mul,const,const",
+                     "sin,add,const,const"]
+    assert_invalid(model, invalid_cases)
+
+
+def test_sibling(model):
+    """Test cases for sibling RelationalConstraint."""
+
+    library = Program.library
+    targets = library.actionize("sin,cos")
+    effectors = library.actionize("x1")
+
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["relational"] = {
+        "targets" : targets,
+        "effectors" : effectors,
+        "relationship" : "sibling"
+    }
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # Generate valid test cases: no sin/cos Token has x1 as its sibling
+    # (x1 as the child of sin/cos is fine)
+    valid_cases = ["mul,sin,x1,cos,x1",
+                   "sin,cos,x1",
+                   "add,add,sin,mul,x1,x1,cos,x1,x1"]
+    assert_valid(model, valid_cases)
+
+    # Generate invalid test cases: each contains a sin Token whose sibling
+    # is x1
+    invalid_cases = ["add,x1,sin,x1",
+                     "add,sin,x1,x1",
+                     "add,add,sin,mul,x1,x1,x1,sin,x1"]
+    assert_invalid(model, invalid_cases)
+
+
+def test_inverse(model):
+    """Test cases for InverseConstraint."""
+
+    library = Program.library
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["inverse"] = {}
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # Generate valid cases: exp and log both appear, but never directly as
+    # parent and child
+    valid_cases = ["exp,sin,log,cos,exp,x1",
+                   "mul,sin,log,x1,exp,cos,x1"]
+    assert_valid(model, valid_cases)
+
+    # Generate invalid cases for each inverse: a Token directly followed by
+    # its inverse, in both orders
+    invalid_cases = ["mul,sin,x1,exp,log,x1"]
+    for t1, t2 in library.inverse_tokens.items():
+        invalid_cases.append([t1, t2])
+        invalid_cases.append([t2, t1])
+    assert_invalid(model, invalid_cases)
+
+
+@pytest.mark.parametrize("minmax", [(10, 10), (4, 30), (None, 10), (10, None)])
+def test_length(model, minmax):
+    """Test cases for LengthConstraint."""
+
+    min_, max_ = minmax
+    model.config_prior = {} # Turn off all other Priors
+    model.config_prior["length"] = {"min_" : min_, "max_" : max_}
+    model.config_training.update(CONFIG_TRAINING_OVERRIDE)
+    model.train()
+
+    # First, check that randomly generated samples do not violate constraints
+    actions, _, _ = model.controller.sample(BATCH_SIZE)
+    programs = [from_tokens(a, optimize=True) for a in actions]
+    lengths = [len(p.traversal) for p in programs]
+    if min_ is not None:
+        shortest = min(lengths)
+        assert shortest >= min_, \
+            "Found min length {} but constrained to {}.".format(shortest, min_)
+    if max_ is not None:
+        longest = max(lengths)
+        assert longest <= max_, \
+            "Found max length {} but constrained to {}.".format(longest, max_)
+
+    # Next, check valid and invalid test cases based on min_ and max_
+    # Valid test cases should not be constrained
+    # Invalid test cases should all be constrained
+    valid_cases = []
+    invalid_cases = []
+
+    # Initial prior prevents length-1 tokens
+    single_token = make_sequence(model, 1)
+    invalid_cases.append(single_token)
+
+    if min_ is not None:
+        # Generate an invalid case that is one Token too short
+        if min_ > 1:
+            too_short = make_sequence(model, min_ - 1)
+            invalid_cases.append(too_short)
+
+        # Generate a valid case that is exactly the minimum length
+        at_minimum = make_sequence(model, min_)
+        valid_cases.append(at_minimum)
+
+    if max_ is not None:
+        # Generate an invalid case that is one Token too long (which will be
+        # truncated to dangling == 1)
+        too_long = make_sequence(model, max_ + 1)
+        invalid_cases.append(too_long)
+
+        # Generate a valid case that is exactly the maximum length
+        at_maximum = make_sequence(model, max_)
+        valid_cases.append(at_maximum)
+
+    assert_valid(model, valid_cases)
+    assert_invalid(model, invalid_cases)
diff --git a/dsr/dsr/train.py b/dsr/dsr/train.py
new file mode 100644
index 00000000..bd819eb3
--- /dev/null
+++ b/dsr/dsr/train.py
@@ -0,0 +1,508 @@
+"""Defines main training loop for deep symbolic regression."""
+
+import os
+import multiprocessing
+from itertools import compress
+from datetime import datetime
+from collections import defaultdict
+
+import tensorflow as tf
+import pandas as pd
+import numpy as np
+
+from dsr.program import Program, from_tokens
+from dsr.utils import empirical_entropy, is_pareto_efficient, setup_output_files
+from dsr.memory import Batch, make_queue
+
+# Silence TensorFlow log output below ERROR (C++ backend and Python logger)
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
+# Set TensorFlow seed (module-level side effect: runs whenever this module is imported)
+tf.random.set_random_seed(0)
+
+
+# Task run by the multiprocessing pool for each Program that needs optimizing
+def work(p):
+    """Optimize p's constants; return (optimized constants, p.base_r)."""
+    return p.optimize(), p.base_r
+
+
+def hof_work(p):  # Hall-of-fame row for Program p; the final entry is p.evaluate
+    return [p.r, p.base_r, p.count, repr(p.sympy_expr), repr(p), p.evaluate]
+
+
+def pf_work(p):  # Pareto-front row for Program p; the final entry is p.evaluate
+    return [p.complexity_eureqa, p.r, p.base_r, p.count, repr(p.sympy_expr), repr(p), p.evaluate]
+
+
+def learn(sess, controller, pool,
+          logdir="./log", n_epochs=None, n_samples=1e6,
+          batch_size=1000, complexity="length", complexity_weight=0.001,
+          const_optimizer="minimize", const_params=None, alpha=0.1,
+          epsilon=0.01, n_cores_batch=1, verbose=True, summary=True,
+          output_file=None, save_all_r=False, baseline="ewma_R",
+          b_jumpstart=True, early_stopping=False, hof=10, eval_all=False,
+          pareto_front=False, debug=0):
+    """
+    Executes the main training loop.
+
+    Parameters
+    ----------
+    sess : tf.Session
+        TensorFlow Session object.
+
+    controller : dsr.controller.Controller
+        Controller object used to generate Programs.
+
+    pool : multiprocessing.Pool or None
+        Pool to parallelize reward computation. For the control task, each
+        worker should have its own TensorFlow model. If None, a Pool will be
+        generated if n_cores_batch > 1.
+
+    logdir : str, optional
+        Name of log directory.
+
+    n_epochs : int or None, optional
+        Number of epochs to train when n_samples is None.
+
+    n_samples : int or None, optional
+        Total number of expressions to sample when n_epochs is None. In this
+        case, n_epochs = int(n_samples / batch_size).
+
+    batch_size : int, optional
+        Number of sampled expressions per epoch.
+
+    complexity : str, optional
+        Complexity penalty name.
+
+    complexity_weight : float, optional
+        Coefficient for complexity penalty.
+
+    const_optimizer : str or None, optional
+        Name of constant optimizer.
+
+    const_params : dict, optional
+        Dict of constant optimizer kwargs.
+
+    alpha : float, optional
+        Coefficient of exponentially-weighted moving average of baseline.
+
+    epsilon : float or None, optional
+        Fraction of top expressions used for training. None (or
+        equivalently, 1.0) turns off risk-seeking.
+
+    n_cores_batch : int, optional
+        Number of cores to spread out over the batch for constant optimization
+        and evaluating reward. If -1, uses multiprocessing.cpu_count().
+
+    verbose : bool, optional
+        Whether to print progress.
+
+    summary : bool, optional
+        Whether to write TensorFlow summaries.
+
+    output_file : str, optional
+        Filename to write results for each iteration.
+
+    save_all_r : bool, optional
+        Whether to save all rewards for each iteration (needs output_file).
+
+    baseline : str, optional
+        Type of baseline to use: grad J = (R - b) * grad-log-prob(expression).
+        Choices:
+        (1) "ewma_R" : b = EWMA(<R>)
+        (2) "R_e" : b = R_e (requires risk-seeking, i.e. epsilon < 1.0)
+        Any other value ("ewma_R_e" and "combined" are not implemented) is
+        rejected up front with an AssertionError.
+        In the above, <R> is the sample average _after_ epsilon sub-sampling and
+        R_e is the (1-epsilon)-quantile estimate.
+
+    b_jumpstart : bool, optional
+        Whether EWMA part of the baseline starts at the average of the first
+        iteration. If False, the EWMA starts at 0.0.
+
+    early_stopping : bool, optional
+        Whether to stop early if stopping criteria is reached.
+
+    hof : int or None, optional
+        If not None, number of top Programs to evaluate after training.
+
+    eval_all : bool, optional
+        If True, evaluate all Programs. While expensive, this is useful for
+        noisy data when you can't be certain of success solely based on reward.
+        If False, only the top Program is evaluated each iteration.
+
+    pareto_front : bool, optional
+        If True, compute and save the Pareto front at the end of training.
+
+    debug : int, optional
+        Debug level, also passed to Controller. 0: No debug. 1: Print initial
+        parameter means. 2: Print parameter means each step.
+
+    Returns
+    -------
+    result : dict
+        A dict describing the best-fit expression (determined by base_r).
+    """
+
+    # Config assertions and warnings
+    assert n_samples is None or n_epochs is None, "At least one of 'n_samples' or 'n_epochs' must be None."
+    assert baseline in ("ewma_R", "R_e"), "Unsupported baseline '{}'.".format(baseline)
+    assert baseline != "R_e" or (epsilon is not None and epsilon < 1.0), "Baseline 'R_e' requires epsilon < 1.0."
+
+    # Create the summary writer
+    if summary:
+        timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+        summary_dir = os.path.join("summary", timestamp)
+        writer = tf.summary.FileWriter(summary_dir, sess.graph)
+
+    # Create log file
+    if output_file is not None:
+        all_r_output_file, hof_output_file, pf_output_file = setup_output_files(logdir, output_file)
+    else:
+        all_r_output_file = hof_output_file = pf_output_file = None
+
+    # Set the complexity functions
+    Program.set_complexity_penalty(complexity, complexity_weight)
+
+    # Set the constant optimizer
+    const_params = const_params if const_params is not None else {}
+    Program.set_const_optimizer(const_optimizer, **const_params)
+
+    # Initialize compute graph
+    sess.run(tf.global_variables_initializer())
+
+    if debug:
+        tvars = tf.trainable_variables()
+        def print_var_means():
+            tvars_vals = sess.run(tvars)
+            for var, val in zip(tvars, tvars_vals):
+                print(var.name, "mean:", val.mean(),"var:", val.var())
+
+    # Create the pool of workers, if pool is not already given
+    if pool is None:
+        if n_cores_batch == -1:
+            n_cores_batch = multiprocessing.cpu_count()
+        if n_cores_batch > 1:
+            pool = multiprocessing.Pool(n_cores_batch)
+
+    # Create the priority queue
+    k = controller.pqt_k
+    if controller.pqt and k is not None and k > 0:
+        priority_queue = make_queue(priority=True, capacity=k)
+    else:
+        priority_queue = None
+
+    if debug >= 1:
+        print("\nInitial parameter means:")
+        print_var_means()
+
+    base_r_history = None
+
+    # Main training loop
+    p_final = None
+    base_r_best = -np.inf
+    r_best = -np.inf
+    prev_r_best = None
+    prev_base_r_best = None
+    ewma = None if b_jumpstart else 0.0 # EWMA portion of baseline
+    n_epochs = n_epochs if n_epochs is not None else int(n_samples / batch_size)
+    all_r = np.zeros(shape=(n_epochs, batch_size), dtype=np.float32)
+
+    for step in range(n_epochs):
+
+        # Set of str representations for all Programs ever seen
+        s_history = set(Program.cache.keys())
+
+        # Sample batch of expressions from controller
+        # Shape of actions: (batch_size, max_length)
+        # Shape of obs: [(batch_size, max_length)] * 3
+        # Shape of priors: (batch_size, max_length, n_choices)
+        actions, obs, priors = controller.sample(batch_size)
+
+        # Instantiate, optimize, and evaluate expressions
+        if pool is None:
+            programs = [from_tokens(a, optimize=True) for a in actions]
+        else:
+            # To prevent interfering with the cache, un-optimized programs are
+            # first generated serially. Programs that need optimizing are
+            # optimized in parallel. Since multiprocessing operates on
+            # copies of programs, we manually set the optimized constants and
+            # base reward after the pool joins.
+            programs = [from_tokens(a, optimize=False) for a in actions]
+
+            # Filter programs that have not yet computed base_r
+            programs_to_optimize = list(set([p for p in programs if "base_r" not in p.__dict__]))
+
+            # Optimize and compute base_r
+            results = pool.map(work, programs_to_optimize)
+            for (optimized_constants, base_r), p in zip(results, programs_to_optimize):
+                p.set_constants(optimized_constants)
+                p.base_r = base_r
+
+        # Retrieve metrics
+        base_r = np.array([p.base_r for p in programs])
+        r = np.array([p.r for p in programs])
+        l = np.array([len(p.traversal) for p in programs])
+        s = [p.str for p in programs] # Str representations of Programs
+        invalid = np.array([p.invalid for p in programs], dtype=bool)
+        all_r[step] = base_r
+
+        if eval_all:
+            success = [p.evaluate.get("success") for p in programs]
+            # Check for success before risk-seeking, but don't break until after
+            if any(success):
+                p_final = programs[success.index(True)]
+
+        # Update reward history
+        if base_r_history is not None:
+            for p in programs:
+                key = p.str
+                if key in base_r_history:
+                    base_r_history[key].append(p.base_r)
+                else:
+                    base_r_history[key] = [p.base_r]
+
+        # Collect full-batch statistics
+        base_r_max = np.max(base_r)
+        base_r_best = max(base_r_max, base_r_best)
+        base_r_avg_full = np.mean(base_r)
+        r_max = np.max(r)
+        r_best = max(r_max, r_best)
+        r_avg_full = np.mean(r)
+        l_avg_full = np.mean(l)
+        a_ent_full = np.mean(np.apply_along_axis(empirical_entropy, 0, actions))
+        n_unique_full = len(set(s))
+        n_novel_full = len(set(s).difference(s_history))
+        invalid_avg_full = np.mean(invalid)
+
+        # Risk-seeking policy gradient: train on top epsilon fraction of samples
+        if epsilon is not None and epsilon < 1.0:
+            quantile = np.quantile(r, 1 - epsilon, interpolation="higher")
+            # NOTE(review): quantile is taken over r but compared to base_r - confirm
+            keep = base_r >= quantile
+            base_r = base_r[keep]
+            r = r[keep]
+            programs = list(compress(programs, keep))
+            l = l[keep]
+            s = list(compress(s, keep))
+            invalid = invalid[keep]
+            actions = actions[keep, :]
+            obs = [o[keep, :] for o in obs]
+            priors = priors[keep, :, :]
+
+        # Clip bounds of rewards to prevent NaNs in gradient descent
+        r = np.clip(r, -1e6, 1e6)
+
+        # Compute baseline
+        if baseline == "ewma_R":
+            ewma = np.mean(r) if ewma is None else alpha*np.mean(r) + (1 - alpha)*ewma
+            b_train = ewma
+        elif baseline == "R_e": # Quantile baseline; the EWMA is unused
+            ewma = -1
+            b_train = quantile
+
+        # Collect sub-batch statistics and write output
+        if output_file is not None:
+            base_r_avg_sub = np.mean(base_r)
+            r_avg_sub = np.mean(r)
+            l_avg_sub = np.mean(l)
+            a_ent_sub = np.mean(np.apply_along_axis(empirical_entropy, 0, actions))
+            n_unique_sub = len(set(s))
+            n_novel_sub = len(set(s).difference(s_history))
+            invalid_avg_sub = np.mean(invalid)
+            stats = np.array([[
+                         base_r_best,
+                         base_r_max,
+                         base_r_avg_full,
+                         base_r_avg_sub,
+                         r_best,
+                         r_max,
+                         r_avg_full,
+                         r_avg_sub,
+                         l_avg_full,
+                         l_avg_sub,
+                         ewma,
+                         n_unique_full,
+                         n_unique_sub,
+                         n_novel_full,
+                         n_novel_sub,
+                         a_ent_full,
+                         a_ent_sub,
+                         invalid_avg_full,
+                         invalid_avg_sub
+                         ]], dtype=np.float32)
+            with open(os.path.join(logdir, output_file), 'ab') as f:
+                np.savetxt(f, stats, delimiter=',')
+
+        # Compute sequence lengths
+        lengths = np.array([min(len(p.traversal), controller.max_length)
+                            for p in programs], dtype=np.int32)
+
+        # Create the Batch
+        sampled_batch = Batch(actions=actions, obs=obs, priors=priors,
+                              lengths=lengths, rewards=r)
+
+        # Update and sample from the priority queue
+        if priority_queue is not None:
+            priority_queue.push_best(sampled_batch, programs)
+            pqt_batch = priority_queue.sample_batch(controller.pqt_batch_size)
+        else:
+            pqt_batch = None
+
+        # Train the controller
+        summaries = controller.train_step(b_train, sampled_batch, pqt_batch)
+        if summary:
+            writer.add_summary(summaries, step)
+            writer.flush()
+
+        # Update new best expression
+        new_r_best = False
+        new_base_r_best = False
+
+        if prev_r_best is None or r_max > prev_r_best:
+            new_r_best = True
+            p_r_best = programs[np.argmax(r)]
+
+        if prev_base_r_best is None or base_r_max > prev_base_r_best:
+            new_base_r_best = True
+            p_base_r_best = programs[np.argmax(base_r)]
+
+        prev_r_best = r_best
+        prev_base_r_best = base_r_best
+
+        # Print new best expression
+        if verbose:
+            if new_r_best and new_base_r_best:
+                if p_r_best == p_base_r_best:
+                    print("\nNew best overall")
+                    p_r_best.print_stats()
+                else:
+                    print("\nNew best reward")
+                    p_r_best.print_stats()
+                    print("...and new best base reward")
+                    p_base_r_best.print_stats()
+
+            elif new_r_best:
+                print("\nNew best reward")
+                p_r_best.print_stats()
+
+            elif new_base_r_best:
+                print("\nNew best base reward")
+                p_base_r_best.print_stats()
+
+        # Stop if early stopping criteria is met
+        if (eval_all and any(success)) or \
+                (early_stopping and p_base_r_best.evaluate.get("success")):
+            all_r = all_r[:(step + 1)]
+            print("Early stopping criteria met; breaking early.")
+            break
+
+        if verbose and step > 0 and step % 10 == 0:
+            print("Completed {} steps".format(step))
+
+        if debug >= 2:
+            print("\nParameter means after step {} of {}:".format(step+1, n_epochs))
+            print_var_means()
+
+    if save_all_r and all_r_output_file is not None: # No file to write without output_file
+        with open(all_r_output_file, 'ab') as f:
+            np.save(f, all_r)
+
+    # Save the hall of fame
+    if hof is not None and hof > 0:
+        programs = list(Program.cache.values()) # All unique Programs found during training
+
+        base_r = [p.base_r for p in programs]
+        i_hof = np.argsort(base_r)[-hof:][::-1] # Indices of top hof Programs
+        hof = [programs[i] for i in i_hof]
+
+        if verbose:
+            print("Evaluating the hall of fame...")
+        if pool is not None:
+            results = pool.map(hof_work, hof)
+        else:
+            results = list(map(hof_work, hof))
+
+        eval_keys = list(results[0][-1].keys())
+        columns = ["r", "base_r", "count", "expression", "traversal"] + eval_keys
+        hof_results = [result[:-1] + [result[-1][k] for k in eval_keys] for result in results]
+        df = pd.DataFrame(hof_results, columns=columns)
+        if hof_output_file is not None:
+            print("Saving Hall of Fame to {}".format(hof_output_file))
+            df.to_csv(hof_output_file, header=True, index=False)
+
+    # Print error statistics of the cache
+    n_invalid = 0
+    error_types = defaultdict(int)
+    error_nodes = defaultdict(int)
+    for p in Program.cache.values():
+        if p.invalid:
+            n_invalid += p.count
+            error_types[p.error_type] += p.count
+            error_nodes[p.error_node] += p.count
+    if n_invalid > 0:
+        total_samples = (step + 1)*batch_size # May be less than n_samples if breaking early
+        print("Invalid expressions: {} of {} ({:.1%}).".format(n_invalid, total_samples, n_invalid/total_samples))
+        print("Error type counts:")
+        for error_type, count in error_types.items():
+            print("  {}: {} ({:.1%})".format(error_type, count, count/n_invalid))
+        print("Error node counts:")
+        for error_node, count in error_nodes.items():
+            print("  {}: {} ({:.1%})".format(error_node, count, count/n_invalid))
+
+    # Print the priority queue at the end of training
+    if verbose and priority_queue is not None:
+        for i, item in enumerate(priority_queue.iter_in_order()):
+            print("\nPriority queue entry {}:".format(i))
+            p = Program.cache[item[0]]
+            p.print_stats()
+
+    # Compute the pareto front
+    if pareto_front:
+        if verbose:
+            print("Evaluating the pareto front...")
+        all_programs = list(Program.cache.values())
+        costs = np.array([(p.complexity_eureqa, -p.r) for p in all_programs])
+        pareto_efficient_mask = is_pareto_efficient(costs) # List of bool
+        pf = list(compress(all_programs, pareto_efficient_mask))
+        pf.sort(key=lambda p : p.complexity_eureqa) # Sort by complexity
+
+        if pool is not None:
+            results = pool.map(pf_work, pf)
+        else:
+            results = list(map(pf_work, pf))
+
+        eval_keys = list(results[0][-1].keys())
+        columns = ["complexity", "r", "base_r", "count", "expression", "traversal"] + eval_keys
+        pf_results = [result[:-1] + [result[-1][k] for k in eval_keys] for result in results]
+        df = pd.DataFrame(pf_results, columns=columns)
+        if pf_output_file is not None:
+            print("Saving Pareto Front to {}".format(pf_output_file))
+            df.to_csv(pf_output_file, header=True, index=False)
+
+        # Look for a success=True case within the Pareto front
+        for p in pf:
+            if p.evaluate.get("success"):
+                p_final = p
+                break
+
+    # Close the pool
+    if pool is not None:
+        pool.close()
+
+    # Return statistics of best Program
+    p = p_final if p_final is not None else p_base_r_best
+    result = {
+        "r" : p.r,
+        "base_r" : p.base_r,
+    }
+    result.update(p.evaluate)
+    result.update({
+        "expression" : repr(p.sympy_expr),
+        "traversal" : repr(p),
+        "program" : p
+    })
+
+    return result
diff --git a/dsr/dsr/utils.py b/dsr/dsr/utils.py
new file mode 100644
index 00000000..1c8113b4
--- /dev/null
+++ b/dsr/dsr/utils.py
@@ -0,0 +1,154 @@
+"""Utility functions used in deep symbolic regression."""
+
+import os
+import functools
+import numpy as np
+
+
+def is_float(s):
+    """
+    Report whether `s` is accepted by the built-in `float` constructor.
+
+    Returns True when `float(s)` succeeds and False when it raises
+    ValueError. Only ValueError is handled; any other exception raised by
+    `float` (e.g. TypeError for None) propagates to the caller.
+    """
+
+    try:
+        float(s)
+    except ValueError:
+        return False
+    else:
+        return True
+
+
+# Adapted from: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
+def is_pareto_efficient(costs):
+    """
+    Flag the pareto-efficient rows of a cost matrix (lower cost is better).
+
+    Parameters
+    ----------
+
+    costs : np.ndarray
+        Array of shape (n_points, n_costs).
+
+    Returns
+    -------
+
+    is_efficient_mask : np.ndarray (dtype:bool)
+        Boolean array of length n_points; True marks rows of costs that
+        survive the elimination below. Rows whose costs exactly equal a
+        surviving row's are dropped, so only the first of a set of
+        duplicates is flagged.
+    """
+
+    n_points = costs.shape[0]
+    survivors = np.arange(n_points)  # Original indices of rows still in play
+    remaining = costs  # Shrinks as dominated rows are discarded
+    cursor = 0  # Position (within `remaining`) of the row to compare against
+    while cursor < len(remaining):
+        # Keep every row that beats the current row in at least one cost.
+        keep = (remaining < remaining[cursor]).any(axis=1)
+        keep[cursor] = True  # The current row always keeps itself
+        survivors = survivors[keep]
+        remaining = remaining[keep]
+        # Rows removed ahead of the cursor shift it left; then step forward.
+        cursor = np.sum(keep[:cursor]) + 1
+    is_efficient_mask = np.zeros(n_points, dtype=bool)
+    is_efficient_mask[survivors] = True
+    return is_efficient_mask
+
+
+def setup_output_files(logdir, output_file):
+    """
+    Create the log directory, write the header row of the main output file,
+    and return the all-rewards, hall of fame, and Pareto front filenames.
+
+    Parameters
+    ----------
+
+    logdir : string
+        Directory to log to. Created if it does not already exist.
+
+    output_file : string
+        Name of the main output file, joined onto logdir. The file is
+        opened in write mode, so existing contents are replaced.
+
+    Returns
+    -------
+
+    all_r_output_file : string
+        "<prefix>_all_r.npy", where <prefix> is the joined output path
+        with its extension removed.
+
+    hof_output_file : string
+        "<prefix>_hof.csv"
+
+    pf_output_file : string
+        "<prefix>_pf.csv"
+    """
+
+    # Column legend for the main output file:
+    #   r_best        : Maximum across all iterations so far
+    #   r_max         : Maximum across this iteration's batch
+    #   r_avg_full    : Average across this iteration's full batch (before
+    #                   taking epsilon subset)
+    #   r_avg_sub     : Average across this iteration's epsilon-subset batch
+    #   n_unique_*    : Number of unique Programs in batch
+    #   n_novel_*     : Number of never-before-seen Programs per batch
+    #   a_ent_*       : Empirical positional entropy across sequences
+    #                   averaged over positions
+    #   invalid_avg_* : Fraction of invalid Programs per batch
+    columns = (
+        "base_r_best",
+        "base_r_max",
+        "base_r_avg_full",
+        "base_r_avg_sub",
+        "r_best",
+        "r_max",
+        "r_avg_full",
+        "r_avg_sub",
+        "l_avg_full",
+        "l_avg_sub",
+        "ewma",
+        "n_unique_full",
+        "n_unique_sub",
+        "n_novel_full",
+        "n_novel_sub",
+        "a_ent_full",
+        "a_ent_sub",
+        "invalid_avg_full",
+        "invalid_avg_sub",
+    )
+
+    os.makedirs(logdir, exist_ok=True)
+    main_path = os.path.join(logdir, output_file)
+    stem = os.path.splitext(main_path)[0]
+
+    # Only the header row is written here.
+    with open(main_path, 'w') as f:
+        f.write(",".join(columns) + "\n")
+
+    return (
+        "{}_all_r.npy".format(stem),
+        "{}_hof.csv".format(stem),
+        "{}_pf.csv".format(stem),
+    )
+
+
+class cached_property(object):
+    """
+    Descriptor-based decorator that computes an attribute on first access
+    and then stores the result on the instance under the getter's name.
+
+    Because only __get__ is defined, the stored instance attribute takes
+    precedence on later lookups, so the getter runs at most once per
+    instance. The value should therefore be treated as non-mutable.
+    """
+
+    def __init__(self, getter):
+        # Keep the wrapped function, then copy its metadata (name,
+        # docstring, ...) onto this descriptor.
+        self.getter = getter
+
+        functools.update_wrapper(self, getter)
+
+    def __get__(self, obj, cls):
+        if obj is not None:
+            # Instance access: evaluate once and cache on the instance.
+            value = self.getter(obj)
+            setattr(obj, self.getter.__name__, value)
+            return value
+
+        # Class-level access (no instance): expose the descriptor itself.
+        return self
+
+
+# Entropy computation in batch
+def empirical_entropy(labels):
+    """
+    Compute the empirical Shannon entropy (natural log, i.e. in nats) of a
+    collection of labels.
+
+    Parameters
+    ----------
+
+    labels : sequence or np.ndarray
+        Observed labels. Frequencies are normalized by len(labels), so the
+        result is only meaningful for a 1-D collection.
+
+    Returns
+    -------
+
+    ent : int or float
+        0 when there are at most one label or at most one distinct label;
+        otherwise -sum(p * log(p)) over the empirical label frequencies p.
+    """
+
+    n_labels = len(labels)
+
+    if n_labels <= 1:
+        return 0
+
+    # Every count returned by np.unique is >= 1, so the number of classes
+    # is simply the number of distinct labels.
+    _, counts = np.unique(labels, return_counts=True)
+
+    if counts.size <= 1:
+        return 0
+
+    probs = counts / n_labels
+
+    # Vectorized entropy; all probs are strictly positive, so log is finite.
+    return -np.sum(probs * np.log(probs))
diff --git a/dsr/setup.py b/dsr/setup.py
new file mode 100644
index 00000000..e192d213
--- /dev/null
+++ b/dsr/setup.py
@@ -0,0 +1,16 @@
+from distutils.core import setup
+from Cython.Build import cythonize
+import numpy
+import os
+
+# To build cython code using setup try:
+# python setup.py build_ext --inplace
+
+setup(
+    name='dsr',
+    version='1.0dev',
+    description='Deep symbolic regression.',
+    author='LLNL',
+    packages=['dsr'],
+    # Cython source for the extension module, located at dsr/cyfunc.pyx.
+    ext_modules=cythonize([os.path.join('dsr', 'cyfunc.pyx')]),
+    # Add the NumPy header directory to the include path for the build.
+    include_dirs=[numpy.get_include()],
+)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..c00628f1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+pytest
+cython
+numpy
+tensorflow==1.14
+numba
+sympy
+pandas
+scikit-learn
+click
+mpi4py
+dataclasses
+