Merge pull request #424 from elfi-dev/dev

Release v0.8.4
elfi-dev · Jun 13, 2022 · 47cd2ae · 47cd2ae
2 parents b4663c8 + 6b1b17a
commit 47cd2ae
Show file tree

Hide file tree

Showing 27 changed files with 596 additions and 244 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,29 @@
 Changelog
 =========
 
+0.8.4 (2021-06-13)
+------------------
+- Modify Lotka-Volterra model's priors as many methods do not support discrete random variables.
+- Fix acquisition index in state plot
+- Reformat `summary()` for `Sample(ParameterInferenceResult)`
+- Fix linting in `arch.py`
+- Add summary statistics to Lotka-Volterra model
+- Add boolean `observation_noise` option to `lotka_volterra`
+- Add parameter names as an optional input in model prior and fix the parameter order in priors used in BOLFI and BOLFIRE
+- Add feature names as an optional input and make training data size a required input in BOLFIRE
+- Fix the observed property in simulator nodes
+- Fix default outputs in generate
+- Add docstring description to ARCH-model
+- Make MAP estimates calculation in BOLFIRE optional and based on log-posterior
+- Use batch system to run simulations in BOLFIRE
+- Use `target_model.parameter_names` from instead of `model.parameter_names` in `BOLFIRE`
+- Extract BO results using `target_model.parameter_names` from instead of `model.parameter_names`
+- Update tox.ini
+- Add option to use additive acquisition cost in LCBSC
+- Change sigma_proposals-input in metropolis from list to dict
+- Fix is_array in utils
+- Fix acq_noise_var-bug in acquisition.py. Influenced BOLFI.
+
 0.8.3 (2021-02-17)
 ------------------
 - Add a new inference method: BOLFIRE

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1,3 @@
 recursive-include elfi/examples/cpp *.txt *.cpp Makefile
+include docs/description.rst
+include requirements.txt
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-**Version 0.8.3 released!** See the [CHANGELOG](CHANGELOG.rst) and [notebooks](https://github.com/elfi-dev/notebooks).
+**Version 0.8.4 released!** See the [CHANGELOG](CHANGELOG.rst) and [notebooks](https://github.com/elfi-dev/notebooks).
 
 <img src="https://raw.githubusercontent.com/elfi-dev/elfi/dev/docs/logos/elfi_logo_text_nobg.png" width="200" />
 

diff --git a/docs/usage/BOLFI.rst b/docs/usage/BOLFI.rst
@@ -92,7 +92,7 @@ surface is updated after each batch (especially so if the noise is 0!).
 .. code:: ipython3
 
     bolfi = elfi.BOLFI(log_d, batch_size=1, initial_evidence=20, update_interval=10, 
-                       bounds={'t1':(-2, 2), 't2':(-1, 1)}, acq_noise_var=[0.1, 0.1], seed=seed)
+                       bounds={'t1':(-2, 2), 't2':(-1, 1)}, acq_noise_var=0.1, seed=seed)
 
 Sometimes you may have some samples readily available. You could then
 initialize the GP model with a dictionary of previous results by giving

diff --git a/elfi/__init__.py b/elfi/__init__.py
@@ -31,4 +31,4 @@
 __email__ = '[email protected]'
 
 # make sure __version_ is on the last non-empty line (read by setup.py)
-__version__ = '0.8.3'
+__version__ = '0.8.4'
diff --git a/elfi/examples/arch.py b/elfi/examples/arch.py
@@ -63,7 +63,21 @@ def get_model(n_obs=100, true_params=None, seed_obs=None, n_lags=5):
 
 
 def arch(t1, t2, n_obs=100, batch_size=1, random_state=None):
-    """Generate a sequence of samples from the ARCH(1) model.
+    r"""Generate a sequence of samples from the ARCH(1) regression model.
+
+    Autoregressive conditional heteroskedasticity (ARCH) sequence describes the variance
+    of the error term as a function of previous error terms.
+
+        x_i = t_1 x_{i-1} + \epsilon_i
+
+        \epsilon_i = w_i \sqrt{0.2 + t_2 \epsilon_{i-1}^2}
+
+    where w_i is white noise ~ N(0,1) independent of \epsilon_0 ~  N(0,1)
+
+    References
+    ----------
+    Engle, R.F. (1982). Autoregressive Conditional Heteroscedasticity with
+        Estimates of the Variance of United Kingdom Inflation. Econometrica, 50(4): 987-1007
 
     Parameters
     ----------
@@ -87,6 +101,7 @@ def arch(t1, t2, n_obs=100, batch_size=1, random_state=None):
     e = E(t2, n_obs, batch_size, random_state)
     for i in range(1, n_obs + 1):
         y[:, i] = t1 * y[:, i - 1] + e[:, i]
+
     return y[:, 1:]
 
 

diff --git a/elfi/examples/lotka_volterra.py b/elfi/examples/lotka_volterra.py
@@ -74,8 +74,10 @@ def lotka_volterra(r1, r2, r3, prey_init=50, predator_init=100, sigma=0., n_obs=
 
     n_full = 20000
     stock = np.empty((batch_size, n_full, 2), dtype=np.int32)
-    stock[:, 0, 0] = prey_init
-    stock[:, 0, 1] = predator_init
+    # As we use approximate continuous priors for prey_init and
+    # predator_init, we'll round them down to closest integers
+    stock[:, 0, 0] = np.floor(prey_init)
+    stock[:, 0, 1] = np.floor(predator_init)
     stoichiometry = np.array([[1, 0], [-1, 1], [0, -1], [0, 0]], dtype=np.int32)
     times = np.empty((batch_size, n_full))
     times[:, 0] = 0
@@ -141,9 +143,11 @@ def lotka_volterra(r1, r2, r3, prey_init=50, predator_init=100, sigma=0., n_obs=
     return stock_out
 
 
-def get_model(n_obs=50, true_params=None, seed_obs=None, **kwargs):
+def get_model(n_obs=50, true_params=None, observation_noise=False, seed_obs=None, **kwargs):
     """Return a complete Lotka-Volterra model in inference task.
 
+    Including observation noise to system is optional.
+
     Parameters
     ----------
     n_obs : int, optional
@@ -152,6 +156,8 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, **kwargs):
         Parameters with which the observed data is generated.
     seed_obs : int, optional
         Seed for the observed data generation.
+    observation_noise : bool, optional
+        Whether or not add normal noise to observations.
 
     Returns
     -------
@@ -160,34 +166,117 @@ def get_model(n_obs=50, true_params=None, seed_obs=None, **kwargs):
     """
     logger = logging.getLogger()
     if true_params is None:
-        true_params = [1.0, 0.005, 0.6, 50, 100, 10.]
+        if observation_noise:
+            true_params = [1.0, 0.005, 0.6, 50, 100, 10.]
+        else:
+            true_params = [1.0, 0.005, 0.6, 50, 100, 0.]
+    else:
+        if observation_noise:
+            if len(true_params) != 6:
+                raise ValueError(
+                        "Option observation_noise = True."
+                        " Provide six input parameters."
+                        )
+        else:
+            if len(true_params) != 5:
+                raise ValueError(
+                        "Option observation_noise = False."
+                        " Provide five input parameters."
+                        )
+            true_params = true_params + [0]
 
     kwargs['n_obs'] = n_obs
     y_obs = lotka_volterra(*true_params, random_state=np.random.RandomState(seed_obs), **kwargs)
 
     m = elfi.ElfiModel()
     sim_fn = partial(lotka_volterra, **kwargs)
-    priors = []
-    sumstats = []
+    priors = [
+        elfi.Prior(ExpUniform, -6., 2., model=m, name='r1'),
+        elfi.Prior(ExpUniform, -6., 2., model=m, name='r2'),  # easily kills populations
+        elfi.Prior(ExpUniform, -6., 2., model=m, name='r3'),
+        elfi.Prior('normal', 50, np.sqrt(50), model=m, name='prey0'),
+        elfi.Prior('normal', 100, np.sqrt(100), model=m, name='predator0')
+    ]
 
-    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r1'))
-    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r2'))  # easily kills populations
-    priors.append(elfi.Prior(ExpUniform, -6., 2., model=m, name='r3'))
-    priors.append(elfi.Prior('poisson', 50, model=m, name='prey0'))
-    priors.append(elfi.Prior('poisson', 100, model=m, name='predator0'))
-    priors.append(elfi.Prior(ExpUniform, np.log(0.5), np.log(50), model=m, name='sigma'))
+    if observation_noise:
+        priors.append(elfi.Prior(ExpUniform, np.log(0.5), np.log(50), model=m, name='sigma'))
 
     elfi.Simulator(sim_fn, *priors, observed=y_obs, name='LV')
-    sumstats.append(elfi.Summary(partial(pick_stock, species=0), m['LV'], name='prey'))
-    sumstats.append(elfi.Summary(partial(pick_stock, species=1), m['LV'], name='predator'))
-    elfi.Distance('sqeuclidean', *sumstats, name='d')
+
+    sumstats = [
+        elfi.Summary(partial(stock_mean, species=0), m['LV'], name='prey_mean'),
+        elfi.Summary(partial(stock_mean, species=1), m['LV'], name='pred_mean'),
+        elfi.Summary(partial(stock_log_variance, species=0), m['LV'], name='prey_log_var'),
+        elfi.Summary(partial(stock_log_variance, species=1), m['LV'], name='pred_log_var'),
+        elfi.Summary(partial(stock_autocorr, species=0, lag=1), m['LV'], name='prey_autocorr_1'),
+        elfi.Summary(partial(stock_autocorr, species=1, lag=1), m['LV'], name='pred_autocorr_1'),
+        elfi.Summary(partial(stock_autocorr, species=0, lag=2), m['LV'], name='prey_autocorr_2'),
+        elfi.Summary(partial(stock_autocorr, species=1, lag=2), m['LV'], name='pred_autocorr_2'),
+        elfi.Summary(stock_crosscorr, m['LV'], name='crosscorr')
+    ]
+
+    elfi.Distance('euclidean', *sumstats, name='d')
 
     logger.info("Generated %i observations with true parameters r1: %.1f, r2: %.3f, r3: %.1f, "
                 "prey0: %i, predator0: %i, sigma: %.1f.", n_obs, *true_params)
 
     return m
 
 
+def stock_mean(stock, species=0, mu=0, std=1):
+    """Calculate the mean of the trajectory by species."""
+    stock = np.atleast_2d(stock[:, :, species])
+    mu_x = np.mean(stock, axis=1)
+
+    return (mu_x - mu) / std
+
+
+def stock_log_variance(stock, species=0, mu=0, std=1):
+    """Calculate the log variance of the trajectory by species."""
+    stock = np.atleast_2d(stock[:, :, species])
+    var_x = np.var(stock, axis=1, ddof=1)
+    log_x = np.log(var_x + 1)
+
+    return (log_x - mu) / std
+
+
+def stock_autocorr(stock, species=0, lag=1, mu=0, std=1):
+    """Calculate the autocorrelation of lag n of the trajectory by species."""
+    stock = np.atleast_2d(stock[:, :, species])
+    n_obs = stock.shape[1]
+
+    mu_x = np.mean(stock, axis=1, keepdims=True)
+    std_x = np.std(stock, axis=1, ddof=1, keepdims=True)
+    sx = ((stock - np.repeat(mu_x, n_obs, axis=1)) / np.repeat(std_x, n_obs, axis=1))
+    sx_t = sx[:, lag:]
+    sx_s = sx[:, :-lag]
+
+    C = np.sum(sx_t * sx_s, axis=1) / (n_obs - 1)
+
+    return (C - mu) / std
+
+
+def stock_crosscorr(stock, mu=0, std=1):
+    """Calculate the cross correlation of the species trajectories."""
+    n_obs = stock.shape[1]
+
+    x_preys = stock[:, :, 0]  # preys
+    x_preds = stock[:, :, 1]  # predators
+
+    mu_preys = np.mean(x_preys, axis=1, keepdims=True)
+    mu_preds = np.mean(x_preds, axis=1, keepdims=True)
+    std_preys = np.std(x_preys, axis=1, keepdims=True)
+    std_preds = np.std(x_preds, axis=1, keepdims=True)
+    s_preys = ((x_preys - np.repeat(mu_preys, n_obs, axis=1))
+               / np.repeat(std_preys, n_obs, axis=1))
+    s_preds = ((x_preds - np.repeat(mu_preds, n_obs, axis=1))
+               / np.repeat(std_preds, n_obs, axis=1))
+
+    C = np.sum(s_preys * s_preds, axis=1) / (n_obs - 1)
+
+    return (C - mu) / std
+
+
 class ExpUniform(elfi.Distribution):
     r"""Prior distribution for parameter.
 
@@ -235,20 +324,3 @@ def pdf(cls, x, a, b):
             p = np.where((x < np.exp(a)) | (x > np.exp(b)), 0, np.reciprocal(x))
             p /= (b - a)  # normalize
         return p
-
-
-def pick_stock(stock, species):
-    """Return the stock for single species.
-
-    Parameters
-    ----------
-    stock : np.array
-    species : int
-        0 for prey, 1 for predator.
-
-    Returns
-    -------
-    np.array
-
-    """
-    return stock[:, :, species]
diff --git a/elfi/executor.py b/elfi/executor.py
@@ -107,6 +107,9 @@ def get_execution_order(cls, G):
         # Filter those output nodes who have an operation to run
         needed = tuple(sorted(node for node in output_nodes if 'operation' in G.nodes[node]))
 
+        if len(needed) == 0:
+            return []
+
         if needed not in cache:
             # Resolve the nodes that need to be executed in the graph
             nodes_to_execute = set(needed)