ZuckermanLab
diff --git a/‎bayesian_transporter/__init__.py
Lines changed: 12 additions & 0 deletions b/‎bayesian_transporter/__init__.py
Lines changed: 12 additions & 0 deletions
diff --git a/‎bayesian_transporter/analysis_functions.py
Lines changed: 160 additions & 4 deletions b/‎bayesian_transporter/analysis_functions.py
Lines changed: 160 additions & 4 deletions
diff --git a/‎bayesian_transporter/run_emcee.py
Lines changed: 3 additions & 3 deletions b/‎bayesian_transporter/run_emcee.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎bayesian_transporter/run_optimizer.py
Lines changed: 3 additions & 3 deletions b/‎bayesian_transporter/run_optimizer.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎bayesian_transporter/run_pocomc.py
Lines changed: 3 additions & 3 deletions b/‎bayesian_transporter/run_pocomc.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎bayesian_transporter/utility_functions.py
Lines changed: 70 additions & 2 deletions b/‎bayesian_transporter/utility_functions.py
Lines changed: 70 additions & 2 deletions
@@ -0,0 +1,12 @@
+"""
+Bayesian transporter research code documentation.
+
+See `example_notebook.ipynb` for example usage.
+"""
+
+from . import analysis_functions
+from . import run_emcee
+from . import run_optimizer
+from . import run_pocomc
+from . import ssme_function
+from . import utility_functions
@@ -2,6 +2,7 @@
 import numpy as np
 from scipy.stats import uniform
 import matplotlib.pyplot as plt
+import math
 
 
 def estimate_multivariate_density_w_GMM(samples, name, k_max=30, verbose=False, plot=False):
@@ -56,8 +57,8 @@ def estimate_multivariate_density_w_GMM(samples, name, k_max=30, verbose=False,
         plt.title(f'{name}')
         plt.savefig(f'{name}.png')
     if verbose:
-        print(f"AIC min: k=gmm_best_aic_idx+1, AIC={aics[gmm_best_aic_idx]}")
-        print(f"BIC min: k=gmm_best_bic_idx+1, AIC={bics[gmm_best_bic_idx]}")
+        print(f"AIC min: k={gmm_best_aic_idx+1}, AIC={aics[gmm_best_aic_idx]}")
+        print(f"BIC min: k={gmm_best_bic_idx+1}, AIC={bics[gmm_best_bic_idx]}")
     return gmm_best_aic, gmm_best_bic
 
 
@@ -84,7 +85,6 @@ def kl_divergence_gmm_uniform(gmm, unfiform_prior_bounds, name, n_samples=10**6,
         Samples outside the specified bounds for the uniform distribution are discarded.
     """
 
-    
     samples = gmm.sample(n_samples)[0]
     valid_samples = np.all([(samples[:, i] >= r[0]) & (samples[:, i] <= r[1]) for i, r in enumerate(unfiform_prior_bounds)], axis=0)
     samples = samples[valid_samples]
@@ -93,4 +93,160 @@ def kl_divergence_gmm_uniform(gmm, unfiform_prior_bounds, name, n_samples=10**6,
     kl_divergence = np.mean(log_gmm_pdf - log_uniform_pdf)
     if verbose:
         print(f"KL divergence of {name} = {kl_divergence}")
-    return kl_divergence
+    return kl_divergence
+
+
+def plot_1D_distributions(sample_arrays, sample_labels, parameter_names, parameter_ranges, parameter_nominals, bins=100, title="1D Parameter Distribution"):
+    """
+    Plot overlaid 1D histograms of every parameter for one or more sample sets.
+
+    One subplot is created per parameter and laid out on a near-square grid. Each
+    sample array is drawn as a normalized step histogram restricted to the
+    parameter's range, and a dashed vertical line marks the nominal value.
+
+    Args:
+        sample_arrays (list[np.ndarray]): List of 2D sample arrays to be plotted, where each row represents a sample and each column represents a parameter.
+        sample_labels (list[str]): List of labels corresponding to each sample array.
+        parameter_names (list[str]): List of parameter names (one subplot per name).
+        parameter_ranges (list[tuple]): List of (min, max) ranges for each parameter.
+        parameter_nominals (list[float]): List of nominal values for each parameter.
+        bins (int, optional): Bin specification forwarded to `Axes.hist`. Default is 100.
+        title (str, optional): Title of the plot. Default is "1D Parameter Distribution".
+
+    Returns:
+        matplotlib.figure.Figure: The figure object containing the plotted distributions.
+
+    Raises:
+        AssertionError: If the number of sample arrays does not match the number of labels.
+        ValueError: If `parameter_names` is empty.
+    """
+
+    # Raised explicitly instead of via `assert` so the check is not stripped under `python -O`;
+    # the exception type is kept for callers that already handle AssertionError.
+    if len(sample_arrays) != len(sample_labels):
+        raise AssertionError("Mismatch between number of sample arrays and labels.")
+
+    num_params = len(parameter_names)
+    if num_params == 0:
+        # Without this guard the grid computation below divides by zero.
+        raise ValueError("parameter_names must contain at least one parameter.")
+
+    num_cols = math.ceil(math.sqrt(num_params))
+    num_rows = math.ceil(num_params / num_cols)
+
+    # squeeze=False always yields a 2D array of Axes, so flatten() also works for a 1x1 grid
+    # (with the default squeeze=True a single subplot comes back as a bare Axes object).
+    fig, axs = plt.subplots(num_rows, num_cols, figsize=(3 * num_cols, 2 * num_rows), squeeze=False)
+    axs = axs.flatten()
+
+    for param_idx, param_name in enumerate(parameter_names):
+        ax = axs[param_idx]
+        for sample_array, sample_label in zip(sample_arrays, sample_labels):
+            ax.hist(sample_array[:, param_idx], bins=bins, alpha=0.5, density=True, histtype='step', label=sample_label, range=parameter_ranges[param_idx])
+        ax.axvline(parameter_nominals[param_idx], linestyle='--', color='k', linewidth=1)
+        ax.set_xlabel(param_name)
+        ax.set_ylabel('Density')
+
+    # The grid can hold more cells than there are parameters; hide the leftovers.
+    for unused_ax in axs[num_params:]:
+        unused_ax.axis('off')
+
+    # A single legend on the first panel is enough: every panel shows the same sample sets.
+    axs[0].legend()
+    fig.suptitle(title)
+    fig.tight_layout()
+    # Leave head-room for the suptitle, which tight_layout does not account for.
+    fig.subplots_adjust(top=0.9)
+    return fig
+
+
+def plot_2D_corner(sample_arrays, sample_labels, parameter_names, parameter_ranges, parameter_nominals=None, bins=100, title="Corner Plot"):
+    """
+    Plots a 2D corner plot with 2D density histograms off-diagonal and 1D histograms on the diagonal.
+
+    The grid is `len(parameter_names)` x `len(parameter_names)`. Panels above the
+    diagonal are hidden, diagonal panels show normalized 1D step histograms, and
+    panels below the diagonal show normalized 2D histograms of the column
+    parameter (x) against the row parameter (y).
+
+    Args:
+        sample_arrays (list[np.ndarray]): List of 2D sample arrays to be plotted, where each row represents a sample and each column represents a parameter.
+        sample_labels (list[str]): List of labels corresponding to each sample array.
+        parameter_names (list[str]): List of parameter names.
+        parameter_ranges (list[tuple]): List of (min, max) ranges for each parameter.
+        parameter_nominals (list[float], optional): List of nominal (reference) values for each parameter.
+            If None or empty, no reference lines are drawn.
+        bins (int or list): Number of bins or a list of bin edges for the histograms.
+        title (str, optional): Title of the plot. Default is "Corner Plot".
+
+    Returns:
+        matplotlib.figure.Figure: The figure object containing the plotted distributions.
+
+    Raises:
+        AssertionError: If the number of sample arrays does not match the number of labels.
+    """
+
+    # Raised explicitly instead of via `assert` so the check is not stripped under `python -O`;
+    # the exception type is kept for callers that already handle AssertionError.
+    if len(sample_arrays) != len(sample_labels):
+        raise AssertionError("Mismatch between number of sample arrays and labels.")
+
+    # Explicit None/length test: plain truthiness raises for a multi-element NumPy array,
+    # while an empty sequence must still mean "no reference lines".
+    draw_nominals = parameter_nominals is not None and len(parameter_nominals) > 0
+
+    num_params = len(parameter_names)
+    # squeeze=False keeps `axs` two-dimensional even for a single parameter, so axs[row, col] is always valid.
+    fig, axs = plt.subplots(num_params, num_params, figsize=(3 * num_params, 3 * num_params), squeeze=False)
+
+    for row in range(num_params):
+        for col in range(num_params):
+            ax = axs[row, col]
+
+            # Hide plots in the upper triangle
+            if row < col:
+                ax.axis('off')
+                continue
+
+            # Diagonal: 1D histograms
+            if row == col:
+                for sample_array, sample_label in zip(sample_arrays, sample_labels):
+                    ax.hist(sample_array[:, col], bins=bins, alpha=0.5, density=True, histtype='step', label=sample_label, range=parameter_ranges[col])
+                ax.set_xlim(*parameter_ranges[col])
+                ax.set_xlabel(parameter_names[col])
+                if draw_nominals:
+                    ax.axvline(parameter_nominals[col], linestyle='--', color='k', linewidth=1)
+                continue
+
+            # Off-diagonal: 2D histograms (x = column parameter, y = row parameter)
+            hist2d_params = {
+                "bins": bins,
+                "range": [parameter_ranges[col], parameter_ranges[row]],
+                "cmap": 'Blues',
+                "density": True
+            }
+            # NOTE(review): every sample set is drawn with the same colormap on the same axes,
+            # so later sets presumably paint over earlier ones -- confirm this is intended.
+            for sample_array in sample_arrays:
+                ax.hist2d(sample_array[:, col], sample_array[:, row], **hist2d_params)
+            ax.set_xlim(*parameter_ranges[col])
+            ax.set_ylim(*parameter_ranges[row])
+            ax.set_xlabel(parameter_names[col])
+            ax.set_ylabel(parameter_names[row])
+            if draw_nominals:
+                ax.axvline(parameter_nominals[col], linestyle='--', color='k', linewidth=1)
+                ax.axhline(parameter_nominals[row], linestyle='--', color='k', linewidth=1)
+
+    # We set the legend on one of the diagonal plots for compactness
+    axs[0, 0].legend(loc='upper right')
+    fig.suptitle(title)
+    fig.tight_layout()
+    # Leave head-room for the suptitle, which tight_layout does not account for.
+    fig.subplots_adjust(top=0.95)
+    return fig
+
+
+def plot_random_sample_predictions(samples, data_gen_func, observed_data, N, gen_func_args=None, title=None):
+    """
+    Draws N random samples from the given sample array, generates predicted datasets
+    for each sample, and plots them against the observed dataset.
+
+    Rows are selected without replacement using NumPy's global random state.
+    Each prediction is drawn as a translucent blue line and the observed data as
+    red markers, all on a single set of axes.
+
+    Args:
+        samples (np.ndarray): Sample array where each row is a sample.
+        data_gen_func (function): Data generation function that takes in a sample and additional arguments.
+        observed_data (np.ndarray): Observed data set for reference.
+        N (int): Number of random samples to be drawn.
+        gen_func_args (dict, optional): Additional keyword arguments to be passed to the data generation function.
+        title (str, optional): Title of the plot. If omitted or empty, a default title mentioning N is used.
+
+    Returns:
+        matplotlib.figure.Figure: The figure object containing the plotted predictions and observed data.
+
+    Raises:
+        ValueError: If N is not between 1 and the number of rows in `samples`.
+    """
+
+    if gen_func_args is None:
+        gen_func_args = {}
+
+    # Check if the number of samples requested is valid
+    total_samples = samples.shape[0]
+    if N <= 0 or N > total_samples:
+        raise ValueError(f"Invalid number of samples requested: {N}. It should be between 1 and {total_samples}.")
+
+    # Select N random samples
+    random_samples = samples[np.random.choice(total_samples, N, replace=False)]
+
+    # Draw on an explicit Figure/Axes pair rather than pyplot's implicit "current figure":
+    # if data_gen_func touches pyplot state, the predictions still land on this figure
+    # and this figure is the one returned.
+    fig, ax = plt.subplots(figsize=(12, 6))
+
+    # Generate and plot predicted data for each random sample
+    for sample in random_samples:
+        predicted_data = data_gen_func(sample, **gen_func_args)
+        ax.plot(predicted_data, 'b-', alpha=0.25)
+
+    # Plot observed data for comparison
+    ax.plot(observed_data, 'ro', label="Observed Data", alpha=0.45)
+    ax.legend()
+    if title:
+        ax.set_title(f"{title}")
+    else:
+        ax.set_title(f"{N} Randomly Selected Predictions vs Observed Data")
+    return fig
@@ -3,14 +3,14 @@
 import scipy as sp
 import emcee 
 import yaml
-import ssme_function as ssme
+from . import ssme_function as ssme
 import tellurium as te
 import os 
 import shutil
 import datetime
 import logging
 import corner
-import utility_functions as uf
+from . import utility_functions as uf
 import os
 
 
@@ -230,6 +230,6 @@ def run_sampler(config_fname):
 if __name__ == '__main__':
 
     ##### Adjust this if needed ##### 
-    example_config = "/example/antiporter_1_1_12D_cycle1_config.yaml"
+    example_config = "./example/antiporter_1_1_12D_cycle1_config.yaml"
     run_sampler(example_config)
 
@@ -2,7 +2,7 @@
 import numpy as np 
 import scipy as sp
 import yaml
-import ssme_function as ssme
+from . import ssme_function as ssme
 import tellurium as te
 import os 
 import shutil
@@ -12,7 +12,7 @@
 from tqdm import tqdm
 from scipy.optimize import basinhopping, dual_annealing, shgo, minimize
 import inspect
-import utility_functions as uf
+from . import utility_functions as uf
 
 
 
@@ -817,5 +817,5 @@ def run_optimizer(config_fname):
 if __name__ == '__main__':
 
     ##### Adjust this if needed ##### 
-    example_config = "/example/antiporter_1_1_12D_cycle1_config.yaml"
+    example_config = "./example/antiporter_1_1_12D_cycle1_config.yaml"
     run_optimizer(example_config)
@@ -3,7 +3,7 @@
 import scipy as sp
 import pocomc as pmc 
 import yaml
-import ssme_function as ssme
+from . import ssme_function as ssme
 import tellurium as te
 import os 
 import shutil
@@ -12,7 +12,7 @@
 import corner
 import arviz as az
 import os 
-import utility_functions as uf
+from . import utility_functions as uf
 
 
 os.environ['KMP_DUPLICATE_LIB_OK']='True'  # may be needed depending on installation of math library
@@ -254,7 +254,7 @@ def run_sampler(config_fname):
 if __name__ == '__main__':
 
     ##### Adjust this if needed ##### 
-    example_config = "/example/antiporter_1_1_12D_cycle1_config.yaml"
+    example_config = "./example/antiporter_1_1_12D_cycle1_config.yaml"
     run_sampler(example_config)
 
 
@@ -1,7 +1,9 @@
 import numpy as np 
 import scipy as sp
-import ssme_function as ssme
+from . import ssme_function as ssme
 import time as time
+import yaml
+import tellurium as te
 
 
 def get_p0(b_list, n):
@@ -283,4 +285,70 @@ def negative_log_likelihood_wrapper_extended(params, rr_model, y_obs, initial_co
     """
     logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
     logpr = log_prior(params, param_lb, param_ub)
-    return -1*(logl+logpr)
+    return -1*(logl+logpr)
+
+
+def get_ssme_pred_data_from_config(params, config_file):
+    """
+    Simulates a model based on parameters and a configuration file, returning the model's predictions.
+
+    This function sets up a RoadRunner model, adjusts its parameters based on the provided `params`
+    and the `extended` field of the configuration, then simulates the model. If the simulation
+    is successful, the function returns the simulated data. Otherwise, it returns a list of zeros
+    of the same length as the observational data specified in the config file.
+
+    Side effects:
+        Reseeds NumPy's global random number generator with `random_seed` from the config
+        on every call.
+
+    Args:
+        params (list[float]): List of model parameters. All but the trailing nuisance
+                           parameters are exponentiated as powers of 10 to form `k`. With
+                           `extended` enabled the last five entries are, in order: H_out buffer
+                           scale, S_out buffer scale, initial transporter concentration scale,
+                           bias, and a final entry that is not used for prediction. Otherwise
+                           only the final (unused) entry is dropped.
+        config_file (str): Path to the YAML configuration file which contains model, data paths,
+                           and other settings.
+
+    Returns:
+        list[float]: Predicted data from the model simulation (the second element of the
+                     `ssme.simulate_assay` result multiplied by the bias). If simulation fails,
+                     returns a list of zeros of the same length as the observed data.
+    """
+
+    with open(config_file, "r") as f:
+        config = yaml.safe_load(f)
+
+    model_file = config['model_file']
+    data_file = config['data_file']
+    simulation_kwargs = config['solver_arguments']
+    seed = config['random_seed']
+    np.random.seed(seed)
+    extended = config['bayesian_inference']['extended']
+    initial_conditions = simulation_kwargs['species_initial_concentrations']
+    initial_conditions_scale = simulation_kwargs['species_initial_concentrations_scale']
+    buffer_concentration_scale = simulation_kwargs['buffer_concentration_scale']
+
+    # load roadrunner model
+    rr_model = te.loadSBMLModel(model_file)
+
+    # set parameters for ODE (SBML) model, depending on how many nuisance parameters to use
+    # (params[-1] was previously parsed as 10**sigma but never used for the prediction, so it is skipped here)
+    if extended:
+        # additional scaling and bias terms
+        k = [10**i for i in params[:-5]]
+        H_out_buffer_scale = params[-5]
+        S_out_buffer_scale = params[-4]
+        initial_transporter_concentration_scale = params[-3]
+        bias = params[-2]
+    else:
+        k = [10**i for i in params[:-1]]
+        H_out_buffer_scale = 1
+        S_out_buffer_scale = 1
+        initial_transporter_concentration_scale = 1
+        bias = 1
+
+    # set concentration uncertainty (these lists come from the freshly loaded config,
+    # so the in-place writes do not leak beyond this call)
+    buffer_concentration_scale[0] = H_out_buffer_scale
+    buffer_concentration_scale[1] = S_out_buffer_scale
+    initial_conditions_scale[0] = initial_transporter_concentration_scale
+
+    # Loaded before simulating so a bad data path fails loudly instead of being masked by the fallback below.
+    y_obs = np.loadtxt(data_file, delimiter=',')
+    try:
+        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
+        y_pred = bias*res[1]
+        return y_pred
+    except Exception:
+        # Best-effort fallback for a failed simulation. `Exception` (not a bare `except`)
+        # so KeyboardInterrupt and SystemExit still propagate.
+        return [0]*len(y_obs)