Skip to content

Commit 31b13a8

Browse files
author
AG
committed
updates
1 parent 5551a09 commit 31b13a8

28 files changed

+9940
-2948
lines changed

Diff for: bayesian_transporter/__init__.py

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""
Bayesian transporter research code documentation.

See `example_notebook.ipynb` for example usage.
"""
# NOTE: the docstring must be the first statement in the module; placed after
# the imports it is just a discarded string expression and the package's
# ``__doc__`` stays ``None`` (so documentation tools show nothing).

from . import analysis_functions
from . import run_emcee
from . import run_optimizer
from . import run_pocomc
from . import ssme_function
from . import utility_functions

Diff for: bayesian_transporter/analysis_functions.py

+160-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
from scipy.stats import uniform
44
import matplotlib.pyplot as plt
5+
import math
56

67

78
def estimate_multivariate_density_w_GMM(samples, name, k_max=30, verbose=False, plot=False):
@@ -56,8 +57,8 @@ def estimate_multivariate_density_w_GMM(samples, name, k_max=30, verbose=False,
5657
plt.title(f'{name}')
5758
plt.savefig(f'{name}.png')
5859
if verbose:
59-
print(f"AIC min: k=gmm_best_aic_idx+1, AIC={aics[gmm_best_aic_idx]}")
60-
print(f"BIC min: k=gmm_best_bic_idx+1, AIC={bics[gmm_best_bic_idx]}")
60+
print(f"AIC min: k={gmm_best_aic_idx+1}, AIC={aics[gmm_best_aic_idx]}")
61+
print(f"BIC min: k={gmm_best_bic_idx+1}, AIC={bics[gmm_best_bic_idx]}")
6162
return gmm_best_aic, gmm_best_bic
6263

6364

@@ -84,7 +85,6 @@ def kl_divergence_gmm_uniform(gmm, unfiform_prior_bounds, name, n_samples=10**6,
8485
Samples outside the specified bounds for the uniform distribution are discarded.
8586
"""
8687

87-
8888
samples = gmm.sample(n_samples)[0]
8989
valid_samples = np.all([(samples[:, i] >= r[0]) & (samples[:, i] <= r[1]) for i, r in enumerate(unfiform_prior_bounds)], axis=0)
9090
samples = samples[valid_samples]
@@ -93,4 +93,160 @@ def kl_divergence_gmm_uniform(gmm, unfiform_prior_bounds, name, n_samples=10**6,
9393
kl_divergence = np.mean(log_gmm_pdf - log_uniform_pdf)
9494
if verbose:
9595
print(f"KL divergence of {name} = {kl_divergence}")
96-
return kl_divergence
96+
return kl_divergence
97+
98+
99+
def plot_1D_distributions(sample_arrays, sample_labels, parameter_names, parameter_ranges, parameter_nominals, bins=100, title="1D Parameter Distribution"):
    """
    Plots 1D parameter distributions with overlay of different samples and returns the figure object.

    One subplot is drawn per parameter, arranged on a near-square grid. Every
    sample array is overlaid as a step histogram in each subplot, and the
    nominal value of the parameter is marked with a dashed vertical line.

    Args:
        sample_arrays (list[np.ndarray]): List of 2D sample arrays to be plotted, where each row represents a sample and each column represents a parameter.
        sample_labels (list[str]): List of labels corresponding to each sample array.
        parameter_names (list[str]): List of parameter names.
        parameter_ranges (list[tuple]): List of (min, max) ranges for each parameter.
        parameter_nominals (list[float]): List of nominal values for each parameter.
        bins (int, optional): Number of histogram bins, forwarded to ``Axes.hist``. Default is 100.
        title (str, optional): Title of the plot. Default is "1D Parameter Distribution".

    Returns:
        matplotlib.figure.Figure: The figure object containing the plotted distributions.

    Raises:
        AssertionError: If the number of sample arrays does not match the number of labels.
        ValueError: If ``parameter_names`` is empty (there is nothing to plot).
    """

    # Ensure the number of labels match the number of sample arrays
    assert len(sample_arrays) == len(sample_labels), "Mismatch between number of sample arrays and labels."

    num_params = len(parameter_names)
    if num_params == 0:
        # Without this guard the grid computation below divides by zero.
        raise ValueError("parameter_names must contain at least one parameter.")

    # Near-square grid that is large enough to hold every parameter.
    num_cols = math.ceil(math.sqrt(num_params))
    num_rows = math.ceil(num_params / num_cols)

    # squeeze=False guarantees a 2D array of Axes even for a 1x1 grid; with the
    # default, a single parameter yields a bare Axes object that has no
    # ``flatten`` method.
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(3 * num_cols, 2 * num_rows), squeeze=False)
    axs = axs.flatten()

    for param_idx, param_name in enumerate(parameter_names):
        ax = axs[param_idx]
        for sample_array, sample_label in zip(sample_arrays, sample_labels):
            ax.hist(sample_array[:, param_idx], bins=bins, alpha=0.5, density=True, histtype='step', label=sample_label, range=parameter_ranges[param_idx])
        ax.axvline(parameter_nominals[param_idx], linestyle='--', color='k', linewidth=1)
        ax.set_xlabel(param_name)
        ax.set_ylabel('Density')

    # The grid may have more cells than parameters; hide the leftover axes
    # instead of showing empty frames.
    for unused_ax in axs[num_params:]:
        unused_ax.axis('off')

    # A single legend on the first subplot is enough: every subplot shows the same sample sets.
    axs[0].legend()
    fig.suptitle(title)
    plt.tight_layout()
    # Leave head-room so the suptitle does not overlap the top row.
    plt.subplots_adjust(top=0.9)
    return fig
139+
140+
141+
def plot_2D_corner(sample_arrays, sample_labels, parameter_names, parameter_ranges, parameter_nominals=None, bins=100, title="Corner Plot"):
    """
    Plots a 2D corner plot with 2D density histograms off-diagonal and 1D histograms on the diagonal.

    The upper triangle of the grid is hidden. Nominal values, when given, are
    drawn as dashed black lines on both the diagonal and off-diagonal panels.

    Args:
        sample_arrays (list[np.ndarray]): List of 2D sample arrays to be plotted, where each row represents a sample and each column represents a parameter.
        sample_labels (list[str]): List of labels corresponding to each sample array.
        parameter_names (list[str]): List of parameter names.
        parameter_ranges (list[tuple]): List of (min, max) ranges for each parameter.
        parameter_nominals (list[float], optional): List of nominal (reference) values for each parameter.
            ``None`` or an empty sequence disables the reference lines.
        bins (int or list): Number of bins or a list of bin edges for the histograms.
        title (str, optional): Title of the plot. Default is "Corner Plot".

    Returns:
        matplotlib.figure.Figure: The figure object containing the plotted distributions.

    Raises:
        AssertionError: If the number of sample arrays does not match the number of labels.
    """

    # Ensure the number of labels match the number of sample arrays
    assert len(sample_arrays) == len(sample_labels), "Mismatch between number of sample arrays and labels."

    # Explicit None/length test: plain truthiness raises "ambiguous truth
    # value" when the nominals are passed as a NumPy array.
    has_nominals = parameter_nominals is not None and len(parameter_nominals) > 0

    num_params = len(parameter_names)
    # squeeze=False keeps ``axs`` two-dimensional even for a single parameter,
    # so the ``axs[row, col]`` indexing below always works.
    fig, axs = plt.subplots(num_params, num_params, figsize=(3 * num_params, 3 * num_params), squeeze=False)

    for row in range(num_params):
        for col in range(num_params):
            ax = axs[row, col]

            # Hide plots in the upper triangle
            if row < col:
                ax.axis('off')
                continue

            # Diagonal: 1D histograms
            if row == col:
                for sample_array, sample_label in zip(sample_arrays, sample_labels):
                    ax.hist(sample_array[:, col], bins=bins, alpha=0.5, density=True, histtype='step', label=sample_label, range=parameter_ranges[col])
                ax.set_xlim(*parameter_ranges[col])
                ax.set_xlabel(parameter_names[col])
                if has_nominals:
                    ax.axvline(parameter_nominals[col], linestyle='--', color='k', linewidth=1)

            # Off-diagonal: 2D histograms
            else:
                hist2d_params = {
                    "bins": bins,
                    "range": [parameter_ranges[col], parameter_ranges[row]],
                    "cmap": 'Blues',
                    "density": True,
                }
                # All sample sets share the same colormap and are drawn on top of each other.
                for sample_array in sample_arrays:
                    ax.hist2d(sample_array[:, col], sample_array[:, row], **hist2d_params)
                ax.set_xlim(*parameter_ranges[col])
                ax.set_ylim(*parameter_ranges[row])
                ax.set_xlabel(parameter_names[col])
                ax.set_ylabel(parameter_names[row])
                if has_nominals:
                    ax.axvline(parameter_nominals[col], linestyle='--', color='k', linewidth=1)
                    ax.axhline(parameter_nominals[row], linestyle='--', color='k', linewidth=1)

    # We set the legend on one of the diagonal plots for compactness
    axs[0, 0].legend(loc='upper right')
    fig.suptitle(title)
    plt.tight_layout()
    # Leave head-room so the suptitle does not overlap the top row.
    plt.subplots_adjust(top=0.95)
    return fig
209+
210+
211+
def plot_random_sample_predictions(samples, data_gen_func, observed_data, N, gen_func_args=None, title=None):
    """
    Draws N random samples from the given sample array, generates predicted datasets
    for each sample, and plots them against the observed dataset.

    Rows are drawn without replacement using NumPy's global random state, so
    seed ``np.random`` beforehand for reproducible selections.

    Args:
        samples (np.ndarray): Sample array where each row is a sample.
        data_gen_func (function): Data generation function that takes in a sample and additional arguments.
        observed_data (np.ndarray): Observed data set for reference.
        N (int): Number of random samples to be drawn.
        gen_func_args (dict, optional): Additional arguments to be passed to the data generation function.
        title (str, optional): Title of the plot. When omitted (or empty), a default
            title mentioning N is used.

    Returns:
        matplotlib.figure.Figure: The figure object containing the plotted predictions and observed data.

    Raises:
        ValueError: If N is not between 1 and the number of rows in ``samples``.
    """

    if gen_func_args is None:
        gen_func_args = {}

    # Check if the number of samples requested is valid
    total_samples = samples.shape[0]
    if N <= 0 or N > total_samples:
        raise ValueError(f"Invalid number of samples requested: {N}. It should be between 1 and {total_samples}.")

    # Select N random samples
    random_samples = samples[np.random.choice(total_samples, N, replace=False)]

    # Draw on an explicit figure/axes pair rather than the implicit "current"
    # pyplot figure: data_gen_func is caller-supplied and may itself touch
    # pyplot, which would otherwise redirect the plot calls or the returned
    # figure.
    fig, ax = plt.subplots(figsize=(12, 6))

    # Generate and plot predicted data for each random sample
    for sample in random_samples:
        predicted_data = data_gen_func(sample, **gen_func_args)
        ax.plot(predicted_data, 'b-', alpha=0.25)

    # Plot observed data for comparison
    ax.plot(observed_data, 'ro', label="Observed Data", alpha=0.45)
    ax.legend()
    if title:
        ax.set_title(f"{title}")
    else:
        ax.set_title(f"{N} Randomly Selected Predictions vs Observed Data")
    return fig

Diff for: bayesian_transporter/run_emcee.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import scipy as sp
44
import emcee
55
import yaml
6-
import ssme_function as ssme
6+
from . import ssme_function as ssme
77
import tellurium as te
88
import os
99
import shutil
1010
import datetime
1111
import logging
1212
import corner
13-
import utility_functions as uf
13+
from . import utility_functions as uf
1414
import os
1515

1616

@@ -230,6 +230,6 @@ def run_sampler(config_fname):
230230
if __name__ == '__main__':
231231

232232
##### Adjust this if needed #####
233-
example_config = "/example/antiporter_1_1_12D_cycle1_config.yaml"
233+
example_config = "./example/antiporter_1_1_12D_cycle1_config.yaml"
234234
run_sampler(example_config)
235235

Diff for: bayesian_transporter/run_optimizer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33
import scipy as sp
44
import yaml
5-
import ssme_function as ssme
5+
from . import ssme_function as ssme
66
import tellurium as te
77
import os
88
import shutil
@@ -12,7 +12,7 @@
1212
from tqdm import tqdm
1313
from scipy.optimize import basinhopping, dual_annealing, shgo, minimize
1414
import inspect
15-
import utility_functions as uf
15+
from . import utility_functions as uf
1616

1717

1818

@@ -817,5 +817,5 @@ def run_optimizer(config_fname):
817817
if __name__ == '__main__':
818818

819819
##### Adjust this if needed #####
820-
example_config = "/example/antiporter_1_1_12D_cycle1_config.yaml"
820+
example_config = "./example/antiporter_1_1_12D_cycle1_config.yaml"
821821
run_optimizer(example_config)

Diff for: bayesian_transporter/run_pocomc.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import scipy as sp
44
import pocomc as pmc
55
import yaml
6-
import ssme_function as ssme
6+
from . import ssme_function as ssme
77
import tellurium as te
88
import os
99
import shutil
@@ -12,7 +12,7 @@
1212
import corner
1313
import arviz as az
1414
import os
15-
import utility_functions as uf
15+
from . import utility_functions as uf
1616

1717

1818
os.environ['KMP_DUPLICATE_LIB_OK']='True' # may be needed depending on installation of math library
@@ -254,7 +254,7 @@ def run_sampler(config_fname):
254254
if __name__ == '__main__':
255255

256256
##### Adjust this if needed #####
257-
example_config = "/example/antiporter_1_1_12D_cycle1_config.yaml"
257+
example_config = "./example/antiporter_1_1_12D_cycle1_config.yaml"
258258
run_sampler(example_config)
259259

260260

Diff for: bayesian_transporter/utility_functions.py

+70-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import numpy as np
22
import scipy as sp
3-
import ssme_function as ssme
3+
from . import ssme_function as ssme
44
import time as time
5+
import yaml
6+
import tellurium as te
57

68

79
def get_p0(b_list, n):
@@ -283,4 +285,70 @@ def negative_log_likelihood_wrapper_extended(params, rr_model, y_obs, initial_co
283285
"""
284286
logl = log_like_extended(params, rr_model, y_obs, initial_conditions, initial_conditions_scale, buffer_concentration_scale, solver_arguments)
285287
logpr = log_prior(params, param_lb, param_ub)
286-
return -1*(logl+logpr)
288+
return -1*(logl+logpr)
289+
290+
291+
def get_ssme_pred_data_from_config(params, config_file):
    """
    Simulates a model based on parameters and a configuration file, returning the model's predictions.

    This function sets up a RoadRunner model, adjusts its parameters based on the provided `params`
    and the `extended` field of the configuration, then simulates the model. If the simulation
    is successful, the function returns the simulated data. Otherwise, it returns a list of zeros
    of the same length as the observational data specified in the config file.

    Side effect: NumPy's global random state is re-seeded with the config's ``random_seed``.

    Args:
        params (list[float]): List of model parameters. Rate constants are given in log10 and come
            first. In extended mode the last five entries are, in order: H_out buffer scale,
            S_out buffer scale, initial transporter concentration scale, bias, log10(sigma).
            Otherwise only the last entry (log10(sigma)) follows the rate constants.
        config_file (str): Path to the YAML configuration file which contains model, data paths,
            and other settings.

    Returns:
        list[float]: Predicted data from the model simulation. If simulation fails, returns
            a list of zeros with ``len(y_obs)`` entries.
    """

    with open(config_file, "r") as f:
        config = yaml.safe_load(f)

    model_file = config['model_file']
    data_file = config['data_file']
    simulation_kwargs = config['solver_arguments']
    seed = config['random_seed']
    np.random.seed(seed)
    extended = config['bayesian_inference']['extended']
    initial_conditions = config['solver_arguments']['species_initial_concentrations']
    initial_conditions_scale = config['solver_arguments']['species_initial_concentrations_scale']
    buffer_concentration_scale = config['solver_arguments']['buffer_concentration_scale']

    # load roadrunner model
    rr_model = te.loadSBMLModel(model_file)

    # set parameters for ODE (SBML) model, depending on how many nuisance parameters to use
    # NOTE: params[-1] is log10(sigma); it is not needed to produce the prediction, so it is not unpacked here.
    if extended:
        # additional scaling and bias terms
        k = [10**i for i in params[:-5]]
        bias = params[-2]
        H_out_buffer_scale = params[-5]
        S_out_buffer_scale = params[-4]
        initial_transporter_concentration_scale = params[-3]
    else:
        k = [10**i for i in params[:-1]]
        bias = 1
        H_out_buffer_scale = 1
        S_out_buffer_scale = 1
        initial_transporter_concentration_scale = 1

    # set concentration uncertainty (these lists come from the freshly loaded config,
    # so mutating them in place does not leak outside this call)
    buffer_concentration_scale[0] = H_out_buffer_scale
    buffer_concentration_scale[1] = S_out_buffer_scale
    initial_conditions_scale[0] = initial_transporter_concentration_scale

    y_obs = np.loadtxt(data_file, delimiter=',')
    try:
        res = ssme.simulate_assay(rr_model, k, initial_conditions, initial_conditions_scale, buffer_concentration_scale, simulation_kwargs)
        # NOTE(review): assumes res[1] supports scalar multiplication (e.g. a NumPy array) - confirm against ssme.simulate_assay
        y_pred = bias*res[1]
        return y_pred
    except Exception:
        # Best-effort fallback for a failed simulation. ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt and SystemExit propagate.
        return [0]*len(y_obs)

0 commit comments

Comments
 (0)