Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/hagabbar/vitamin_b
Browse files Browse the repository at this point in the history
  • Loading branch information
Hunter Arthur Gabbard committed Jul 31, 2020
2 parents 49774cd + 40b802f commit c62450b
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 237 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,14 @@ def readfile(filename):
return filecontents


VERSION = '0.2.7'
VERSION = '0.2.8'
version_file = write_version_file(VERSION)
long_description = get_long_description()


setup(
name='vitamin_b',
version='0.2.7',
version='0.2.8',
description='A user-friendly machine learning Bayesian inference library',
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down
2 changes: 1 addition & 1 deletion vitamin_b/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

from . import run_vitamin

__version__ = "0.2.7"
__version__ = "0.2.8"
__author__ = 'Hunter Gabbard'
__credits__ = 'University of Glasgow'

5 changes: 4 additions & 1 deletion vitamin_b/models/CVAE_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,8 +676,10 @@ def truncnorm(i,lp): # we set up a function that adds the log-likelihoods and
plt.ylabel('cost')
plt.legend()
plt.savefig('%s/latest_%s/cost_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
print('Saving unzoomed cost to -> %s/latest_%s/cost_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
plt.ylim([np.min(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,0]), np.max(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,1])])
plt.savefig('%s/latest_%s/cost_zoom_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
print('Saving zoomed cost to -> %s/latest_%s/cost_zoom_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
plt.close('all')

except:
Expand Down Expand Up @@ -707,7 +709,6 @@ def truncnorm(i,lp): # we set up a function that adds the log-likelihoods and
# Save loss plot data
np.savetxt(save_dir.split('/')[0] + '/loss_data.txt', np.array(plotdata))
except FileNotFoundError as err:
print(err)
pass

if i % params['save_interval'] == 0 and i > 0:
Expand Down Expand Up @@ -767,7 +768,9 @@ def truncnorm(i,lp): # we set up a function that adds the log-likelihoods and


plt.savefig('%s/corner_plot_%s_%d-%d.png' % (params['plot_dir'],params['run_label'],i,j))
print('Saved corner plot iteration %d to -> %s/corner_plot_%s_%d-%d.png' % (i,params['plot_dir'],params['run_label'],i,j))
plt.savefig('%s/latest_%s/corner_plot_%s_%d.png' % (params['plot_dir'],params['run_label'],params['run_label'],j))
print('Saved latest corner plot to -> %s/latest_%s/corner_plot_%s_%d.png' % (params['plot_dir'],params['run_label'],params['run_label'],j))
plt.close('all')
print('Made corner plot %d' % j)

Expand Down
238 changes: 5 additions & 233 deletions vitamin_b/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,8 +801,9 @@ def plot_pp(self,model,sig_test,par_test,i_epoch,normscales,pos_test,bounds):
for l in leg.legendHandles:
l.set_alpha(1.0)
plt.tight_layout()
fig.savefig('%s/pp_plot_%04d.png' % (self.params['plot_dir'],i_epoch),dpi=360)
#fig.savefig('%s/pp_plot_%04d.png' % (self.params['plot_dir'],i_epoch),dpi=360)
fig.savefig('%s/latest_%s/latest_pp_plot.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
print('Saved pp plot to -> %s/latest_%s/latest_pp_plot.png' % (self.params['plot_dir'],self.params['run_label']))
plt.close(fig)
# TODO add this back in
hf.close()
Expand Down Expand Up @@ -832,11 +833,12 @@ def plot_loss(self):
plt.legend()
plt.tight_layout()
plt.savefig('%s/latest_%s/cost_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']),dpi=360)
print('Saved cost unzoomed plot to -> %s/latest_%s/cost_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']))
plt.ylim([np.min(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,0]), np.max(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,1])])
plt.savefig('%s/latest_%s/cost_zoom_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']),dpi=360)
print('Saved cost zoomed plot to -> %s/latest_%s/cost_zoom_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']))
plt.close('all')


return

def gen_kl_plots(self,model,sig_test,par_test,normscales,bounds,snrs_test):
Expand Down Expand Up @@ -1123,235 +1125,5 @@ def my_kde_bandwidth(obj, fac=1.0):
fig_kl.savefig('%s/latest_%s/hist-kl.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
plt.close(fig_kl)
hf.close()
return

"""
# Compute dynesty with itself once
sampler1 = 'dynesty1'
sampler2 = 'dynesty1'
logbins = np.logspace(-3,2.5,50)
time_dict = {}
set1,time_dict[sampler1] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler1)
# set2,time_dict[sampler2] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler2)
set2 = np.copy(set1)
tot_kl_std = []
tot_kl_mean = []
for r in range(self.params['r']**2):
std_kl, mean_kl = compute_kl(set1[r],set2[r],[sampler1,sampler2])
tot_kl_std.append(std_kl)
tot_kl_mean.append(mean_kl)
print(tot_kl_std[r])
print('Completed KL for set %s-%s and test sample %s' % (sampler1,sampler2,str(r)))
tot_kl_std = np.array(tot_kl_std)
tot_kl_mean = np.array(tot_kl_mean)
# linear plot
fig_kl, axis_kl = plt.subplots(1,1,figsize=(6,6))
axis_kl.hist(tot_kl_std,bins=50,histtype='stepfilled',density=True,color='grey')
axis_kl.set_xlabel(r'$\mathrm{KL-Statistic}$',fontsize=14)
axis_kl.set_ylabel(r'$p(\mathrm{KL})$',fontsize=14)
axis_kl.tick_params(axis="x", labelsize=14)
axis_kl.tick_params(axis="y", labelsize=14)
leg = axis_kl.legend(loc='upper right', fontsize=10) #'medium')
for l in leg.legendHandles:
l.set_alpha(1.0)
axis_kl.set_xlim(left=1e-2,right=1)
axis_kl.grid(False)
fig_kl.canvas.draw()
fig_kl.savefig('/home/hunter.gabbard/public_html/CBC/VItamin/dynesty_with_itself_linear.png')
plt.close()
# log plot
fig_kl, axis_kl = plt.subplots(1,1,figsize=(6,6))
axis_kl.hist(tot_kl_std,bins=logbins,histtype='stepfilled',density=True,color='grey')
axis_kl.set_xlabel(r'$\mathrm{KL-Statistic}$',fontsize=14)
axis_kl.set_ylabel(r'$p(\mathrm{KL})$',fontsize=14)
axis_kl.tick_params(axis="x", labelsize=14)
axis_kl.tick_params(axis="y", labelsize=14)
leg = axis_kl.legend(loc='upper right', fontsize=10) #'medium')
for l in leg.legendHandles:
l.set_alpha(1.0)
axis_kl.set_xlim(left=1e-2,right=1)
axis_kl.set_xscale('log')
axis_kl.set_yscale('log')
axis_kl.grid(False)
fig_kl.canvas.draw()
fig_kl.savefig('/home/hunter.gabbard/public_html/CBC/VItamin/dynesty_with_itself_log.png')
plt.close()
print()
print('Mean std KL is below:')
print(np.mean(tot_kl_std))
print()
print('Mean KL is below: ')
print(np.mean(tot_kl_mean))
print('Finished computing KL with itself')
exit()
"""

tot_kl_grey = np.array([])
fig_kl, axis_kl = plt.subplots(1,1,figsize=(6,6))
time_dict = {}
# single plot KL approach
for i in range(len(usesamps)):
for j in range(tmp_idx):

# Load appropriate test sets
if samplers[i] == samplers[::-1][j]:
print_cnt+=1
sampler1, sampler2 = samplers[i]+'1', samplers[::-1][j]+'1'

# currently not doing KL of approaches with themselves, so skip here
continue
if self.params['load_plot_data'] == False:
set1,time_dict[sampler1] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler1)
set2,time_dict[sampler2] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler2)
else:
sampler1, sampler2 = samplers[i]+'1', samplers[::-1][j]+'1'

if self.params['load_plot_data'] == False:
set1,time_dict[sampler1] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler1,vitamin_pred_made=vi_pred_made)
set2,time_dict[sampler2] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler2,vitamin_pred_made=vi_pred_made)

# check if vitamin test posteriors were generated for the first time
if sampler1 == 'vitamin1' and vi_pred_made == None:
vi_pred_made = [set1,time_dict[sampler1]]
elif sampler2 == 'vitamin1' and vi_pred_made == None:
vi_pred_made = [set2,time_dict[sampler2]]

if self.params['load_plot_data'] == True:
tot_kl = np.array(hf['%s-%s' % (sampler1,sampler2)])
else:

# Iterate over test cases
tot_kl = [] # total KL over all infered parameters
indi_kl = np.zeros((self.params['r']**2,len(self.params['inf_pars']))) # KL for each individual paramter

if self.params['make_indi_kl'] == True:
for r in range(self.params['r']**2):
indi_kl[r,:] = compute_kl(set1[r],set2[r],[sampler1,sampler2],one_D=True)
print('Completed KL for set %s-%s and test sample %s' % (sampler1,sampler2,str(r)))
for r in range(self.params['r']**2):
tot_kl.append(compute_kl(set1[r],set2[r],[sampler1,sampler2]))
print('Completed KL for set %s-%s and test sample %s' % (sampler1,sampler2,str(r)))
tot_kl = np.array(tot_kl)

if self.params['load_plot_data'] == False:

# Save results to h5py file
hf.create_dataset('%s-%s' % (sampler1,sampler2), data=tot_kl)

logbins = np.logspace(-3,2.5,50)
logbins_indi = np.logspace(-3,3,50)
#logbins = 50

if samplers[i] == 'vitamin' or samplers[::-1][j] == 'vitamin':
# print 10 worst and 10 best kl
print(tot_kl.argsort()[-15:][::-1])
print(np.sort(tot_kl)[-15:][::-1])
print(tot_kl.argsort()[:15][:])
print(np.sort(tot_kl)[:15][:])

# plot vitamin kls
axis_kl.hist(tot_kl,bins=logbins,alpha=0.5,histtype='stepfilled',density=True,color=CB_color_cycle[print_cnt],label=r'$\textrm{VItamin-%s}$' % (samplers[::-1][j]),zorder=2)
axis_kl.hist(tot_kl,bins=logbins,histtype='step',density=True,facecolor='None',ls='-',lw=2,edgecolor=CB_color_cycle[print_cnt],zorder=10)

if self.params['make_indi_kl'] == True:
# plot indi vitamin kls
for u in range(len(self.params['inf_pars'])):
indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,alpha=0.5,histtype='stepfilled',density=True,color=CB_color_cycle[print_cnt],label=r'$\textrm{VItamin vs. %s}$' % (samplers[::-1][j]),zorder=2)
indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,histtype='step',density=True,facecolor='None',ls='-',lw=0.5,edgecolor=CB_color_cycle[print_cnt],zorder=10)
print('Mean total KL vitamin vs bilby: %s' % str(np.mean(tot_kl)))

# Return the mean KL if doing hyperparameter optimization
if self.params['hyperparam_optim'] == True:
return np.mean(tot_kl)
else:
print(tot_kl.argsort()[-15:][::-1])
print(np.sort(tot_kl)[-15:][::-1])
print(tot_kl.argsort()[:15][:])
print(np.sort(tot_kl)[:15][:])


tot_kl_grey = np.append(tot_kl_grey,tot_kl)

if label_idx == 0:

if self.params['make_indi_kl'] == True:
# plot indi bayesian kls
for u in range(len(self.params['inf_pars'])):
indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,alpha=0.8,histtype='stepfilled',density=True,color='grey',label=r'$\textrm{other samplers}$',zorder=1)

label_idx += 1
else:
if self.params['make_indi_kl'] == True:
# plot indi bayesian kls
for u in range(len(self.params['inf_pars'])):
indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,alpha=0.8,histtype='stepfilled',density=True,color='grey',zorder=1)

if self.params['make_indi_kl'] == True:
# plot indi bayesian kls
for u in range(len(self.params['inf_pars'])):
indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,histtype='step',density=True,facecolor='None',ls='-',lw=0.2,edgecolor='grey',zorder=1)

print('Mean total KL between bilby samps: %s' % str(np.mean(tot_kl)))

print('Completed KL calculation %d/%d' % (print_cnt,len(usesamps)*2))
print_cnt+=1

tmp_idx -= 1
if self.params['load_plot_data'] == False:
runtime[sampler1] = time_dict[sampler1]

# plot non indi bayesian kls
axis_kl.hist(tot_kl_grey,bins=logbins,alpha=0.8,histtype='stepfilled',density=True,color='grey',label=r'$\textrm{other samplers vs. other samplers}$',zorder=1)
# plot non indi bayesian kls
axis_kl.hist(tot_kl_grey,bins=logbins,histtype='step',density=True,facecolor='None',ls='-',lw=2,edgecolor='grey',zorder=1)

if self.params['load_plot_data'] == False:
# # Print sampler runtimes
for i in range(len(usesamps)):
hf.create_dataset('%s_runtime' % (samplers[i]), data=np.array(runtime[samplers[i]+'1']))
print('%s sampler runtimes: %s' % (samplers[i]+'1',str(runtime[samplers[i]+'1'])))



# Save KL histogram
axis_kl.set_xlabel(r'$\mathrm{KL-Statistic}$',fontsize=14)
axis_kl.set_ylabel(r'$p(\mathrm{KL})$',fontsize=14)
axis_kl.tick_params(axis="x", labelsize=14)
axis_kl.tick_params(axis="y", labelsize=14)
leg = axis_kl.legend(loc='upper right', fontsize=10) #'medium')
for l in leg.legendHandles:
l.set_alpha(1.0)

axis_kl.set_xlim(left=8e-2,right=100)
axis_kl.set_xscale('log')
axis_kl.set_yscale('log')
axis_kl.grid(False)
fig_kl.canvas.draw()
# fig_kl.savefig('%s/latest_%s/hist-kl.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
plt.close(fig_kl)

if self.params['make_indi_kl'] == True:
# save indi kl histogram
for u in range(len(self.params['inf_pars'])):
indi_axis_kl[u].set_xlabel(r'$\mathrm{%s}$' % self.params['inf_pars'][u],fontsize=5)
indi_axis_kl[u].tick_params(axis="x", labelsize=5)
indi_axis_kl[u].tick_params(axis="y", labelsize=5)

indi_axis_kl[u].set_xlim(left=1e-3)
indi_axis_kl[u].set_xscale('log')
indi_axis_kl[u].set_yscale('log')
indi_axis_kl[u].grid(False)
indi_axis_kl[7].set_visible(False)
indi_axis_kl[8].set_visible(False)
indi_fig_kl.canvas.draw()
indi_fig_kl.savefig('%s/latest_%s/hist-kl_individual_par.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
plt.close(indi_fig_kl)

hf.close()
print('Saved KL plot to -> %s/latest_%s/hist-kl.png' % (self.params['plot_dir'],self.params['run_label']))
return
3 changes: 3 additions & 0 deletions vitamin_b/run_vitamin.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,7 @@ def train(params=params,bounds=bounds,fixed_vals=fixed_vals,resume_training=Fals
plt.close('all')
plot_convergence(search_result)
plt.savefig('%s/latest_%s/hyperpar_convergence.png' % (params['plot_dir'],params['run_label']))
print('Saved hyperparameter convergence loss to -> %s/latest_%s/hyperpar_convergence.png' % (params['plot_dir'],params['run_label']))
print('Did a hyperparameter search')

# train using user defined params
Expand Down Expand Up @@ -1049,6 +1050,7 @@ def test(params=params,bounds=bounds,fixed_vals=fixed_vals):
plt.close()
del figure
print('Made corner plot: %s' % str(i+1))
print('Saved corner plot to -> %s/latest_%s/corner_plot_%s_%d.png' % (params['plot_dir'],params['run_label'],params['run_label'],i))

# Store ML predictions for later plotting use
VI_pred_all.append(VI_pred)
Expand Down Expand Up @@ -1199,6 +1201,7 @@ def gen_samples(params='params.txt',bounds='bounds.txt',fixed_vals='fixed_vals.t
figure = corner.corner(samples[0,:,:],truths=x_data[0,:],labels=parnames)
plt.savefig('./vitamin_example_corner.png')
plt.close()
print('Saved corner plot to -> ./vitamin_example_corner.png')

return samples, y_data_test, x_data

Expand Down

0 comments on commit c62450b

Please sign in to comment.