pippi_parse.py
#############################################################
# pippi: parse it, plot it
# ------------------------
# Parsing program for pippi.
#
# Author: Pat Scott ([email protected])
# Originally developed: March 2012
#############################################################
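# Usage note (a sketch, based on how this module is wired up below): this file
# is not normally run directly, but dispatched to by pippi's driver script with
# the name of a pip file, roughly as
#   pippi parse example.pip
# The pip file supplies the parse-specific entries registered just below
# (parse_dir, default_bins, use_log_scale, etc).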
from __future__ import print_function
import subprocess
from pippi_utils import *
from pippi_read import *
from scipy import version as scipyCurrent
from scipy.special import gammaincinv as deltaLnLike
from scipy.interpolate import InterpolatedUnivariateSpline as oneDspline
from scipy.interpolate import RectBivariateSpline as twoDbilinear
from distutils.version import StrictVersion
if StrictVersion(scipyCurrent.version) >= StrictVersion("0.9.0"):
  from scipy.interpolate import CloughTocher2DInterpolator as twoDspline
# Define parse-specific pip file entries
parsedir = dataObject('parse_dir',safe_string)
cutOnAnyInvalid = dataObject('cut_on_invalid_observables',boolean)
labelFile = dataObject('labels_from_file',string)
col_assignments = dataObject('assign_to_pippi_datastream',string_dictionary)
labels = dataObject('quantity_labels',string_dictionary)
logPlots = dataObject('use_log_scale',int_list)
rescalings = dataObject('quantity_rescalings',float_dictionary)
defaultBins = dataObject('default_bins',integer)
specificBins = dataObject('specific_bins',int_dictionary)
defaultResolution = dataObject('default_resolution',integer)
specificResolution = dataObject('specific_resolution',int_dictionary)
intMethod = dataObject('interpolation_method',string)
chainType = dataObject('chain_type',internal)
doEvidence = dataObject('compute_evidence',boolean)
data_ranges = dataObject('data_ranges',floatuple_dictionary)
preamble = dataObject('preamble',string)
alt_best_fit = dataObject('bf_lnlike_for_profile_like',float)
keys = keys+[parsedir,labelFile,cutOnAnyInvalid,defaultBins,specificBins,intMethod,chainType,defaultResolution,specificResolution,doEvidence,labels,col_assignments,logPlots,rescalings,data_ranges,preamble,alt_best_fit]
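# For illustration, the entries above appear in a pip file as "key = value"
# lines; the values here are hypothetical, and dictionary-valued entries use
# the same key:value format that pippi writes to its savedkeys files:
#   parse_dir = parse
#   default_bins = 50
#   default_resolution = 300
#   use_log_scale = 3 5
#   quantity_rescalings = 4:0.001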
# Initialise variables
doPosteriorMean = True
firstLikeKey = None
ObsFloor = 0.0
def parse(filename):
  # Input: filename = the name of the pip file
  global doPosteriorMean

  # Parse the pip file
  getIniData(filename,keys,savekeys=[labels],savedir=parsedir)

  # Make a local copy of data_ranges
  dataRanges = {} if not data_ranges.value else dict(data_ranges.value)

  # Work out where the parse output is to be located
  if parsedir.value is None:
    # No parse_dir given; default to the directory containing the chain(s)
    baseFiledir = re.sub(r'/.*?$', '/', mainChain.value)
  else:
    # Save in parse_dir
    baseFiledir = parsedir.value+'/'

  # Check that the requested interpolation method is available
  if twoDplots.value is not None:
    if intMethod.value is None: intMethod.value = allowedIntMethods[0]
    if intMethod.value not in allowedIntMethods: sys.exit('Error: unrecognised interpolation_method.')
    if intMethod.value == 'spline' and StrictVersion(scipyCurrent.version) < StrictVersion("0.9.0"):
      sys.exit('Sorry, Clough-Tocher 2D interpolation is not supported in SciPy \n'+
               'v0.8 or lower; please upgrade your installation to use this option.')

  # Read in label data if it is not in the pip file
  if labelFile.value is not None: getIniData(labelFile.value,[labels])

  # Check that flags match up for the profile likelihood
  if all(x not in labels.value for x in permittedLikes) and doProfile.value:
    print(' Warning: no likelihood in chain labels.\n Skipping profile likelihood...')
    doProfile.value = False

  # Work out whether to compute the posterior mean, and check that flags match up for the posterior pdf
  doPosteriorMean = any(x in labels.value for x in permittedMults)
  if doPosterior.value and not doPosteriorMean:
    print(' Warning: do_posterior_pdf = T but no multiplicity in chain labels.\n Skipping posterior PDF...')
    doPosterior.value = False

  # Check that flags match up for the evidence
  if doEvidence.value:
    if chainType.value is mcmc:
      if all(x not in labels.value for x in permittedLikes) or \
         all(x not in labels.value for x in permittedMults) or \
         all(x not in labels.value for x in permittedPriors):
        print(' The evidence cannot be calculated without multiplicity, prior and likelihood.\n Skipping evidence...')
        doEvidence.value = False
    else:
      print(' The evidence can only be calculated from an MCMC chain.\n Skipping evidence...')
      doEvidence.value = False

  # Check that flags match up for the quantities selected for plotting
  oneDlist = [] if oneDplots.value is None else oneDplots.value
  twoDlist = [] if twoDplots.value is None else twoDplots.value
  obslist = [] if obsPlots.value is None else obsPlots.value
  datarangelist = [] if data_ranges.value is None else data_ranges.value.keys()
  partialSetOfRequestedColumns = set(oneDlist + [y for x in twoDlist for y in x])
  setOfRequestedColumns = set.union(partialSetOfRequestedColumns, set(datarangelist), obslist)

  # Check that labels for all the requested columns are present
  for plot in partialSetOfRequestedColumns:
    try:
      label = labels.value[plot]
    except (KeyError, TypeError):
      sys.exit('Error: please provide a label for column '+str(plot)+' if you want to plot it.\nQuitting...\n')

  # Work out what binning to use in each dimension
  nBins = {}
  for x in setOfRequestedColumns:
    nBins[x] = defaultBins.value if (not specificBins.value or x not in specificBins.value) else specificBins.value[x]

  # Work out what interpolation resolution to use in each dimension
  iBins = {}
  for x in setOfRequestedColumns:
    iBins[x] = defaultResolution.value if (not specificResolution.value or x not in specificResolution.value) else specificResolution.value[x]

  # Open the main chain and read in its contents
  (mainArray, hdf5_names, lookupKey, all_best_fit_data) = getChainData(mainChain.value, cut_all_invalid=cutOnAnyInvalid.value,
    requested_cols=setOfRequestedColumns, labels=labels, assignments=col_assignments, data_ranges=data_ranges, log_plots=logPlots,
    obs_plots=obsPlots, rescalings=rescalings, preamble=preamble.value)

  # Parse the main chain
  outputBaseFilename = baseFiledir+re.sub(r'.*/|\..?.?.?$', '', mainChain.value)
  doParse(mainArray,lookupKey,outputBaseFilename,setOfRequestedColumns,hdf5_names,dataRanges,all_best_fit_data,nBins,iBins,alt_best_fit)

  # If a comparison chain is specified, parse it too
  if secChain.value is not None:
    # Open the secondary chain and read in its contents
    outputBaseFilename = baseFiledir+re.sub(r'.*/|\..?.?.?$', '', secChain.value)+'_comparison'
    (mainArray, hdf5_names, lookupKey, all_best_fit_data) = getChainData(secChain.value, cut_all_invalid=cutOnAnyInvalid.value,
      requested_cols=setOfRequestedColumns, labels=labels, assignments=col_assignments, data_ranges=data_ranges, log_plots=logPlots,
      obs_plots=obsPlots, rescalings=rescalings, preamble=preamble.value)
    # Switch depending on whether the comparison file is hdf5 or ascii
    min_array_length = max(setOfRequestedColumns) if hdf5_names is None else len(setOfRequestedColumns)
    if mainArray.shape[1] >= min_array_length:
      # Clear the savedkeys file for this chain
      subprocess.call('rm -rf '+outputBaseFilename+'_savedkeys.pip', shell=True)
      # Parse the comparison chain
      doParse(mainArray,lookupKey,outputBaseFilename,setOfRequestedColumns,hdf5_names,dataRanges,all_best_fit_data,nBins,iBins,alt_best_fit)
    else:
      print(' Chain '+secChain.value+' has fewer columns than required to do all requested plots.')
      print(' Skipping parsing of this chain...')
def doParse(dataArray,lk,outputBaseFilename,setOfRequestedColumns,column_names,dataRanges,all_best_fit_data,nBins,iBins,alt_best_fit):
  # Perform all numerical operations required for chain parsing

  # Determine the minimum log-likelihood requested as an isocontour in 2D plots
  min_contour = None
  if doProfile.value and twoDplots.value and contours2D.value:
    min_contour = deltaLnLike(1.0,0.01*max(contours2D.value))
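  # Note: deltaLnLike is scipy's inverse regularised lower incomplete gamma
  # function, so deltaLnLike(dof/2., CL) gives the drop in ln-likelihood below
  # the maximum that bounds a confidence region at level CL for the given
  # number of degrees of freedom. Here dof = 2 and CL is the largest requested
  # 2D contour, converted from a percentage to a fraction.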
  # Standardise the likelihood, prior and multiplicity labels, and rescale the likelihood and columns if necessary
  standardise(dataArray,lk)
  # Sort the array if required
  doSort(dataArray,lk)
  # Find the best-fit point
  [bestFit,worstFit,bestFitIndex] = getBestFit(dataArray,lk,outputBaseFilename,column_names,all_best_fit_data,alt_best_fit,min_contour)
  # Find the posterior mean
  [totalMult, posteriorMean] = getPosteriorMean(dataArray,lk,outputBaseFilename)
  # Get the evidence (MCMC chains only)
  [lnZMain,lnZMainError] = getEvidence(dataArray,lk,bestFit,totalMult,outputBaseFilename)
  # Save the data minima and maxima
  saveExtrema(dataArray,lk,outputBaseFilename,setOfRequestedColumns,dataRanges)
  # Save the lookup keys for the parameters
  saveLookupKeys(lk,outputBaseFilename)
  # Do the binning for 1D plots
  oneDsampler(dataArray,lk,bestFit,worstFit,outputBaseFilename,dataRanges,nBins,iBins)
  # Do the binning for 2D plots
  twoDsampler(dataArray,lk,bestFit,worstFit,outputBaseFilename,dataRanges,nBins,iBins)
def standardise(dataArray,lk):
  global firstLikeKey
  # Standardise the likelihood, prior and multiplicity labels, and rescale the likelihood if necessary
  for key, entry in labels.value.copy().items():
    if any(key == mult for mult in permittedMults):
      labels.value[refMult] = labels.value[key]
      if key != refMult: del labels.value[key]
    if any(key == prior for prior in permittedPriors):
      labels.value[refPrior] = labels.value[key]
      if key != refPrior: del labels.value[key]
    #if any(key == obs for obs in permittedObs):
    #  labels.value[refObs] = labels.value[key]
    #  if key != refObs: del labels.value[key]
    if any(key == like for like in permittedLikes):
      if firstLikeKey is None: firstLikeKey = key
      dataArray[:,lk[labels.value[key]]] = mapToRefLike(firstLikeKey,dataArray[:,lk[labels.value[key]]])
      labels.value[refLike] = labels.value[key]
      if key != refLike: del labels.value[key]
    if any(entry == mult for mult in permittedMults): labels.value[key] = refMult
    if any(entry == prior for prior in permittedPriors): labels.value[key] = refPrior
    if any(entry == like for like in permittedLikes): labels.value[key] = refLike
    #if any(entry == obs for obs in permittedObs): labels.value[key] = refObs

  # Rescale columns if requested
  if rescalings.value is not None:
    for key, entry in rescalings.value.items(): dataArray[:,lk[key]] *= entry

  # Convert columns to log scale if requested
  if logPlots.value is not None:
    for column in logPlots.value:
      if column in lk:
        if any(dataArray[:,lk[column]] <= 0.0):
          print("Error: column {0} requested for log plotting has non-positive values!".format(column))
          bad_indices = np.where(dataArray[:,lk[column]] <= 0.0)[0]
          print("Here is the first point with bad values, for example: ")
          for i,val in enumerate(dataArray[bad_indices[0],:]):
            index = i
            for x in lk:
              if lk[x] == i:
                index = x
            print(" col {0}: {1}".format(index,val))
          sys.exit('\nPlease fix log settings (or your data) and rerun pippi.')
        dataArray[:,lk[column]] = np.log10(dataArray[:,lk[column]])
def doSort(dataArray,lk):
  # Sort the chain in order of increasing posterior mass (i.e. multiplicity)
  if doPosterior.value and (contours1D.value is not None or contours2D.value is not None):
    viewString = 'float64' + ',float64' * (dataArray.shape[1]-1)
    dataArray.view(viewString).sort(order = ['f'+str(lk[labels.value[refMult]])], axis=0)
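    # Note: viewing the float64 array as a structured array with one named
    # field per column ('f0', 'f1', ...) lets numpy's in-place sort reorder
    # whole rows at once, keyed on the multiplicity column, without copying.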
def getBestFit(dataArray,lk,outputBaseFilename,column_names,all_best_fit_data,alt_best_fit,min_contour):
  # Find the best-fit point
  bestFitIndex = dataArray[:,lk[labels.value[refLike]]].argmin()
  bestFit = dataArray[bestFitIndex,lk[labels.value[refLike]]]
  worstFit = dataArray[:,lk[labels.value[refLike]]].max()
  print(' Best fit -lnlike: ',bestFit)
  outfile = smart_open(outputBaseFilename+'.best','w')
  outfile.write('# This best-fit/posterior mean file created by pippi '
                +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
  outfile.write('Best-fit log-likelihood: '+str(-bestFit)+'\n')
  outfile.write('Best-fit point:\n')
  outfile.write(' '.join([str(x) for x in dataArray[bestFitIndex,:]])+'\n')
  outfile.close()
  outfile = smart_open(outputBaseFilename+'.best_all','w')
  outfile.write('# This best-fit file created by pippi '\
                +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
  if column_names is None:
    # Just a regular ASCII chain from MultiNest or similar
    for i, x in enumerate(dataArray[bestFitIndex,:]):
      outfile.write(str(i)+': '+str(x)+'\n')
  else:
    # HDF5 file from GAMBIT or similar, with proper data record identifiers
    parameter_sets = {}
    i = 0
    for x in column_names:
      if "primary_parameters" in x:
        model = re.sub(".*@|::.*", '', x)
        par = re.sub(".*::", '', x)
        if not model in parameter_sets: parameter_sets[model] = []
        parameter_sets[model].append(' '+par+': '+all_best_fit_data[i])
      if x != '':
        outfile.write(x.lstrip('#')+': '+all_best_fit_data[i]+'\n')
      i += 1
    if parameter_sets != {}:
      outfile2 = smart_open(outputBaseFilename+'.best_parameters.yaml','w')
      outfile2.write('# This best-fit file created in GAMBIT yaml format by pippi '
                     +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile2.write('# Best-fit log-likelihood: '+str(-bestFit)+'\n\n')
      for model, parameters in parameter_sets.items():
        outfile2.write('  '+model+':\n')
        for parval in parameters: outfile2.write('   '+parval+'\n')
      outfile2.close()
  outfile.close()
  if alt_best_fit.value is not None:
    halt = (min_contour is not None) and (bestFit+alt_best_fit.value > min_contour)
    bestFit = -alt_best_fit.value
    print(' Best fit -lnlike to be used to define profile likelihood ratio: ',bestFit)
    if halt: sys.exit('\n The highest-CL likelihood contour you have requested contains no samples! No more pippi for you.\n')
  return [bestFit,worstFit,bestFitIndex]
def getPosteriorMean(dataArray,lk,outputBaseFilename):
  # Find the posterior mean
  if doPosteriorMean:
    posteriorMean = []
    # Get the total multiplicity for the entire chain
    totalMult = np.sum(dataArray[:,lk[labels.value[refMult]]])
    if (totalMult == 0.0):
      sys.exit('Error: total multiplicity is zero. Please make sure you have assigned the posterior/weight columns correctly.\n')
    # Calculate the posterior mean as the weighted average of each point's contribution to each variable
    for i in range(dataArray.shape[1]):
      posteriorMean.append(np.sum(dataArray[:,lk[labels.value[refMult]]] * dataArray[:,i])/totalMult)
    outfile = smart_open(outputBaseFilename+'.best','a')
    outfile.write('Posterior mean:\n')
    outfile.write(' '.join([str(x) for x in posteriorMean])+'\n')
    outfile.close()
    return [totalMult, posteriorMean]
  else:
    return [None, None]
def getEvidence(dataArray,lk,bestFit,totalMult,outputBaseFilename):
  # Get the evidence (the multiplicity-weighted sum of prior*likelihood for an MCMC chain)
  if doEvidence.value:
    if chainType.value is mcmc:
      lnZ = np.log(np.sum(dataArray[:,lk[labels.value[refMult]]] * \
                          dataArray[:,lk[labels.value[refPrior]]] * \
                          np.exp(bestFit-dataArray[:,lk[labels.value[refLike]]]))) \
            - bestFit - np.log(totalMult)
      lnZError = np.log(1.0 - pow(totalMult,-0.5))
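      # The sum above implements the weighted-average estimator
      #   Z ~ sum_i m_i * prior_i * L_i / sum_i m_i,
      # with the best-fit -lnlike factored out of the exponentials (and
      # restored afterwards) to avoid overflow, since the chain stores
      # -ln(L) in the likelihood column.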
      print(' ln(evidence): ',lnZ,'+/-',lnZError)
    else:
      sys.exit('Error: evidence calculation only possible for MCMC (should never get here).')
    outfile = smart_open(outputBaseFilename+'.lnZ','w')
    outfile.write('# This evidence file created by pippi '\
                  +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
    outfile.write('ln(evidence): '+str(lnZ)+' +/- '+str(lnZError)+'\n')
    outfile.close()
    return [lnZ, lnZError]
  else:
    return [None, None]
def saveExtrema(dataArray,lk,outputBaseFilename,setOfRequestedColumns,dataRanges):
  # Save the maxima and minima of each quantity requested for plotting
  outfile = smart_open(outputBaseFilename+'_savedkeys.pip','a')
  outfile.write('data_ranges =')
  for column in setOfRequestedColumns:
    extrema = [dataArray[:,lk[column]].min(), dataArray[:,lk[column]].max()]
    if column in dataRanges: extrema = [max(dataRanges[column][0], extrema[0]), min(dataRanges[column][1], extrema[1])]
    dataRanges[column] = extrema
    outfile.write(' '+str(column)+':{'+str(extrema[0])+', '+str(extrema[1])+'}')
  outfile.write('\n')
  outfile.close()
def saveLookupKeys(lk,outputBaseFilename):
  # Save the lookup keys for all the requested quantities
  outfile = smart_open(outputBaseFilename+'_savedkeys.pip','a')
  outfile.write('lookup_keys =')
  if type(lk) == dict:
    for key, value in lk.items(): outfile.write(' '+str(key)+':'+str(value))
  else:
    for i in lk: outfile.write(' '+str(i)+':'+str(i))
  outfile.write('\n')
  outfile.close()
def oneDsampler(dataArray,lk,bestFit,worstFit,outputBaseFilename,dataRanges,nAllBins,rAllBins):
  # Do sample sorting for 1D plots
  if oneDplots.value is None: return

  if contours1D.value is not None:
    # Determine the profile likelihood contour levels (the same for all plots of a given dimensionality)
    profContourLevels = [np.exp(-deltaLnLike(0.5,0.01*contour)) for contour in contours1D.value]
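    # With 1 degree of freedom, deltaLnLike(0.5, CL) is the ln-likelihood drop
    # at confidence level CL; e.g. a 68.3% contour sits at roughly exp(-0.5),
    # i.e. ~0.607 of the maximum profile likelihood.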
    outfile = smart_open(outputBaseFilename+'_like1D.contours','w')
    outfile.write('# This 1D profile likelihood ratio contours file created by pippi '\
                  +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
    outfile.write(' '.join([str(x) for x in profContourLevels])+'\n')
    outfile.close()

  for plot in oneDplots.value:
    print(' Parsing data for 1D plots of quantity ',plot)
    nBins = nAllBins[plot]
    resolution = rAllBins[plot]
    likeGrid = np.empty((nBins), dtype=np.float64)
    likeGrid[:] = worstFit + 100.0
    postGrid = np.zeros((nBins), dtype=np.float64)

    # Work out the maximum and minimum values of the parameter/derived quantity
    minVal = dataRanges[plot][0]
    maxVal = dataRanges[plot][1]
    rangeOfVals = maxVal - minVal
    # Guard against a zero-width range (i.e. a delta function)
    if (rangeOfVals <= 0): rangeOfVals = maxVal * 1e-3
    binSep = rangeOfVals / nBins

    # Calculate the bin centres
    binCentresOrig = np.array([minVal + (x+0.5)*rangeOfVals/nBins for x in range(nBins)])
    binCentresInterp = np.array([binCentresOrig[0] + x*(binCentresOrig[-1]-binCentresOrig[0])\
                                 /(resolution-1) for x in range(resolution)])

    # Loop over the points in the chain
    for i in range(dataArray.shape[0]-1,-1,-1):
      index = min(int((dataArray[i,lk[plot]]-minVal)/rangeOfVals*nBins),nBins-1)
      # Profile over likelihoods
      if doProfile.value: likeGrid[index] = min(dataArray[i,lk[labels.value[refLike]]],likeGrid[index])
      if doPosterior.value:
        # Marginalise by adding to the posterior sample count
        postGrid[index] += dataArray[i,lk[labels.value[refMult]]]

    # Convert -log(profile likelihoods) to the profile likelihood ratio
    if doProfile.value: likeGrid = np.exp(bestFit - likeGrid)
    # Rescale the posterior pdf to a maximum of 1
    if doPosterior.value: postGrid = postGrid / postGrid.max()

    # Save the raw binned profile likelihood and posterior pdf for output in the histogram files
    if doProfile.value: likeGridHistogram = likeGrid
    if doPosterior.value: postGridHistogram = postGrid

    # Interpolate the profile likelihood and posterior pdf to the requested resolution
    if doProfile.value:
      interpolator = oneDspline(binCentresOrig, likeGrid)
      likeGrid = interpolator(binCentresInterp)
      # Fix any points sent to NaN by the interpolator
      likeGrid[np.isnan(likeGrid)] = 0.0
      # Kill off any points sent negative by ringing
      likeGrid[np.isneginf(likeGrid)] = 0.0
      likeGrid[likeGrid<0] = 0.0
      # Fix any points sent above 1 by ringing
      likeGrid[np.isposinf(likeGrid)] = 1.0
      likeGrid[likeGrid>1] = 1.0
    if doPosterior.value:
      interpolator = oneDspline(binCentresOrig, postGrid)
      postGrid = interpolator(binCentresInterp)
      # Kill off any points sent negative or non-finite by ringing
      postGrid[~np.isfinite(postGrid)] = 0.0
      postGrid[postGrid<0] = 0.0
      # Rescale the posterior pdf so that its maximum is 1
      postGrid = postGrid / postGrid.max()

    # Find the posterior pdf contour levels
    if contours1D.value is not None and doPosterior.value:
      # Zero the posterior contour levels
      postContourLevels = [None for contour in contours1D.value]
      # Zero the posterior integral
      integratedPosterior = 0.0
      # Sort the bins in order of posterior mass
      sortedPostGrid = np.ma.sort(postGrid)
      # Work out the new total multiplicity
      totalMult = np.sum(sortedPostGrid)
      # Work through the bins backwards until the integrated posterior mass reaches each requested confidence level
      for i in range(sortedPostGrid.shape[0]-1,-1,-1):
        integratedPosterior += sortedPostGrid[i]/totalMult
        for j,contour in enumerate(contours1D.value):
          if 100*integratedPosterior >= contour and postContourLevels[j] is None:
            postContourLevels[j] = sortedPostGrid[i]
        if all([x is not None for x in postContourLevels]): break

    # Write the profile likelihood to file
    if doProfile.value:
      outfile = smart_open(outputBaseFilename+'_'+str(plot)+'_like1D.ct2','w')
      outfile.write('# This 1D binned profile likelihood ratio file created by pippi '\
                    +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile.write('\n'.join([str(binCentresInterp[i])+'\t'+str(x) for i,x in enumerate(likeGrid)]))
      outfile.close()
      outfile = smart_open(outputBaseFilename+'_'+str(plot)+'_like1Dhist.ct2','w')
      outfile.write('# This 1D binned profile likelihood ratio file created by pippi '\
                    +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile.write('\n'.join([str(binCentresOrig[i]-0.5*binSep)+'\t'+str(x)+'\n'+
                               str(binCentresOrig[i]+0.5*binSep)+'\t'+str(x) for i,x in enumerate(likeGridHistogram)]))
      outfile.close()

    # Write the posterior pdf and contours to file
    if doPosterior.value:
      outfile = smart_open(outputBaseFilename+'_'+str(plot)+'_post1D.ct2','w')
      outfile.write('# This 1D binned posterior pdf file created by pippi '\
                    +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile.write('\n'.join([str(binCentresInterp[i])+'\t'+str(x) for i,x in enumerate(postGrid)]))
      outfile.close()
      outfile = smart_open(outputBaseFilename+'_'+str(plot)+'_post1Dhist.ct2','w')
      outfile.write('# This 1D binned posterior pdf file created by pippi '\
                    +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile.write('\n'.join([str(binCentresOrig[i]-0.5*binSep)+'\t'+str(x)+'\n'+
                               str(binCentresOrig[i]+0.5*binSep)+'\t'+str(x) for i,x in enumerate(postGridHistogram)]))
      outfile.close()
      if contours1D.value is not None:
        outfile = smart_open(outputBaseFilename+'_'+str(plot)+'_post1D.contours','w')
        outfile.write('# This 1D posterior pdf contours file created by pippi '\
                      +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
        outfile.write(' '.join([str(x) for x in postContourLevels])+'\n')
        outfile.close()
def twoDsampler(dataArray,lk,bestFit,worstFit,outputBaseFilename,dataRanges,nAllBins,rAllBins):
  # Do sample sorting for 2D plots
  if twoDplots.value is None: return

  # Determine the profile likelihood contour levels (the same for all plots of a given dimensionality)
  if contours2D.value is not None:
    profContourLevels = [np.exp(-deltaLnLike(1.0,0.01*contour)) for contour in contours2D.value]
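    # With 2 degrees of freedom the level is analytic: deltaLnLike(1.0, CL) =
    # -ln(1-CL), so the profile likelihood ratio at a CL contour is simply
    # 1 - CL (e.g. 0.05 for a 95% contour).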
    outName = outputBaseFilename+'_like2D.contours'
    outfile = smart_open(outName,'w')
    outfile.write('# This 2D profile likelihood ratio contours file created by pippi '\
                  +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
    outfile.write(' '.join([str(x) for x in profContourLevels]))
    outfile.close()

  for plot in twoDplots.value:
    print(' Parsing data for 2D plots of quantities ',plot)
    nBins = [nAllBins[plot[j]] for j in range(2)]
    resolution = [rAllBins[plot[j]] for j in range(2)]
    likeGrid = np.empty((nBins[0],nBins[1]), dtype=np.float64)
    likeGrid[:,:] = worstFit + 100.0
    postGrid = np.zeros((nBins[0],nBins[1]), dtype=np.float64)

    # Count the observables present in the chain and set up their grids
    num_obs = 0
    if obsPlots.value is not None:
      for column in obsPlots.value:
        if column in lk:
          num_obs = num_obs+1
    obsGrid = np.zeros((num_obs,nBins[0],nBins[1]), dtype=np.float64)
    k = -1
    minimum_obs = np.zeros(num_obs)
    obsMinVal = np.zeros(num_obs)
    obsMaxVal = np.zeros(num_obs)
    obsFloor = np.zeros(num_obs)
    if obsPlots.value is not None:
      for column in obsPlots.value:
        if column in lk:
          k = k+1
          obsMinVal[k] = dataArray[:,lk[column]].min()
          obsMaxVal[k] = dataArray[:,lk[column]].max()
          # Temporarily set the whole grid to a very low value
          obsGrid[k,:,:] = obsMinVal[k] - 100
          # Set a floor value just below the actual minimum
          if obsMinVal[k] < 0:
            obsFloor[k] = obsMinVal[k]*1.01
          if obsMinVal[k] > 0:
            obsFloor[k] = obsMinVal[k]*0.99
          if obsMinVal[k] == 0:
            obsFloor[k] = -0.01

    # Work out the maximum and minimum values of the parameters/derived quantities
    minVal = [dataRanges[plot[j]][0] for j in range(2)]
    maxVal = [dataRanges[plot[j]][1] for j in range(2)]
    rangeOfVals = [maxVal[j] - minVal[j] for j in range(2)]

    # Pad the edges of the grid with one extra bin in each direction
    binSep = [rangeOfVals[j]/(nBins[j]-2) for j in range(2)]
    minVal = [minVal[j] - binSep[j] for j in range(2)]
    maxVal = [maxVal[j] + binSep[j] for j in range(2)]
    rangeOfVals = [rangeOfVals[j] + 2.0 * binSep[j] for j in range(2)]

    # Calculate the bin centres
    binCentresOrig = []
    binCentresInterp = []
    for j in range(2): binCentresOrig.append(np.array([minVal[j] + (x+0.5)*rangeOfVals[j]/nBins[j] for x in range(nBins[j])]))
    for j in range(2): binCentresInterp.append(np.array([binCentresOrig[j][0] + x*(binCentresOrig[j][-1]-binCentresOrig[j][0])\
                                                         /(resolution[j]-1) for x in range(resolution[j])]))

    # Loop over the points in the chain
    for i in range(dataArray.shape[0]-1,-1,-1):
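      # Bin indices are clamped to nBins-2 (rather than nBins-1 as in the 1D
      # case) because the outermost bin on each edge is the padding added
      # above: real samples fall in bins 1..nBins-2, leaving the edge bins
      # empty so that interpolated contours close at the plot boundaries.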
      [in1,in2] = [min(int((dataArray[i,lk[plot[j]]]-minVal[j])/rangeOfVals[j]*nBins[j]),nBins[j]-2) for j in range(2)]
      # Record the observable values at the maximum likelihood in this bin
      if dataArray[i,lk[labels.value[refLike]]] < likeGrid[in1,in2]:
        if obsPlots.value is not None:
          k = -1
          for column in obsPlots.value:
            if column in lk:
              k = k + 1
              obsGrid[k,in1,in2] = dataArray[i,lk[column]]
      # Profile over likelihoods
      if doProfile.value: likeGrid[in1,in2] = min(dataArray[i,lk[labels.value[refLike]]],likeGrid[in1,in2])
      # Marginalise by adding to the posterior sample count
      if doPosterior.value: postGrid[in1,in2] += dataArray[i,lk[labels.value[refMult]]]

    # Convert -log(profile likelihoods) to the profile likelihood ratio
    likeGrid = np.exp(bestFit - likeGrid)

    # Write the observable colour-map levels to file
    #obsFloor = np.zeros(k+1)
    k = -1
    if obsPlots.value is not None:
      for column in obsPlots.value:
        if column in lk:
          k = k + 1
          obsContourLevels = [obsFloor[k],obsMinVal[k],obsMaxVal[k]]
          outName = outputBaseFilename+'_'+'_'.join([str(x) for x in plot])+'_obs2D_'+str(column)+'.contours'
          outfile = smart_open(outName,'w')
          outfile.write('# This 2D observable color-map levels file created by pippi '\
                        +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
          outfile.write(' '.join([str(x) for x in obsContourLevels]))
          outfile.close()

    # Precompute the coordinate grids for splines, to avoid repetition
    if intMethod.value == 'spline':
      oldCoords = np.array([[binCentresOrig[0][i], binCentresOrig[1][j]] for i in range(nBins[0]) for j in range(nBins[1])])
      newCoords = [[binCentresInterp[0][i], binCentresInterp[1][j]] for i in range(resolution[0]) for j in range(resolution[1])]
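      # CloughTocher2DInterpolator takes scattered (N,2) sample points rather
      # than a rectangular grid, hence the flattened coordinate lists here;
      # the 'bilinear' branches below instead use RectBivariateSpline (with
      # kx = ky = 1) directly on the grid axes.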
    # Interpolate the posterior pdf, profile likelihood and observables to the requested display resolution
    if doProfile.value:
      if intMethod.value == 'spline':
        likeGrid = np.array(likeGrid).reshape(nBins[0]*nBins[1])
        interpolator = twoDspline(oldCoords,likeGrid)
        likeGrid = np.array(interpolator(newCoords)).reshape(resolution[0],resolution[1])
      else:
        interpolator = twoDbilinear(binCentresOrig[0], binCentresOrig[1], likeGrid, ky = 1, kx = 1)
        likeGrid = np.array([interpolator(binCentresInterp[0][j], binCentresInterp[1][i])
                             for j in range(resolution[0]) for i in range(resolution[1])]).reshape(resolution[0],resolution[1])
      # Fix any points sent to NaN by the interpolator
      likeGrid[np.isnan(likeGrid)] = 0.0
      # Kill off any points sent negative by ringing
      likeGrid[np.isneginf(likeGrid)] = 0.0
      likeGrid[likeGrid<0] = 0.0
      # Fix any points sent above 1 by ringing
      likeGrid[np.isposinf(likeGrid)] = 1.0
      likeGrid[likeGrid>1] = 1.0
      # Make sure the best-fit point has not been erased by interpolating over it
      likeGrid[np.unravel_index(likeGrid.argmax(),likeGrid.shape)] = 1.0

    if doPosterior.value:
      if intMethod.value == 'spline':
        postGrid = np.array(postGrid).reshape(nBins[0]*nBins[1])
        interpolator = twoDspline(oldCoords,postGrid)
        postGrid = np.array(interpolator(newCoords)).reshape(resolution[0],resolution[1])
      else:
        interpolator = twoDbilinear(binCentresOrig[0], binCentresOrig[1], postGrid, ky = 1, kx = 1)
        postGrid = np.array([interpolator(binCentresInterp[0][j], binCentresInterp[1][i])
                             for j in range(resolution[0]) for i in range(resolution[1])]).reshape(resolution[0],resolution[1])
      # Kill off any points sent negative or non-finite by ringing
      postGrid[~np.isfinite(postGrid)] = 0.0
      postGrid[postGrid<0] = 0.0
      # Rescale the posterior pdf back into the range [0,1]
      postGrid = postGrid / postGrid.max()

    if obsPlots.value is not None:
      k = -1
      for column in obsPlots.value:
        if column in lk:
          k = k + 1
          obsGrid_temp = obsGrid[k,:,:]
          obsGrid_temp[obsGrid_temp == (obsMinVal[k]-100)] = obsMinVal[k]
          if intMethod.value == 'spline':
            obsGrid_temp = np.array(obsGrid_temp).reshape(nBins[0]*nBins[1])
            interpolator = twoDspline(oldCoords,obsGrid_temp)
            obsGrid_temp = np.array(interpolator(newCoords)).reshape(resolution[0],resolution[1])
          else:
            interpolator = twoDbilinear(binCentresOrig[0], binCentresOrig[1], obsGrid_temp, ky = 1, kx = 1)
            obsGrid_temp = np.array([interpolator(binCentresInterp[0][j], binCentresInterp[1][i])
                                     for j in range(resolution[0]) for i in range(resolution[1])]).reshape(resolution[0],resolution[1])
          obsGrid_temp[obsGrid_temp < obsMinVal[k]] = obsMinVal[k]
          # Set points outside the contours to the floor value (effectively "no data")
          obsGrid_temp[likeGrid<min(profContourLevels)] = obsFloor[k]
          # Write the observable to file
          outName = outputBaseFilename+'_'+'_'.join([str(x) for x in plot])+'_obs2D_'+str(column)+'.ct2'
          outfile = smart_open(outName,'w')
          outfile.write('# This 2D binned observable file created by pippi '\
                        +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
          outfile.write('\n'.join([str(binCentresInterp[0][i])+'\t'+str(binCentresInterp[1][j])+'\t'+str(obsGrid_temp[i,j]) \
                        for i in range(obsGrid_temp.shape[0]) for j in range(obsGrid_temp.shape[1])]))
          outfile.close()
          # Make a dummy grid holding the maximum and minimum values of the reduced grid, for use in drawing the colour bar
          obsGrid_temp_list = obsGrid_temp.reshape(obsGrid_temp.shape[1] * obsGrid_temp.shape[0])
          obsGrid_temp_list = obsGrid_temp_list[obsGrid_temp_list != obsFloor[k]]
          obsMinValReduced = obsGrid_temp_list.min()
          obsMaxValReduced = obsGrid_temp_list.max()
          dummyGrid = np.zeros([2, 3])
          dummyGrid[0,0] = binCentresInterp[0][0]
          dummyGrid[0,1] = binCentresInterp[0][0]
          dummyGrid[0,2] = obsMinValReduced
          dummyGrid[1,0] = binCentresInterp[0][1]
          dummyGrid[1,1] = binCentresInterp[0][1]
          dummyGrid[1,2] = obsMaxValReduced
          outName = outputBaseFilename+'_'+'_'.join([str(x) for x in plot])+'_obs2D_'+str(column)+'_colorbar.ct2'
          outfile = smart_open(outName,'w')
          outfile.write('# This 2D binned observable file created by pippi '\
                        +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
          outfile.write('\n'.join([str(dummyGrid[i,0])+'\t'+str(dummyGrid[i,1])+'\t'+str(dummyGrid[i,2]) \
                        for i in range(2)]))
          outfile.close()

    # Find the posterior pdf contour levels
    if contours2D.value is not None and doPosterior.value:
      # Zero the posterior contour levels
      postContourLevels = [None for contour in contours2D.value]
      # Zero the posterior integral
      integratedPosterior = 0.0
      # Sort the bins in order of posterior mass
      sortedPostGrid = np.ma.sort(postGrid.flatten())
      # Work out the new total multiplicity
      totalMult = np.sum(sortedPostGrid)
      # Work through the bins backwards until the integrated posterior mass reaches each requested confidence level
      for i in range(sortedPostGrid.shape[0]-1,-1,-1):
        integratedPosterior += sortedPostGrid[i]/totalMult
        for j,contour in enumerate(contours2D.value):
          if 100*integratedPosterior >= contour and postContourLevels[j] is None:
            postContourLevels[j] = sortedPostGrid[i]
        if all([x is not None for x in postContourLevels]): break

    # Write the profile likelihood to file
    if doProfile.value:
      outName = outputBaseFilename+'_'+'_'.join([str(x) for x in plot])+'_like2D.ct2'
      outfile = smart_open(outName,'w')
      outfile.write('# This 2D binned profile likelihood ratio file created by pippi '\
                    +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile.write('\n'.join([str(binCentresInterp[0][i])+'\t'+str(binCentresInterp[1][j])+'\t'+str(likeGrid[i,j]) \
                    for i in range(likeGrid.shape[0]) for j in range(likeGrid.shape[1])]))
      outfile.close()

    # Write the posterior pdf and contours to file
    if doPosterior.value:
      outName = outputBaseFilename+'_'+'_'.join([str(x) for x in plot])+'_post2D.ct2'
      outfile = smart_open(outName,'w')
      outfile.write('# This 2D binned posterior pdf file created by pippi '\
                    +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
      outfile.write('\n'.join([str(binCentresInterp[0][i])+'\t'+str(binCentresInterp[1][j])+'\t'+str(postGrid[i,j]) \
                    for i in range(postGrid.shape[0]) for j in range(postGrid.shape[1])]))
      outfile.close()
      if contours2D.value is not None:
        outName = outputBaseFilename+'_'+'_'.join([str(x) for x in plot])+'_post2D.contours'
        outfile = smart_open(outName,'w')
        outfile.write('# This 2D posterior pdf contours file created by pippi '\
                      +pippiVersion+' on '+datetime.datetime.now().strftime('%c')+'\n')
        outfile.write(' '.join([str(x) for x in postContourLevels]))
        outfile.close()