From 1929bc29999767165abaa5d30cb6db95733245a6 Mon Sep 17 00:00:00 2001
From: Francesco Pannarale
Date: Tue, 10 Dec 2024 13:43:20 +0100
Subject: [PATCH] Vetoes in PyGRB efficiency and page_tables scripts (#4978)

* Vetoes in pycbc_pygrb_page_tables + some syntax streamlining

* Vetoes in pycbc_pygrb_efficiency + some syntax streamlining

* Squashed mchirp retrieval bug in page_tables

* PR review follow up: comprehension, comment, readability, unused variables

* Cleaner format_pvalue_str

* Cleaner comprehensions
---
 bin/pygrb/pycbc_pygrb_efficiency  | 247 +++++++++++----------
 bin/pygrb/pycbc_pygrb_page_tables | 345 +++++++++++++++---------------
 2 files changed, 301 insertions(+), 291 deletions(-)

diff --git a/bin/pygrb/pycbc_pygrb_efficiency b/bin/pygrb/pycbc_pygrb_efficiency
index 171809007f6..5745dc9eded 100644
--- a/bin/pygrb/pycbc_pygrb_efficiency
+++ b/bin/pygrb/pycbc_pygrb_efficiency
@@ -125,7 +125,9 @@ parser.add_argument("--bank-file", action="store", type=str, required=True,
                     help="Location of the full template bank used.")
 ppu.pygrb_add_injmc_opts(parser)
 ppu.pygrb_add_bestnr_cut_opt(parser)
+ppu.pygrb_add_slide_opts(parser)
 opts = parser.parse_args()
+ppu.slide_opts_helper(opts)
 
 init_logging(opts.verbose, format="%(asctime)s: %(levelname)s: %(message)s")
 
@@ -144,6 +146,7 @@ if opts.exclusion_dist_output_file is not None or \
 trig_file = opts.trig_file
 onsource_file = opts.onsource_file
 found_missed_file = opts.found_missed_file
+veto_file = opts.veto_file
 inj_set_name = opts.injection_set_name
 wf_err = opts.waveform_error
 cal_errs = {}
@@ -178,76 +181,84 @@ for output_file in [opts.exclusion_dist_output_file,
     if output_file is not None:
         outdir = os.path.split(os.path.abspath(output_file))[0]
         if not os.path.isdir(outdir):
-            logging.info("Creating the output directoryi %s.", outdir)
+            logging.info("Creating the output directory %s.", outdir)
             os.makedirs(outdir)
 
-# Extract IFOs and vetoes
-ifos, vetoes = ppu.extract_ifos_and_vetoes(trig_file, opts.veto_files,
-                                           opts.veto_category)
-
-# Load triggers (apply reweighted SNR cut), time-slides, and segment dictionary
-logging.info("Loading triggers.")
-trigs = ppu.load_triggers(trig_file, ifos, vetoes,
-                          rw_snr_threshold=opts.newsnr_threshold)
-logging.info("%d offsource triggers surviving reweighted SNR cut.",
-             len(trigs['network/event_id']))
-logging.info("Loading timeslides.")
-slide_dict = ppu.load_time_slides(trig_file)
-logging.info("Loading segments.")
-segment_dict = ppu.load_segment_dict(trig_file)
-
-# Construct trials
-logging.info("Constructing trials.")
-trial_dict = ppu.construct_trials(opts.seg_files, segment_dict,
-                                  ifos, slide_dict, vetoes)
-total_trials = sum([len(trial_dict[slide_id]) for slide_id in slide_dict])
-logging.info("%d trials generated.", total_trials)
+# Extract IFOs
+ifos = ppu.extract_ifos(trig_file)
 
-# Extract basic trigger properties and store as dictionaries
-trig_time, trig_snr, trig_bestnr = \
-    ppu.extract_basic_trig_properties(trial_dict, trigs, slide_dict,
-                                      segment_dict, opts)
-
-# Calculate BestNR values and maximum
-time_veto_max_bestnr = {}
+# Generate time-slides dictionary
+slide_dict = ppu.load_time_slides(trig_file)
 
-for slide_id in slide_dict:
-    num_slide_segs = len(trial_dict[slide_id])
-    time_veto_max_bestnr[slide_id] = np.zeros(num_slide_segs)
+# Generate segments dictionary
+segment_dict = ppu.load_segment_dict(trig_file)
 
+# Construct trials removing vetoed times
+trial_dict, total_trials = ppu.construct_trials(
+    opts.seg_files,
+    segment_dict,
+    ifos,
+    slide_dict,
+    veto_file
+)
+
+# Load triggers (apply reweighted SNR cut, not vetoes)
+all_off_trigs = ppu.load_data(trig_file, ifos, data_tag='offsource',
+                              rw_snr_threshold=opts.newsnr_threshold,
+                              slide_id=opts.slide_id)
+
+# Extract needed trigger properties and store them as dictionaries
+# Based on trial_dict: if vetoes were applied, trig_* are the veto survivors
+keys = ['network/end_time_gc', 'network/reweighted_snr']
+trig_data = ppu.extract_trig_properties(
+    trial_dict,
+    all_off_trigs,
+    slide_dict,
+    segment_dict,
+    keys
+)
+
+# Max BestNR values in each trial: these are stored in a dictionary keyed
+# by slide_id, as arrays indexed by trial number
+background = {k: np.zeros(len(v)) for k, v in trial_dict.items()}
 for slide_id in slide_dict:
+    trig_times = trig_data[keys[0]][slide_id]
     for j, trial in enumerate(trial_dict[slide_id]):
-        trial_cut = (trial[0] <= trig_time[slide_id])\
-            & (trig_time[slide_id] < trial[1])
+        # True whenever the trigger is in the trial
+        trial_cut = (trial[0] <= trig_times) & (trig_times < trial[1])
+        # Move on if nothing was in the trial
         if not trial_cut.any():
             continue
         # Max BestNR
-        time_veto_max_bestnr[slide_id][j] = \
-            max(trig_bestnr[slide_id][trial_cut])
+        background[slide_id][j] = max(trig_data[keys[1]][slide_id][trial_cut])
+
+# Max and median values of reweighted SNR,
+# and sorted (loudest in trial) reweighted SNR values
+max_bestnr, median_bestnr, sorted_bkgd =\
+    ppu.max_median_stat(slide_dict, background, trig_data[keys[1]],
+                        total_trials)
+assert total_trials == len(sorted_bkgd)
 
-logging.info("SNR and bestNR maxima calculated.")
+logging.info("Background bestNR calculated.")
 
-# Output details of loudest offsouce triggers
+# Output details of loudest offsource triggers: only triggers compatible
+# with the trial_dict are considered
 offsource_trigs = []
-sorted_trigs = ppu.sort_trigs(trial_dict, trigs, slide_dict, segment_dict)
+sorted_off_trigs = ppu.sort_trigs(
+    trial_dict,
+    all_off_trigs,
+    slide_dict,
+    segment_dict
+)
 for slide_id in slide_dict:
-    offsource_trigs.extend(zip(trig_bestnr[slide_id], sorted_trigs[slide_id]))
+    offsource_trigs.extend(
+        zip(trig_data[keys[1]][slide_id], sorted_off_trigs[slide_id])
+    )
 offsource_trigs.sort(key=lambda element: element[0])
 offsource_trigs.reverse()
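
The two new blocks above are the heart of the change on the efficiency side: trials are now built directly from the off-source segments with vetoed times removed (construct_trials also returns the total trial count), and the background is simply the loudest reweighted SNR in each surviving trial. A minimal, self-contained sketch of that per-trial maximisation with toy numbers (the real inputs come from ppu.construct_trials and ppu.extract_trig_properties, whose internals are not part of this patch):

    import numpy as np

    # One slide with three surviving trials; a fourth, (120, 130), was vetoed
    trials = {0: [(100, 110), (110, 120), (130, 140)]}
    trig_times = {0: np.array([103.2, 115.7, 118.1, 134.9])}
    trig_stats = {0: np.array([5.1, 7.3, 6.2, 4.8])}  # reweighted SNR

    background = {k: np.zeros(len(v)) for k, v in trials.items()}
    for slide_id, slide_trials in trials.items():
        for j, (start, end) in enumerate(slide_trials):
            # Select the triggers falling inside this trial
            in_trial = (start <= trig_times[slide_id]) \
                & (trig_times[slide_id] < end)
            if in_trial.any():
                background[slide_id][j] = trig_stats[slide_id][in_trial].max()

    # Loudest-per-trial values, sorted: this is what sorted_bkgd holds
    sorted_bkgd = np.sort(np.concatenate(list(background.values())))
    # -> [4.8 5.1 7.3]; a candidate with stat 6.0 is beaten in 1 of 3 trials
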
 
-# ==========================
-# Print loudest SNRs to file
-# THIS OUTPUT FILE IS CURRENTLY UNUSED - MAYBE DELETE?
-# Note: the only new info from above is the median SNR, bestnr
-# and loudest SNR, so could just add this to the above's caption. 
-# ==========================
-max_bestnr, _, full_time_veto_max_bestnr =\
-    ppu.max_median_stat(slide_dict, time_veto_max_bestnr, trig_bestnr,
-                        total_trials)
 
-# ==========================
-# Calculate template chirp masses from bank
-# ==========================
+# Calculate chirp masses of templates in bank
 logging.info('Reading template chirp masses')
 with HFile(opts.bank_file, 'r') as bank_file:
     template_mchirps = mchirp_from_mass1_mass2(
@@ -261,9 +272,10 @@ with HFile(opts.bank_file, 'r') as bank_file:
 
 if onsource_file:
     logging.info("Processing onsource.")
 
-    # Get onsouce_triggers (apply reweighted SNR cut)
-    on_trigs = ppu.load_triggers(onsource_file, ifos, vetoes,
-                                 rw_snr_threshold=opts.newsnr_threshold)
+    # Load onsource triggers (apply reweighted SNR cut, not vetoes)
+    on_trigs = ppu.load_data(onsource_file, ifos, data_tag=None,
+                             rw_snr_threshold=opts.newsnr_threshold,
+                             slide_id=0)
 
     # Calculate chirp mass values
     on_mchirp = template_mchirps[on_trigs['network/template_id']]
@@ -288,65 +300,57 @@ if onsource_file:
     logging.info("Onsource analysed.")
 
     if loud_on_bestnr_idx is not None:
-        num_trials_louder = 0
-        tot_off_snr = np.array([])
-        for slide_id in slide_dict:
-            num_trials_louder += sum(time_veto_max_bestnr[slide_id] >
-                                     loud_on_bestnr)
-            tot_off_snr = np.concatenate([tot_off_snr,
-                                          time_veto_max_bestnr[slide_id]])
-        #fap_test = sum(tot_off_snr > loud_on_bestnr)/total_trials
-        loud_on_fap = num_trials_louder/total_trials
+        loud_on_fap = sum(sorted_bkgd > loud_on_bestnr) / total_trials
 
-else:
-    tot_off_snr = np.array([])
-    for slide_id in slide_dict:
-        tot_off_snr = np.concatenate([tot_off_snr,
-                                      time_veto_max_bestnr[slide_id]])
-    med_snr = np.median(tot_off_snr)
-    #loud_on_fap = sum(tot_off_snr > med_snr)/total_trials
 
 # =======================
 # Post-process injections
 # =======================
-
-sites = [ifo[0] for ifo in ifos]
-
-# injs contains the information about found/missed injections AND triggers
-# Triggers and injections are discared if at vetoed times and/or below
-# Reweighted SNR thrshold
-injs = ppu.load_triggers(found_missed_file, ifos, vetoes,
-                         rw_snr_threshold=opts.newsnr_threshold)
-
-logging.info("Missed/found injections/triggers loaded.")
+# injs contains found/missed injections AND triggers they generated
+# The reweighted SNR cut is applied, vetoes are not
+injs = ppu.load_data(found_missed_file, ifos, data_tag='injs',
+                     rw_snr_threshold=opts.newsnr_threshold,
+                     slide_id=0)
+
+# Gather injections that were not missed
+found_inj = {}
+for k in injs.keys():
+    if 'missed' not in k:
+        found_inj[k] = injs[k]
+
+# Split them into injections found surviving vetoes and found but vetoed
+found_after_vetoes, vetoed, *_ = ppu.apply_vetoes_to_found_injs(
+    found_missed_file,
+    found_inj,
+    ifos,
+    veto_file=veto_file
+)
 
 # Calculate quantities not included in trigger files, such as chirp mass
-found_trig_mchirp = template_mchirps[injs['network/template_id']]
-
+found_trig_mchirp = template_mchirps[found_after_vetoes['network/template_id']]
 
 # Construct conditions for injection:
-# 1) found louder than background,
-zero_fap = np.zeros(len(injs['network/end_time_gc'])).astype(bool)
-zero_fap_cut = injs['network/reweighted_snr'][:] > max_bestnr
+# 1) found (surviving vetoes) louder than background,
+zero_fap = np.zeros(len(found_after_vetoes['network/end_time_gc'])).astype(bool)
+zero_fap_cut = found_after_vetoes['network/reweighted_snr'] > max_bestnr
 zero_fap = zero_fap | (zero_fap_cut)
 
-# 2) found (bestnr > 0) but not louder than background (non-zero FAP)
-nonzero_fap = ~zero_fap & (injs['network/reweighted_snr'] != 0)
+# 2) found (bestnr > 0, and surviving vetoes) but not louder than background
+nonzero_fap = ~zero_fap & (found_after_vetoes['network/reweighted_snr'] != 0)
 
-# 3) missed after being recovered (i.e., vetoed) are not used here
-# missed = (~zero_fap) & (~nonzero_fap)
+# 3) missed after being recovered (i.e., vetoed) are in vetoed
 
 # Non-zero FAP triggers (g_ifar)
 g_ifar = {}
-g_ifar['bestnr'] = injs['network/reweighted_snr'][nonzero_fap]
+g_ifar['bestnr'] = found_after_vetoes['network/reweighted_snr'][nonzero_fap]
 g_ifar['stat'] = np.zeros([len(g_ifar['bestnr'])])
 for ix, (mc, bestnr) in \
         enumerate(zip(found_trig_mchirp[nonzero_fap], g_ifar['bestnr'])):
-    g_ifar['stat'][ix] = (full_time_veto_max_bestnr > bestnr).sum()
+    g_ifar['stat'][ix] = (sorted_bkgd > bestnr).sum()
 g_ifar['stat'] = g_ifar['stat'] / total_trials
 
 # Set the sigma values
-inj_sigma = {ifo: injs[f'{ifo}/sigmasq'][:] for ifo in ifos}
+inj_sigma = {ifo: found_after_vetoes[f'{ifo}/sigmasq'][:] for ifo in ifos}
 # If the sigmasqs are not populated, we can still do calibration errors,
 # but only in the 1-detector case
 for ifo in ifos:
@@ -365,9 +369,9 @@ f_resp = {}
 for ifo in ifos:
     antenna = Detector(ifo)
     f_resp[ifo] = ppu.get_antenna_responses(antenna,
-                                            injs['found/ra'][:],
-                                            injs['found/dec'][:],
-                                            injs['found/tc'][:])
+                                            found_after_vetoes['found/ra'][:],
+                                            found_after_vetoes['found/dec'][:],
+                                            found_after_vetoes['found/tc'][:])
 
 inj_sigma_mult = (np.asarray(list(inj_sigma.values())) *
                   np.asarray(list(f_resp.values())))
@@ -380,12 +384,12 @@ inj_sigma_mean = {}
 for ifo in ifos:
     inj_sigma_mean[ifo] = ((inj_sigma[ifo]*f_resp[ifo])/inj_sigma_tot).mean()
 
-logging.info("%d found injections analysed.", len(injs['found/tc']))
-
-# Process missed injections (injs['missed'])
-logging.info("%d missed injections analysed.", len(injs['missed/tc']))
+msg = f"{len(found_after_vetoes['found/tc'])} injections found and surviving "
+msg += f"vetoes and {len(injs['missed/tc'])} missed injections analysed."
+logging.info(msg)
 
-# Create new set of injections for efficiency calculations
+# Create new set of injections for efficiency calculations:
+# these are as many as the original injections
 total_injs = len(injs['found/distance']) + len(injs['missed/distance'])
 long_inj = {}
 long_inj['dist'] = stats.uniform.rvs(size=total_injs) * \
@@ -411,7 +415,7 @@ for key in ['mc', 'no_mc']:
     found_on_bestnr[key] = np.zeros(num_dist_bins_plus_one)
 
 # Construct FAP list for all found injections
-inj_fap = np.zeros(len(injs['found/distance']))
+inj_fap = np.zeros(len(found_after_vetoes['found/distance']))
 inj_fap[nonzero_fap] = g_ifar['stat']
 
 # Calculate the amplitude error
@@ -434,10 +438,20 @@ logging.info("Calibration amplitude uncertainty calculated.")
 # NOTE: the loop on num_mc_injs would fill up the *_inj['dist_mc']'s at the
 # same time, so filling them up sequentially will vary the numbers a little
 # (this is an MC, order of operations matters!)
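
For readers unfamiliar with ppu.mc_cal_wf_errs (its internals are not part of this patch): the calls below ask for num_mc_injs Monte Carlo copies of each injection distance, rescaled by draws of the fractional calibration and waveform amplitude errors. A rough, hypothetical stand-in that illustrates only the shape of the output — the real routine's conventions for combining and capping the errors may differ:

    import numpy as np

    def mc_distance_jitter(num_mc, distances, cal_err, wf_err, max_dc_err,
                           seed=None):
        # Hypothetical sketch, not the PyCBC implementation.
        # Row 0 holds the unjittered distances; each later row rescales them
        # by one draw of the combined fractional amplitude error, capped at
        # the maximum DC calibration error.
        rng = np.random.default_rng(seed)
        out = np.empty((num_mc + 1, len(distances)))
        out[0] = distances
        for i in range(1, num_mc + 1):
            frac = rng.normal(0., np.hypot(cal_err, wf_err), len(distances))
            frac = np.clip(frac, -max_dc_err, max_dc_err)
            out[i] = distances * (1. + frac)
        return out

Note also that the second call below now concatenates the vetoed-but-found injection distances onto the missed ones, so vetoed injections count against the efficiency instead of silently dropping out.
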
-found_inj_dist_mc = ppu.mc_cal_wf_errs(num_mc_injs, injs['found/distance'],
-                                       cal_error, wav_err, max_dc_cal_error)
-missed_inj_dist_mc = ppu.mc_cal_wf_errs(num_mc_injs, injs['missed/distance'],
-                                        cal_error, wav_err, max_dc_cal_error)
+found_inj_dist_mc = ppu.mc_cal_wf_errs(
+    num_mc_injs,
+    found_after_vetoes['found/distance'],
+    cal_error,
+    wav_err,
+    max_dc_cal_error
+)
+missed_inj_dist_mc = ppu.mc_cal_wf_errs(
+    num_mc_injs,
+    np.concatenate((vetoed['found/distance'], injs['missed/distance'])),
+    cal_error,
+    wav_err,
+    max_dc_cal_error
+)
 
 long_inj['dist_mc'] = ppu.mc_cal_wf_errs(num_mc_injs, long_inj['dist'],
                                          cal_error, wav_err, max_dc_cal_error)
 
@@ -452,32 +466,32 @@ else:
 
 distance_count = np.zeros(len(dist_bins))
 
-found_trig_max_bestnr = np.empty(len(injs['network/event_id']))
+found_trig_max_bestnr = np.empty(len(found_after_vetoes['network/event_id']))
 found_trig_max_bestnr.fill(max_bestnr)
 
-max_bestnr_cut = (injs['network/reweighted_snr'] > found_trig_max_bestnr)
+max_bestnr_cut = (found_after_vetoes['network/reweighted_snr'] > found_trig_max_bestnr)
 
 # Check louder than on source
-found_trig_loud_on_bestnr = np.empty(len(injs['network/event_id']))
+found_trig_loud_on_bestnr = np.empty(len(found_after_vetoes['network/event_id']))
 if onsource_file:
     found_trig_loud_on_bestnr.fill(loud_on_bestnr)
 else:
-    found_trig_loud_on_bestnr.fill(med_snr)
-on_bestnr_cut = injs['network/reweighted_snr'] > found_trig_loud_on_bestnr
+    found_trig_loud_on_bestnr.fill(median_bestnr)
+on_bestnr_cut = found_after_vetoes['network/reweighted_snr'] > found_trig_loud_on_bestnr
 
 # Check whether injection is found for the purposes of exclusion
 # distance calculation.
 # Found: if louder than all on source
 # Missed: if not louder than loudest on source
 
 found_excl = on_bestnr_cut & (more_sig_than_onsource) & \
-    (injs['network/reweighted_snr'] != 0)
+    (found_after_vetoes['network/reweighted_snr'] != 0)
 
 # If not missed, double check bestnr against nearby triggers
 near_test = np.zeros((found_excl).sum()).astype(bool)
-for j, (t, bestnr) in enumerate(zip(injs['found/tc'][found_excl],
-                                    injs['network/reweighted_snr'][found_excl])):
+for j, (t, bestnr) in enumerate(zip(found_after_vetoes['found/tc'][found_excl],
+                                    found_after_vetoes['network/reweighted_snr'][found_excl])):
     # 0 is the zero-lag timeslide
     near_bestnr = \
-        trig_bestnr[0][np.abs(trig_time[0]-t) < cluster_window]
+        trig_data[keys[1]][0][np.abs(trig_data[keys[0]][0]-t) < cluster_window]
     near_test[j] = ~((near_bestnr * glitch_check_fac > bestnr).any())
 # Apply the local test
 c = 0
@@ -528,6 +542,7 @@ logging.info("Found/missed injection efficiency calculations completed.")
 # ==========
 # Make plots
 # ==========
+logging.info("Plotting.")
 
 # Calculate distances (horizontal axis) as means
 dist_plot_vals = [np.asarray(dist_bin).mean() for dist_bin in dist_bins]
@@ -578,7 +593,7 @@ yerr_low, yerr_high, fraction_mc = \
 red_efficiency = (fraction_mc) - (yerr_low) * scipy.stats.norm.isf(0.1)
 
 # Calculate and save to disk 50% and 90% exclusion distances
-# excl_dist dictionary contains 50% and 90% exclusion distances 
+# excl_dist dictionary contains 50% and 90% exclusion distances
 excl_dist = {}
 for percentile in [50, 90]:
     eff_idx = np.where(red_efficiency < (percentile / 100.))[0]
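
Before moving to the second script: the efficiency executable ends (see the excl_dist loop above) by reading the 50% and 90% exclusion distances off the downward-fluctuated efficiency curve red_efficiency. The lookup is just the first distance bin at which the curve drops below the target fraction; schematically, with made-up numbers:

    import numpy as np

    # Hypothetical efficiency curve at the bin-mean distances (Mpc)
    dist_plot_vals = np.array([20., 40., 60., 80., 100.])
    red_efficiency = np.array([0.98, 0.93, 0.74, 0.41, 0.12])

    excl_dist = {}
    for percentile in [50, 90]:
        # Bins where the efficiency has fallen below the target fraction
        eff_idx = np.where(red_efficiency < (percentile / 100.))[0]
        # The first crossing gives the exclusion distance estimate
        excl_dist[percentile] = float(dist_plot_vals[eff_idx[0]])

    print(excl_dist)  # {50: 80.0, 90: 60.0}
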
diff --git a/bin/pygrb/pycbc_pygrb_page_tables b/bin/pygrb/pycbc_pygrb_page_tables
index 6d53fc14efa..b2e7e18dc79 100755
--- a/bin/pygrb/pycbc_pygrb_page_tables
+++ b/bin/pygrb/pycbc_pygrb_page_tables
@@ -54,7 +54,7 @@ def additional_injection_data(data, ifos):
     eff_dist = 0
     for ifo in ifos:
         antenna = Detector(ifo)
-        data['eff_dist_%s' % ifo] = antenna.effective_distance(
+        data['eff_dist_'+ifo] = antenna.effective_distance(
             data['distance'],
             data['ra'],
             data['dec'],
@@ -62,13 +62,13 @@ def additional_injection_data(data, ifos):
             data['tc'],
             data['inclination']
         )
-        eff_dist += 1.0 / data['eff_dist_%s' % ifo]
+        eff_dist += 1.0 / data['eff_dist_'+ifo]
     data['eff_dist'] = 1.0 / eff_dist
 
     return data
 
 
-def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file,
+def load_missed_found_injections(hdf_file, ifos, bank_file, snr_threshold=None,
                                  background_bestnrs=None):
     """Loads found and missed injections from an hdf file as two dictionaries
 
@@ -77,18 +77,20 @@ def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file,
     hdf_file: str
         File path
     ifos: list
-    snr_threshold: float
-        NewSNR threshold
-    bank_file: HFile object
-    background_bestnrs: numpy.array, optional
-        Used to compute FAP of quiet injections.
+    bank_file: h5py.File object
+    snr_threshold: float, optional [default: None]
+        Reweighted SNR threshold
+    background_bestnrs: numpy.array, optional [default: None]
+        Used to compute FAP of quiet injections
 
     Returns
     -------
     data: tuple of dictionaries
-        Found and missed injection parameter dictionaries.
+        Found, missed, and found-but-cut (reweighted SNR below threshold)
+        injection parameter dictionaries.
     """
+    logging.info('Loading injections...')
     inj_data = HFile(hdf_file, 'r')
     inj_params = ['mass1', 'mass2', 'distance', 'inclination', 'ra', 'dec',
                   'polarization', 'spin1x', 'spin1y', 'spin1z', 'spin2x',
@@ -96,12 +98,11 @@ def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file,
     found_data = {}
     # Missed injections (ones not recovered at all)
     missed_data = {}
-    logging.info('Loading injections...')
 
     # Load injections parameters
     for param in inj_params:
-        missed_data[param] = inj_data['missed/%s' % param][...]
-        found_data[param] = inj_data['found/%s' % param][...]
+        missed_data[param] = inj_data['missed/'+param][...]
+        found_data[param] = inj_data['found/'+param][...]
 
     # Calculate effective distance for the ifos
     found_data = additional_injection_data(found_data, ifos)
@@ -110,7 +111,7 @@ def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file,
     # Get recovered parameters and statistic values for the found injections
     # Recovered parameters
     for param in ['mass1', 'mass2', 'spin1z', 'spin2z']:
-        found_data['rec_%s' % param] = \
+        found_data['rec_'+param] = \
             np.array(bank_file[param])[inj_data['network/template_id']]
     found_data['time_diff'] = \
         found_data['tc'] - inj_data['network/end_time_gc'][...]
@@ -122,26 +123,40 @@ def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file,
     found_data['rec_dec'] = inj_data['network/dec'][...]
     # Statistics values
     for param in ['coherent_snr', 'reweighted_snr', 'null_snr']:
-        found_data[param] = inj_data['network/%s' % param][...]
+        found_data[param] = inj_data['network/'+param][...]
     found_data['chisq'] = inj_data['network/my_network_chisq'][...]
     found_data['nifos'] = inj_data['network/nifo'][...].astype(int)
     for ifo in ifos:
         if np.all(inj_data['network/event_id'][...] ==
-                  inj_data['%s/event_id' % ifo][...]):
-            found_data['sigmasq_%s' % ifo] = inj_data['%s/sigmasq' % ifo][...]
-            found_data['snr_%s' % ifo] = inj_data['%s/snr' % ifo][...]
+                  inj_data[ifo+'/event_id'][...]):
+            found_data['sigmasq_'+ifo] = inj_data[ifo+'/sigmasq'][...]
+            found_data['snr_'+ifo] = inj_data[ifo+'/snr'][...]
+            found_data[ifo+'/end_time'] = inj_data[ifo+'/end_time'][...]
else: # Sort the ifo event_id with respect to the network event_id ifo_sorted_indices = np.argsort(inj_data['network/event_id'][...][ np.argsort(inj_data['network/event_id'])].searchsorted( - inj_data['%s/event_id' % ifo][...])) - found_data['sigmasq_%s' % ifo] = \ - inj_data['%s/sigmasq' % ifo][...][ifo_sorted_indices] - found_data['snr_%s' % ifo] = \ - inj_data['%s/snr' % ifo][...][ifo_sorted_indices] + inj_data[ifo+'/event_id'][...])) + found_data['sigmasq_'+ifo] = \ + inj_data[ifo+'/sigmasq'][...][ifo_sorted_indices] + found_data['snr_'+ifo] = \ + inj_data[ifo+'/snr'][...][ifo_sorted_indices] # BestNRs found_data['bestnr'] = reweightedsnr_cut(found_data['reweighted_snr'][...], snr_threshold) + # Apply reweighted SNR cut + cut_data = {} + if snr_threshold: + logging.info("%d found injections loaded.", len(found_data[inj_params[0]])) + logging.info("%d missed injections loaded.", len(missed_data[inj_params[0]])) + logging.info("Applying reweighted SNR cut at %s.", snr_threshold) + rw_snr_cut = found_data['reweighted_snr'] < snr_threshold + for key in found_data: + cut_data[key] = found_data[key][rw_snr_cut] + found_data[key] = found_data[key][~rw_snr_cut] + del found_data['reweighted_snr'] + del cut_data['reweighted_snr'] + if background_bestnrs is not None: found_data['fap'] = np.array( [sum(background_bestnrs > bestnr) for bestnr in @@ -150,15 +165,15 @@ def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file, # Antenna responses f_resp = {} for ifo in ifos: - if sum(found_data['sigmasq_%s' % ifo] == 0): + if sum(found_data['sigmasq_'+ifo] == 0): logging.info("%s: sigmasq not set for at least one trigger.", ifo) - if sum(found_data['sigmasq_%s' % ifo] != 0) == 0: + if sum(found_data['sigmasq_'+ifo] != 0) == 0: logging.info("%s: sigmasq not set for any trigger.", ifo) if len(ifos) == 1: msg = "This is a single ifo analysis. " msg += "Setting sigmasq to unity for all triggers." 
logging.info(msg) - found_data['sigmasq_%s' % ifo][:] = 1.0 + found_data['sigmasq_'+ifo][:] = 1.0 antenna = Detector(ifo) f_resp[ifo] = ppu.get_antenna_responses(antenna, found_data['ra'], found_data['dec'], @@ -166,15 +181,24 @@ def load_missed_found_injections(hdf_file, ifos, snr_threshold, bank_file, inj_sigma_mult = \ np.asarray([f_resp[ifo] * - found_data['sigmasq_%s' % ifo] for ifo in ifos]) + found_data['sigmasq_'+ifo] for ifo in ifos]) inj_sigma_tot = np.sum(inj_sigma_mult, axis=0) for ifo in ifos: - found_data['inj_sigma_mean_%s' % ifo] = np.mean( - found_data['sigmasq_%s' % ifo] * f_resp[ifo] / inj_sigma_tot) + found_data['inj_sigma_mean_'+ifo] = np.mean( + found_data['sigmasq_'+ifo] * f_resp[ifo] / inj_sigma_tot) # Close the hdf file inj_data.close() - return found_data, missed_data + logging.info("%d found injections.", len(found_data['mchirp'])) + logging.info("%d missed injections.", len(missed_data['mchirp'])) + logging.info("%d injections cut.", len(cut_data['mchirp'])) + + return found_data, missed_data, cut_data + + +def format_pvalue_str(pvalue, n_trials): + """Format p-value as a string.""" + return f'< {(1./n_trials):.3g}' if pvalue == 0 else f'{pvalue:.3g}' # ============================================================================= @@ -216,7 +240,9 @@ parser.add_argument("-C", "--cluster-window", action="store", type=float, default=0.1, help="The cluster window used " + "to cluster triggers in time.") ppu.pygrb_add_bestnr_cut_opt(parser) +ppu.pygrb_add_slide_opts(parser) opts = parser.parse_args() +ppu.slide_opts_helper(opts) init_logging(opts.verbose, format="%(asctime)s: %(levelname)s: %(message)s") @@ -266,84 +292,90 @@ for output_file in output_files: if not os.path.isdir(outdir): os.makedirs(outdir) -# Extract IFOs and vetoes -ifos, vetoes = ppu.extract_ifos_and_vetoes(offsource_file, opts.veto_files, - opts.veto_category) - -# Load triggers, time-slides, and segment dictionary -logging.info("Loading triggers.") -trig_data = ppu.load_triggers(offsource_file, ifos, None, - rw_snr_threshold=opts.newsnr_threshold) -logging.info("%d offsource triggers surviving reweighted SNR cut.", - len(trig_data['network/event_id'])) -logging.info("Loading timeslides.") -slide_dict = ppu.load_time_slides(offsource_file) -logging.info("Loading segments.") -segment_dict = ppu.load_segment_dict(offsource_file) +# Extract IFOs +ifos = ppu.extract_ifos(offsource_file) -# Calculate chirp masses of templates -logging.info('Loading triggers template masses') -bank_data = HFile(opts.bank_file, 'r') -mchirps = mchirp_from_mass1_mass2( - bank_data['mass1'][...], - bank_data['mass2'][...] 
-    )
+# Generate time-slides dictionary
+slide_dict = ppu.load_time_slides(offsource_file)
 
-# Construct trials
-logging.info("Constructing trials.")
-trial_dict = ppu.construct_trials(opts.seg_files, segment_dict,
-                                  ifos, slide_dict, vetoes)
-total_trials = sum([len(trial_dict[slide_id]) for slide_id in slide_dict])
-logging.info("%d trials generated.", total_trials)
-
-# Extract basic trigger properties and store as dictionaries
-trig_time, trig_snr, trig_bestnr = \
-    ppu.extract_basic_trig_properties(trial_dict, trig_data, slide_dict,
-                                      segment_dict, opts)
-# Calculate SNR and BestNR values and maxima
-time_veto_max_snr = {}
-time_veto_max_bestnr = {}
-for slide_id in slide_dict:
-    num_slide_segs = len(trial_dict[slide_id])
-    time_veto_max_snr[slide_id] = np.zeros(num_slide_segs)
-    time_veto_max_bestnr[slide_id] = np.zeros(num_slide_segs)
+# Generate segments dictionary
+segment_dict = ppu.load_segment_dict(offsource_file)
 
+# Construct trials removing vetoed times
+trial_dict, total_trials = ppu.construct_trials(opts.seg_files, segment_dict,
+                                                ifos, slide_dict,
+                                                opts.veto_file)
+
+# Load triggers (apply reweighted SNR cut, not vetoes)
+trig_data = ppu.load_data(offsource_file, ifos, data_tag='offsource',
+                          rw_snr_threshold=opts.newsnr_threshold,
+                          slide_id=opts.slide_id)
+
+# Extract needed trigger properties and store them as dictionaries
+# Based on trial_dict: if vetoes were applied, trig_* are the veto survivors
+# _av stands for after vetoes
+keys = ['network/end_time_gc', 'network/coherent_snr', 'network/reweighted_snr']
+trig_data_av = ppu.extract_trig_properties(
+    trial_dict,
+    trig_data,
+    slide_dict,
+    segment_dict,
+    keys
+)
+
+# Max SNR and BestNR values in each trial: these are stored in dictionaries
+# keyed by slide_id, as arrays indexed by trial number
+background_snr = {k: np.zeros(len(v)) for k, v in trial_dict.items()}
+background = {k: np.zeros(len(v)) for k, v in trial_dict.items()}
 for slide_id in slide_dict:
+    trig_times = trig_data_av[keys[0]][slide_id]
     for j, trial in enumerate(trial_dict[slide_id]):
-        trial_cut = (trial[0] <= trig_time[slide_id])\
-            & (trig_time[slide_id] < trial[1])
+        # True whenever the trigger is in the trial
+        trial_cut = (trial[0] <= trig_times) & (trig_times < trial[1])
         if not trial_cut.any():
             continue
         # Max SNR
-        time_veto_max_snr[slide_id][j] = \
-            max(trig_snr[slide_id][trial_cut])
+        background_snr[slide_id][j] = \
+            max(trig_data_av[keys[1]][slide_id][trial_cut])
         # Max BestNR
-        time_veto_max_bestnr[slide_id][j] = \
-            max(trig_bestnr[slide_id][trial_cut])
-        # Max SNR for triggers passing SBVs
-        sbv_cut = trig_bestnr[slide_id] != 0
-        if not (trial_cut & sbv_cut).any():
-            continue
+        background[slide_id][j] = \
+            max(trig_data_av[keys[2]][slide_id][trial_cut])
+
+# Max and median values of reweighted SNR,
+# and sorted (loudest in trial) reweighted SNR values
+max_bestnr, median_bestnr, sorted_bkgd =\
+    ppu.max_median_stat(slide_dict, background,
+                        trig_data_av[keys[2]], total_trials)
+assert total_trials == len(sorted_bkgd)
+
+# Median value of SNR
+_, median_snr, _ = ppu.max_median_stat(slide_dict, background_snr,
+                                       trig_data_av[keys[1]], total_trials)
 
-logging.info("SNR and bestNR maxima calculated.")
+logging.info("Background SNR and bestNR of trials calculated.")
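
With sorted_bkgd in hand, every p-value computed below is the fraction of off-source trials whose loudest surviving trigger beats the candidate, so the smallest resolvable value is 1/total_trials; the new format_pvalue_str helper defined earlier in this file makes the saturated case explicit. A quick sanity check of its two branches (toy numbers, 1000 trials):

    assert format_pvalue_str(0.0123, 1000) == '0.0123'
    assert format_pvalue_str(0., 1000) == '< 0.001'  # no louder background trial
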
 
-# Output details of loudest offsouce triggers, sorted by BestNR
+# Output details of loudest offsource triggers: only triggers compatible
+# with the trial_dict are considered
 offsource_trigs = []
 sorted_trigs = ppu.sort_trigs(trial_dict, trig_data, slide_dict, segment_dict)
 for slide_id in slide_dict:
-    offsource_trigs.extend(zip(trig_bestnr[slide_id],
-                               sorted_trigs[slide_id]))
-
+    offsource_trigs.extend(
+        zip(trig_data_av[keys[2]][slide_id], sorted_trigs[slide_id])
+    )
 offsource_trigs.sort(key=lambda element: element[0])
 offsource_trigs.reverse()
 
-# Median and max values of SNR and BestNR
-_, median_snr, _ = ppu.max_median_stat(slide_dict, time_veto_max_snr,
-                                       trig_snr, total_trials)
-max_bestnr, median_bestnr, full_time_veto_max_bestnr =\
-    ppu.max_median_stat(slide_dict, time_veto_max_bestnr, trig_bestnr,
-                        total_trials)
+# Calculate chirp masses of templates
+logging.info('Loading triggers template masses')
+bank_data = HFile(opts.bank_file, 'r')
+template_mchirps = mchirp_from_mass1_mass2(
+    bank_data['mass1'][...],
+    bank_data['mass2'][...]
+    )
 
+# =========================================
+# Output of loudest offsource triggers data
+# =========================================
 if lofft_outfile:
     # td: table data
     td = []
 
@@ -355,7 +387,7 @@ if lofft_outfile:
         trig_index = \
             np.where(trig_data['network/event_id'] == trig_id)[0][0]
         ifo_trig_index = {
-            ifo: np.where(trig_data['%s/event_id' % ifo] == trig_id)[0][0]
+            ifo: np.where(trig_data[ifo+'/event_id'] == trig_id)[0][0]
             for ifo in ifos
         }
         trig_slide_id = int(trig_data['network/slide_id'][trig_index])
@@ -370,18 +402,13 @@ if lofft_outfile:
             chunk_num = 'No trial'
 
         # Get FAP of trigger
-        num_trials_louder = 0
-        for slide_id in slide_dict:
-            for val in time_veto_max_bestnr[slide_id]:
-                if val > bestnr:
-                    num_trials_louder += 1
-        fap = num_trials_louder/total_trials
-        pval = '< %.3g' % (1./total_trials) if fap == 0 else '%.3g' % fap
+        pval = sum(sorted_bkgd > bestnr) / total_trials
+        pval = format_pvalue_str(pval, total_trials)
         d = [chunk_num, trig_slide_id, pval,
             trig_data['network/end_time_gc'][trig_index],
            bank_data['mass1'][trig_data['network/template_id'][trig_index]],
            bank_data['mass2'][trig_data['network/template_id'][trig_index]],
-             mchirps[trig_index],
+             template_mchirps[trig_data['network/template_id'][trig_index]],
            bank_data['spin1z'][trig_data['network/template_id'][trig_index]],
            bank_data['spin2z'][trig_data['network/template_id'][trig_index]],
             trig_data['network/ra'][trig_index],
             trig_data['network/dec'][trig_index],
             trig_data['network/coherent_snr'][trig_index],
             trig_data['network/my_network_chisq'][trig_index],
             trig_data['network/null_snr'][trig_index]]
-        d.extend([trig_data['%s/snr' % ifo][ifo_trig_index[ifo]]
+        d.extend([trig_data[ifo+'/snr'][ifo_trig_index[ifo]]
                   for ifo in ifos])
         d.extend([slide_dict[trig_slide_id][ifo] for ifo in ifos])
         d.append(bestnr)
         td.append(d)
 
@@ -399,8 +426,8 @@ if lofft_outfile:
     th = ['Trial', 'Slide Num', 'p-value', 'GPS time', 'Rec. m1', 'Rec. m2',
           'Rec. Mc', 'Rec. spin1z', 'Rec. spin2z', 'Rec. RA', 'Rec. Dec',
           'SNR', 'Chi^2', 'Null SNR']
-    th.extend(['%s SNR' % ifo for ifo in ifos])
-    th.extend(['%s time shift (s)' % ifo for ifo in ifos])
+    th.extend([ifo+' SNR' for ifo in ifos])
+    th.extend([ifo+' time shift (s)' for ifo in ifos])
     th.append('BestNR')
 
     # To ensure desired formatting in the h5 file and html table:
@@ -409,14 +436,14 @@ if lofft_outfile:
 
     # Write to h5 file
     logging.info("Writing %d loudest offsource triggers to h5 file.",
-                 len(td))
+                 len(td[0]))
     lofft_h5_fp = HFile(lofft_h5_outfile, 'w')
     for i, key in enumerate(th):
         lofft_h5_fp.create_dataset(key, data=td[i])
     lofft_h5_fp.close()
 
     # Write to html file
-    logging.info("Writing %d loudest triggers to html file.", len(td))
+    logging.info("Writing %d loudest triggers to html file.", len(td[0]))
 
     # To ensure desired formatting in the html table:
     # 2) convert the columns to numpy arrays
@@ -451,7 +478,7 @@ if lofft_outfile:
     # end of an observing run collectively
     # TODO: Needs a final place in the results webpage
     # np.savetxt('%s/bestnr_vs_fap_numbers.txt' %(outdir),
-    #            full_time_veto_max_bestnr, delimiter='/t')
+    #            sorted_bkgd, delimiter='/t')
 
 
 # =======================
@@ -460,8 +487,9 @@ if lofft_outfile:
 
 if onsource_file:
     # Get trigs
-    on_trigs = ppu.load_triggers(onsource_file, ifos, None,
-                                 rw_snr_threshold=opts.newsnr_threshold)
+    on_trigs = ppu.load_data(onsource_file, ifos, data_tag=None,
+                             rw_snr_threshold=opts.newsnr_threshold,
+                             slide_id='all')
 
     # Record loudest trig by BestNR
     loud_on_bestnr = 0
@@ -483,30 +511,21 @@ if onsource_file:
     td = []
 
     # Gather data
-    loud_on_fap = 1
     if loud_on_bestnr_trigs:
         trig_id = loud_on_bestnr_trigs
         trig_index = np.where(on_trigs['network/event_id'] == trig_id)[0][0]
         ifo_trig_index = {
-            ifo: np.where(on_trigs['%s/event_id' % ifo] == trig_id)[0][0]
+            ifo: np.where(on_trigs[ifo+'/event_id'] == trig_id)[0][0]
             for ifo in ifos
         }
-        num_trials_louder = 0
-        tot_off_snr = np.array([])
-        for slide_id in slide_dict:
-            num_trials_louder += sum(time_veto_max_bestnr[slide_id] >
-                                     loud_on_bestnr)
-            tot_off_snr = np.concatenate([tot_off_snr,
-                                          time_veto_max_bestnr[slide_id]])
-        fap = num_trials_louder/total_trials
-        fap_test = sum(tot_off_snr > loud_on_bestnr)/total_trials
-        pval = '< %.3g' % (1./total_trials) if fap == 0 else '%.3g' % fap
-        loud_on_fap = fap
+        pval = sum(sorted_bkgd > loud_on_bestnr)/total_trials
+        pval = format_pvalue_str(pval, total_trials)
         d = [pval,
             on_trigs['network/end_time_gc'][trig_index],
             bank_data['mass1'][on_trigs['network/template_id'][trig_index]],
             bank_data['mass2'][on_trigs['network/template_id'][trig_index]],
-            mchirps[on_trigs['network/template_id'][trig_index]],
+            template_mchirps[on_trigs['network/template_id'][trig_index]],
             bank_data['spin1z'][on_trigs['network/template_id'][trig_index]],
             bank_data['spin2z'][on_trigs['network/template_id'][trig_index]],
             on_trigs['network/ra'][trig_index],
             on_trigs['network/dec'][trig_index],
             on_trigs['network/coherent_snr'][trig_index],
             on_trigs['network/my_network_chisq'][trig_index],
             on_trigs['network/null_snr'][trig_index]] + \
-            [on_trigs['%s/snr' % ifo][ifo_trig_index[ifo]] for ifo in ifos] + \
+            [on_trigs[ifo+'/snr'][ifo_trig_index[ifo]] for ifo in ifos] + \
             [loud_on_bestnr]
         td.append(d)
     else:
 
@@ -524,7 +543,7 @@ if onsource_file:
     # Table header
     th = ['p-value', 'GPS time', 'Rec. m1', 'Rec. m2', 'Rec. Mc',
           'Rec. spin1z', 'Rec. spin2z', 'Rec. RA', 'Rec. Dec', 'SNR', 'Chi^2',
-          'Null SNR'] + ['%s SNR' % ifo for ifo in ifos] + ['BestNR']
+          'Null SNR'] + [ifo+' SNR' for ifo in ifos] + ['BestNR']
 
     td = list(zip(*td))
 
@@ -555,51 +574,45 @@ if onsource_file:
         pycbc.results.save_fig_with_metadata(str(html_table), lont_outfile,
                                              **kwds)
 
-else:
-    tot_off_snr = np.array([])
-    for slide_id in slide_dict:
-        tot_off_snr = np.concatenate([tot_off_snr,
-                                      time_veto_max_bestnr[slide_id]])
-    med_snr = np.median(tot_off_snr)
-    fap = sum(tot_off_snr > med_snr)/total_trials
-
 # =======================
 # Post-process injections
 # =======================
 
 if found_missed_file is not None:
-    found_injs, missed_injs = load_missed_found_injections(
-        found_missed_file, ifos, opts.newsnr_threshold, bank_data,
-        background_bestnrs=full_time_veto_max_bestnr)
-    logging.info("Missed/found injections/triggers loaded.")
-    logging.info("%d found injections found.", len(found_injs['mchirp']))
-    logging.info("%d missed injections found.", len(missed_injs['mchirp']))
+    # Load injections applying reweighted SNR cut
+    found_injs, missed_injs, cut_injs = load_missed_found_injections(
+        found_missed_file, ifos, bank_data,
+        snr_threshold=opts.newsnr_threshold,
+        background_bestnrs=sorted_bkgd
+    )
+
+    # Split into injections found surviving vetoes and ones found but vetoed
+    found_after_vetoes, vetoed, *_ = ppu.apply_vetoes_to_found_injs(
+        found_missed_file,
+        found_injs,
+        ifos,
+        veto_file=opts.veto_file
+    )
 
     # Construct conditions for injection:
     # 1) found louder than background,
-    zero_fap = found_injs['bestnr'] > max_bestnr
+    zero_fap = found_after_vetoes['bestnr'] > max_bestnr
 
     # 2) found (bestnr > 0) but not louder than background (non-zero FAP)
-    nonzero_fap = ~zero_fap & (found_injs['bestnr'] != 0)
-
-    # 3) missed after being recovered (i.e., vetoed)
-    # -- > question: is there ever another way this happens other than veto?
-    # vetoed_trigs = (~zero_fap) & (~nonzero_fap)
-    vetoed_trigs = found_injs['bestnr'] == 0
+    nonzero_fap = ~zero_fap & (found_after_vetoes['bestnr'] != 0)
 
-    logging.info("%d found injections analysed.", len(found_injs['mchirp']))
+    # 3) missed after being recovered: vetoed (these have bestnr = 0)
 
     # Avoids a problem with formatting in the non-static html output file
-    missed_na = [-0] * len(missed_injs['mchirp'])
-
-    logging.info("%d missed injections analysed.", len(missed_injs['mchirp']))
+    #missed_na = [-0] * len(missed_injs['mchirp'])
 
     # Write quiet triggers to file
     sites = [ifo[0] for ifo in ifos]
-    th = ['Dist'] + ['Eff. Dist. %s' % site for site in sites] +\
+    th = ['Dist'] + ['Eff. Dist. '+site for site in sites] +\
        ['GPS time', 'GPS time - Rec. Time'] +\
        ['Inj. m1', 'Inj. m2', 'Inj. Mc', 'Rec. m1', 'Rec. m2', 'Rec. Mc',
         'Inj. inc', 'Inj. RA', 'Inj. Dec', 'Rec. RA', 'Rec. Dec', 'SNR',
         'Chi^2', 'Null SNR'] +\
        ['SNR '+ifo for ifo in ifos] +\
        ['BestNR', 'Inj S1x', 'Inj S1y', 'Inj S1z', 'Inj S2x', 'Inj S2y',
         'Inj S2z', 'Rec S1z', 'Rec S2z']
 
@@ -617,43 +630,28 @@ if found_missed_file is not None:
                            '##.##', '##.##', '##.##', '##.##', '##.##',
                            '##.##', '##.##', '##.##'])
 
-    sngl_snr_keys = ['snr_%s' % ifo for ifo in ifos]
+    sngl_snr_keys = ['snr_'+ifo for ifo in ifos]
     keys = ['distance']
-    keys += ['eff_dist_%s' % ifo for ifo in ifos]
+    keys += ['eff_dist_'+ifo for ifo in ifos]
     keys += ['tc', 'time_diff', 'mass1', 'mass2', 'mchirp', 'rec_mass1',
             'rec_mass2', 'rec_mchirp', 'inclination', 'ra', 'dec', 'rec_ra',
             'rec_dec', 'coherent_snr', 'chisq', 'null_snr']
     keys += sngl_snr_keys
     keys += ['bestnr', 'spin1x', 'spin1y', 'spin1z', 'spin2x', 'spin2y',
             'spin2z', 'rec_spin1z', 'rec_spin2z']
-    # The following parameters are available only for recovered injections
-    na_keys = ['time_diff', 'rec_mass1', 'rec_mass2', 'rec_mchirp',
-               'rec_spin1z', 'rec_spin2z', 'rec_ra', 'rec_dec', 'coherent_snr',
-               'chisq', 'null_snr', 'bestnr']
-    na_keys += sngl_snr_keys
-    td = []
-    for key in keys:
-        if key in na_keys:
-            td += [np.concatenate((found_injs[key][nonzero_fap],
-                                   found_injs[key][vetoed_trigs],
-                                   missed_na))]
-        else:
-            td += [np.concatenate((found_injs[key][nonzero_fap],
-                                   found_injs[key][vetoed_trigs],
-                                   missed_injs[key]))]
+    td = [found_after_vetoes[key][nonzero_fap] for key in keys]
     td = list(zip(*td))
     td.sort(key=lambda elem: elem[0])
+    logging.info("Writing %d quiet-found injections to h5 and html files.",
+                 len(td))
     td = list(zip(*td))
 
     # Write to h5 file
-    logging.info("Writing %d quiet-found injections to h5 file.", len(td))
     with HFile(qf_h5_outfile, 'w') as qf_h5_fp:
         for i, key in enumerate(th):
             qf_h5_fp.create_dataset(key, data=td[i])
 
     # Write to html file
-    logging.info("Writing %d quiet-found injections to html file.",
-                 len(td))
     td = [np.asarray(d) for d in td]
     html_table = pycbc.results.html_table(td, th,
                                           format_strings=format_strings,
                                           page_size=20)
@@ -665,15 +663,12 @@ if found_missed_file is not None:
     pycbc.results.save_fig_with_metadata(str(html_table), qf_outfile,
                                          **kwds)
 
-    # Write to html file
-    t_missed = []
-    for key in keys:
-        t_missed += [found_injs[key][vetoed_trigs]]
+    # Write missed-found (vetoed or cut) injections to html file
+    t_missed = [np.concatenate((vetoed[key], cut_injs[key])) for key in keys]
     t_missed = list(zip(*t_missed))
     t_missed.sort(key=lambda elem: elem[0])
     logging.info("Writing %d missed-found injections to html file.",
                  len(t_missed))
-    t_missed = zip(*t_missed)
     t_missed = [np.asarray(d) for d in t_missed]
     html_table = pycbc.results.html_table(t_missed, th,
                                           format_strings=format_strings,
                                           page_size=20)
     kwds = {'title': "Missed found injections",
             'caption': "Recovered parameters and statistic values of \
-            injections that are recovered, but downwieghted to BestNR = 0 \
-            (i.e., vetoed).",
+            injections that are recovered, but with reweighted SNR \
+            below threshold or vetoed.",
             'cmd': ' '.join(sys.argv),
             }
     pycbc.results.save_fig_with_metadata(str(html_table), mf_outfile, **kwds)
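
A closing note on additional_injection_data, touched at the top of this second diff: the network 'eff_dist' it stores is the reciprocal of the summed reciprocals of the per-detector effective distances, i.e. a harmonic-sum combination dominated by the most sensitive detector. A worked toy example of exactly that arithmetic:

    # Made-up per-ifo effective distances (Mpc)
    eff_dists = {'H1': 120., 'L1': 90., 'V1': 300.}
    # Reciprocal of the sum of reciprocals, as in additional_injection_data
    eff_dist_net = 1.0 / sum(1.0 / d for d in eff_dists.values())
    print(round(eff_dist_net, 1))  # 43.9 -- smaller than any single-ifo value
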