From d486ec29d4c52963a3cbff691ce361200475eed9 Mon Sep 17 00:00:00 2001
From: Gareth S Cabourn Davies
Date: Tue, 11 Jun 2024 14:55:03 +0100
Subject: [PATCH] Ranking statistic for live singles (#4689)

* Allow the live single trigger fits to use ranking statistic rather than sngl-ranking

* inbin is no longer all the events above threshold, plotting to indicate the case where no triggers are found

* deal better with cases where there are no triggers

* Use ranking statistic for single-detector events

* Fix some errors

* fix some statistics so they can produce single-detector events

* Some codeclimate suggestions

* get fit coeff files into CI, set a maximum IFAR for singles

* alter the CI example run

* Codeclimate suggestions

* Line too long

* minor tweaks

* Used shared code

* Fix broken fixing

* missed that this needs the module

* typo

* calculate plotmax earlier and use it to decide the histogram bins

* Update bin/live/pycbc_live_plot_single_significance_fits

Co-authored-by: Tito Dal Canton

* TDC comments

* Update threshold naming and description

* update argument in example

* Please do not look at the previous commit and see how much of an idiot I am

* Update bin/live/pycbc_live_combine_single_significance_fits

Co-authored-by: Tito Dal Canton

---------

Co-authored-by: Tito Dal Canton
---
 ...ycbc_live_combine_single_significance_fits |  19 ++-
 ...ive_plot_combined_single_significance_fits |   2 +-
 .../pycbc_live_plot_single_significance_fits  |  50 ++++--
 bin/live/pycbc_live_single_significance_fits  |  74 ++++++---
 examples/live/make_fit_coeffs.py              |  27 ++++
 ...e.py => make_singles_significance_fits.py} |   2 +-
 examples/live/run.sh                          |  33 ++--
 pycbc/events/single.py                        | 144 +++++++++++++-----
 pycbc/events/stat.py                          |  16 +-
 9 files changed, 266 insertions(+), 101 deletions(-)
 create mode 100644 examples/live/make_fit_coeffs.py
 rename examples/live/{make_singles_fits_file.py => make_singles_significance_fits.py} (97%)

diff --git a/bin/live/pycbc_live_combine_single_significance_fits b/bin/live/pycbc_live_combine_single_significance_fits
index 857c1d84a8a..e47a5433805 100644
--- a/bin/live/pycbc_live_combine_single_significance_fits
+++ b/bin/live/pycbc_live_combine_single_significance_fits
@@ -62,8 +62,10 @@ for f in args.trfits_files:
     bl = fit_f['bins_lower'][:]
     bu = fit_f['bins_upper'][:]
     sngl_rank = fit_f.attrs['sngl_ranking']
-    fit_thresh = fit_f.attrs['fit_threshold']
-    fit_func = fit_f.attrs['fit_function']
+    fit_thresh = {ifo: fit_f[ifo].attrs['fit_threshold']
+                  for ifo in args.ifos}
+    fit_func = {ifo: fit_f[ifo].attrs['fit_function']
+                for ifo in args.ifos}
 
 # Now go back through the fit files and read the actual information. Skip the
 # files that have fit parameters inconsistent with what we found earlier.
@@ -80,9 +82,13 @@ for f in args.trfits_files:
     with h5py.File(f, 'r') as fits_f:
         # Check that the file uses the same setup as file 0, to make sure
         # all coefficients are comparable
+        new_fit_func = {ifo: fits_f[ifo].attrs['fit_function']
+                        for ifo in args.ifos}
+        new_fit_thresh = {ifo: fits_f[ifo].attrs['fit_threshold']
+                          for ifo in args.ifos}
         same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank
-                     and fits_f.attrs['fit_threshold'] == fit_thresh
-                     and fits_f.attrs['fit_function'] == fit_func
+                     and new_fit_thresh == fit_thresh
+                     and new_fit_func == fit_func
                      and fits_f['bins_lower'].size == bl.size
                      and all(fits_f['bins_lower'][:] == bl)
                      and all(fits_f['bins_upper'][:] == bu))
@@ -99,7 +105,7 @@ for f in args.trfits_files:
         gps_last = 0
         gps_first = np.inf
         for ifo in args.ifos:
-            if ifo not in fits_f:
+            if ifo not in fits_f or 'triggers' not in fits_f[ifo]:
                 continue
             trig_times = fits_f[ifo]['triggers']['end_time'][:]
             gps_last = max(gps_last, trig_times.max())
@@ -143,7 +149,6 @@ cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.i
 
 logging.info("Writing results")
 fout = h5py.File(args.output, 'w')
-fout.attrs['fit_threshold'] = fit_thresh
 fout.attrs['conservative_percentile'] = args.conservative_percentile
 fout.attrs['ifos'] = args.ifos
 fout['bins_edges'] = list(bl) + [bu[-1]]
@@ -152,6 +157,8 @@ fout['fits_dates'] = ad + start_time_n
 for ifo in args.ifos:
     logging.info(ifo)
     fout_ifo = fout.create_group(ifo)
+    fout_ifo.attrs['fit_threshold'] = fit_thresh[ifo]
+    fout_ifo.attrs['fit_function'] = fit_func[ifo]
     l_times = np.array(live_times[ifo])
     total_time = l_times.sum()
     fout_ifo.attrs['live_time'] = total_time
diff --git a/bin/live/pycbc_live_plot_combined_single_significance_fits b/bin/live/pycbc_live_plot_combined_single_significance_fits
index b14072f7887..db06e881c4b 100644
--- a/bin/live/pycbc_live_plot_combined_single_significance_fits
+++ b/bin/live/pycbc_live_plot_combined_single_significance_fits
@@ -142,7 +142,7 @@ for ifo in ifos:
         continue
 
     l_times = separate_times[ifo]
-    with np.errstate(divide='ignore'):
+    with np.errstate(divide='ignore', invalid='ignore'):
        rate = counts / l_times
 
     ma = mean_alpha[ifo][i]
diff --git a/bin/live/pycbc_live_plot_single_significance_fits b/bin/live/pycbc_live_plot_single_significance_fits
index 4caad3073c4..c4b74e3de06 100644
--- a/bin/live/pycbc_live_plot_single_significance_fits
+++ b/bin/live/pycbc_live_plot_single_significance_fits
@@ -71,9 +71,12 @@ with h5py.File(args.trigger_fits_file, 'r') as trfit_f:
     ifos = [k for k in trfit_f.keys() if not k.startswith('bins')]
 
     # Grab some info from the attributes
-    sngl_ranking = trfit_f.attrs['sngl_ranking']
-    fit_threshold = trfit_f.attrs['fit_threshold']
-    fit_function = trfit_f.attrs['fit_function']
+    if trfit_f.attrs['ranking_statistic'] in ['quadsum','single_ranking_only']:
+        x_label = trfit_f.attrs['sngl_ranking']
+    else:
+        x_label = "Ranking statistic"
+    fit_threshold = {ifo: trfit_f[ifo].attrs['fit_threshold'] for ifo in ifos}
+    fit_function = {ifo: trfit_f[ifo].attrs['fit_function'] for ifo in ifos}
     analysis_date = trfit_f.attrs['analysis_date']
 
     # Get the triggers for each detector
@@ -81,7 +84,9 @@ with h5py.File(args.trigger_fits_file, 'r') as trfit_f:
     stats = {ifo: {} for ifo in ifos}
     durations = {ifo: {} for ifo in ifos}
     for ifo in ifos:
-        stats[ifo] = trfit_f[ifo]['triggers'][sngl_ranking][:]
+        if 'triggers' not in trfit_f[ifo]:
+            continue
+        stats[ifo] = trfit_f[ifo]['triggers']['stat'][:]
         durations[ifo] = trfit_f[ifo]['triggers']['template_duration'][:]
     live_time = {ifo: trfit_f[ifo].attrs['live_time'] for ifo in ifos}
     alphas = {ifo: trfit_f[ifo]['fit_coeff'][:] for ifo in ifos}
@@ -124,7 +129,10 @@ for ifo in all_ifos:
     maxstat = stats[ifo].max()
     max_rate = 0
 
-    plotbins = np.linspace(fit_threshold, 1.05 * maxstat, 400)
+    statrange = maxstat - max(stats[ifo].min(), fit_threshold[ifo])
+    plotmax = maxstat + statrange * 0.05
+
+    plotbins = np.linspace(fit_threshold[ifo], plotmax, 400)
 
     logging.info("Putting events into bins")
     event_bin = np.array([tbins[d] for d in durations[ifo]])
@@ -134,8 +142,26 @@ for ifo in all_ifos:
         binlabel = f"{lower:.3g} - {upper:.3g}"
 
         inbin = event_bin == bin_num
+        bin_prop = bin_num / len(tbins)
+        bin_colour = plt.get_cmap(args.colormap)(bin_prop)
+
         # Skip if there are no triggers in this bin in this IFO
-        if not any(inbin):
+        if not any(inbin) or alphas[ifo][bin_num] == -1:
+            ax.plot(
+                [],
+                [],
+                linewidth=2,
+                color=bin_colour,
+                label=binlabel,
+                alpha=0.6
+            )
+            ax.plot(
+                [],
+                [],
+                "--",
+                color=bin_colour,
+                label="No triggers"
+            )
             continue
 
         binned_sngl_stats = stats[ifo][event_bin == bin_num]
@@ -147,15 +173,13 @@ for ifo in all_ifos:
         max_rate = max(max_rate, cum_rate[0])
 
         ecf = eval_cum_fit(
-            fit_function,
+            fit_function[ifo],
             plotbins,
             alphas[ifo][bin_num],
-            fit_threshold
+            fit_threshold[ifo]
         )
         cum_fit = counts[ifo][bin_num] / live_time[ifo] * ecf
 
-        bin_prop = bin_num / len(tbins)
-        bin_colour = plt.get_cmap(args.colormap)(bin_prop)
         ax.plot(edges[:-1], cum_rate, linewidth=2, color=bin_colour,
                 label=binlabel, alpha=0.6)
         ax.plot(plotbins, cum_fit, "--", color=bin_colour,
@@ -163,14 +187,14 @@ for ifo in all_ifos:
     ax.semilogy()
     ax.grid()
     ax.set_xlim(
-        fit_threshold if args.x_lim_lower is None else args.x_lim_lower,
-        1.05 * maxstat if args.x_lim_upper is None else args.x_lim_upper
+        fit_threshold[ifo] if args.x_lim_lower is None else args.x_lim_lower,
+        plotmax if args.x_lim_upper is None else args.x_lim_upper
     )
     ax.set_ylim(
         0.5 / live_time[ifo] if args.y_lim_lower is None else args.y_lim_lower,
         1.5 * max_rate if args.y_lim_upper is None else args.y_lim_upper
     )
-    ax.set_xlabel(sngl_ranking)
+    ax.set_xlabel(x_label)
     ax.set_ylabel("Number of louder triggers per live time")
     title = f"{ifo} {analysis_date} trigger fits"
     ax.set_title(title)
diff --git a/bin/live/pycbc_live_single_significance_fits b/bin/live/pycbc_live_single_significance_fits
index d6e79cdc1f7..5fa50804032 100644
--- a/bin/live/pycbc_live_single_significance_fits
+++ b/bin/live/pycbc_live_single_significance_fits
@@ -25,10 +25,11 @@ import h5py
 
 import pycbc
 from pycbc.bin_utils import IrregularBins
-from pycbc.events import cuts, trigger_fits as trstats
+from pycbc.events import cuts, trigger_fits as trstats, stat
 from pycbc.io import DictArray
 from pycbc.events import ranking
 from pycbc.events.coinc import cluster_over_time
+from pycbc.types import MultiDetOptionAction
 
 
 def duration_bins_from_cli(args):
@@ -75,6 +76,7 @@ parser.add_argument("--file-identifier", default="H1L1V1-Live",
                     help="String required in filename to be considered for "
                          "analysis. Default: 'H1L1V1-Live'.")
 parser.add_argument("--fit-function", default="exponential",
+                    action=MultiDetOptionAction,
                     choices=["exponential", "rayleigh", "power"],
                     help="Functional form for the maximum likelihood fit. "
                          "Choose from exponential, rayleigh or power. "
@@ -112,18 +114,20 @@ parser.add_argument("--prune-stat-threshold", type=float,
                     help="Minimum --sngl-ranking value to consider a "
                          "trigger for pruning.")
 parser.add_argument("--fit-threshold", type=float, default=5,
+                    action=MultiDetOptionAction,
                     help="Lower threshold used in fitting the triggers."
-                         "Default 5.")
+                         "Default 5. Can be supplied as a single value, "
+                         "or as a set of IFO:value pairs, e.g. H1:0, L1:-1")
 parser.add_argument("--cluster", action='store_true',
                     help="Only use maximum of the --sngl-ranking value "
                          "from each file.")
 parser.add_argument("--output", required=True,
                     help="File in which to save the output trigger fit "
                          "parameters.")
-parser.add_argument("--sngl-ranking", default="newsnr",
-                    choices=ranking.sngls_ranking_function_dict.keys(),
-                    help="The single-detector trigger ranking to use.")
-
+stat.insert_statistic_option_group(
+    parser,
+    default_ranking_statistic='single_ranking_only'
+)
 cuts.insert_cuts_option_group(parser)
 args = parser.parse_args()
@@ -133,8 +137,11 @@ pycbc.init_logging(args.verbose)
 # Check input options
 
 # Pruning options are mutually required or not needed
-prune_options = [args.prune_loudest, args.prune_window,
-                 args.prune_stat_threshold]
+prune_options = [
+    args.prune_loudest is not None,
+    args.prune_window is not None,
+    args.prune_stat_threshold is not None
+]
 
 if any(prune_options) and not all(prune_options):
     parser.error("Require all or none of --prune-loudest, "
@@ -162,6 +169,9 @@ if args.duration_bin_end and \
                  "--duration-bin-start, got "
                  f"{args.duration_bin_end} and {args.duration_bin_start}")
 
+
+stat_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords)
+
 duration_bin_edges = duration_bins_from_cli(args)
 logging.info("Duration bin edges: %s", duration_bin_edges)
@@ -177,13 +187,7 @@ args.template_cuts = args.template_cuts or []
 args.template_cuts.append(f"template_duration:{min(duration_bin_edges)}:lower")
 args.template_cuts.append(f"template_duration:{max(duration_bin_edges)}:upper_inc")
 
-# Efficiency saving: add SNR cut before any others as sngl_ranking can
-# only be less than SNR.
 args.trigger_cuts = args.trigger_cuts or []
-args.trigger_cuts.insert(0, f"snr:{args.fit_threshold}:lower_inc")
-
-# Cut triggers with sngl-ranking below threshold
-args.trigger_cuts.append(f"{args.sngl_ranking}:{args.fit_threshold}:lower_inc")
 
 logging.info("Setting up the cut dictionaries")
 trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args)
@@ -211,6 +215,9 @@ files = [f for f in os.listdir(date_directory)
 
 events = {}
 
+rank_method = {ifo: stat.get_statistic_from_opts(args, [ifo]) for ifo in args.ifos}
+
+logging.info("Processing %d files", len(files))
 for counter, filename in enumerate(files):
     if counter and counter % 1000 == 0:
         logging.info("Processed %d/%d files", counter, len(files))
@@ -284,10 +291,13 @@ for counter, filename in enumerate(files):
                        for k in trigs_ifo.keys()}
 
         # Calculate the sngl_ranking values
-        sngls_value = ranking.get_sngls_ranking_from_trigs(
-            triggers_cut, args.sngl_ranking)
+        sds = rank_method[ifo].single(triggers_cut)
+        sngls_value = rank_method[ifo].rank_stat_single(
+            (ifo, sds),
+            **stat_kwargs
+        )
 
-        triggers_cut[args.sngl_ranking] = sngls_value
+        triggers_cut['stat'] = sngls_value
 
         triggers_da = DictArray(data=triggers_cut)
@@ -330,7 +340,7 @@ for ifo in events:
     for bin_num in range(n_bins):
         inbin = event_bins[ifo] == bin_num
 
-        binned_events = events[ifo].data[args.sngl_ranking][inbin]
+        binned_events = events[ifo].data['stat'][inbin]
         binned_event_times = events[ifo].data['end_time'][inbin]
 
         # Cluster triggers in time with the pruning window to ensure
@@ -396,17 +406,32 @@ for ifo in events:
             alphas[ifo][bin_num] = -1
             continue
 
-        counts[ifo][bin_num] = np.count_nonzero(inbin)
+        stat_inbin = events[ifo].data['stat'][inbin]
+        counts[ifo][bin_num] = \
+            np.count_nonzero(stat_inbin > args.fit_threshold[ifo])
+
         alphas[ifo][bin_num], _ = trstats.fit_above_thresh(
-            args.fit_function,
-            events[ifo].data[args.sngl_ranking][inbin],
-            args.fit_threshold
+            args.fit_function[ifo],
+            stat_inbin,
+            args.fit_threshold[ifo]
        )
 
 logging.info("Writing results")
 with h5py.File(args.output, 'w') as fout:
-    for ifo in events:
+    for ifo in args.ifos:
         fout_ifo = fout.create_group(ifo)
+        fout_ifo.attrs['fit_function'] = args.fit_function[ifo]
+        fout_ifo.attrs['fit_threshold'] = args.fit_threshold[ifo]
+        if ifo not in events:
+            # There were no triggers, but we should still produce some
+            # information
+            fout_ifo['fit_coeff'] = -1 * np.ones(n_bins)
+            fout_ifo['counts'] = np.zeros(n_bins)
+            fout_ifo.attrs['live_time'] = live_time[ifo]
+            fout_ifo.attrs['pruned_times'] = []
+            fout_ifo.attrs['n_pruned'] = 0
+            continue
+
         # Save the triggers we have used for the fits
         fout_ifo_trigs = fout_ifo.create_group('triggers')
         for key in events[ifo].data:
@@ -427,8 +452,7 @@ with h5py.File(args.output, 'w') as fout:
     fout.attrs['analysis_date'] = args.analysis_date
     fout.attrs['input'] = sys.argv
     fout.attrs['cuts'] = args.template_cuts + args.trigger_cuts
-    fout.attrs['fit_function'] = args.fit_function
-    fout.attrs['fit_threshold'] = args.fit_threshold
     fout.attrs['sngl_ranking'] = args.sngl_ranking
+    fout.attrs['ranking_statistic'] = args.ranking_statistic
 
 logging.info("Done")
diff --git a/examples/live/make_fit_coeffs.py b/examples/live/make_fit_coeffs.py
new file mode 100644
index 00000000000..0894a9ca809
--- /dev/null
+++ b/examples/live/make_fit_coeffs.py
@@ -0,0 +1,27 @@
+"""
+Makes files which can be used as the fit_coeffs statistic.
+These are not of any scientific use, but the code will accept them
+and run properly
+"""
+
+import h5py
+import numpy as np
+
+# Get number of templates from bank file
+with h5py.File('template_bank.hdf', 'r') as bankf:
+    n_templates = bankf['mass1'].size
+
+for ifo in ['H1','L1','V1']:
+    with h5py.File(f'{ifo}-fit_coeffs.hdf','w') as fits_f:
+        fits_f.attrs['analysis_time'] = 430000
+        fits_f.attrs['ifo'] = ifo
+        fits_f.attrs['stat'] = f'{ifo}-fit_coeffs'
+        fits_f.attrs['stat_threshold'] = 5
+
+        fits_f['count_above_thresh'] = np.ones(n_templates) * 100
+        fits_f['count_in_template'] = np.ones(n_templates) * 20000
+        fits_f['fit_coeff'] = np.ones(n_templates) * 5.5
+        fits_f['median_sigma'] = np.ones(n_templates) * 5800
+        fits_f['template_id'] = np.arange(n_templates)
+
+
diff --git a/examples/live/make_singles_fits_file.py b/examples/live/make_singles_significance_fits.py
similarity index 97%
rename from examples/live/make_singles_fits_file.py
rename to examples/live/make_singles_significance_fits.py
index 770111dc458..192063fe075 100644
--- a/examples/live/make_singles_fits_file.py
+++ b/examples/live/make_singles_significance_fits.py
@@ -6,7 +6,7 @@
 import h5py
 import numpy as np
 
-f = h5py.File('single_trigger_fits.hdf','w')
+f = h5py.File('single_significance_fits.hdf','w')
 
 # Some numbers to design the output
 # These are loosely based on the O3a trigger fits file
diff --git a/examples/live/run.sh b/examples/live/run.sh
index 3c523d6bf00..434f134d5ad 100755
--- a/examples/live/run.sh
+++ b/examples/live/run.sh
@@ -42,16 +42,24 @@ else
     echo -e "\\n\\n>> [`date`] Pre-existing template bank found"
 fi
 
 # test if there is a single fits file. If not, make a representative one
-if [[ ! -f single_trigger_fits.hdf ]]
+if [[ ! -f single_significance_fits.hdf ]]
 then
-    echo -e "\\n\\n>> [`date`] Making single fits file"
-    python make_singles_fits_file.py
+    echo -e "\\n\\n>> [`date`] Making single significance fits file"
+    python make_singles_significance_fits.py
 else
-    echo -e "\\n\\n>> [`date`] Pre-existing single fits file found"
+    echo -e "\\n\\n>> [`date`] Pre-existing single significance fits file found"
 fi
 
+# test if there are fit_coeffs files for each detector.
+# If not, make some representative ones
+if [[ `ls -1 H1-fit_coeffs.hdf L1-fit_coeffs.hdf V1-fit_coeffs.hdf 2> /dev/null | wc -l` -lt 3 ]]
+then
+    echo -e "\\n\\n>> [`date`] Making fit coefficient files"
+    python make_fit_coeffs.py
+else
+    echo -e "\\n\\n>> [`date`] All needed fit coeffs files found"
+fi
 
 # test if there is a injection file.
 # If not, make one and delete any existing strain
@@ -178,8 +186,14 @@ python -m mpi4py `which pycbc_live` \
 --output-path output \
 --day-hour-output-prefix \
 --sngl-ranking newsnr_sgveto_psdvar_threshold \
---ranking-statistic phasetd \
---statistic-files statHL.hdf statHV.hdf statLV.hdf \
+--ranking-statistic phasetd_exp_fit_fgbg_norm \
+--statistic-files \
+  statHL.hdf \
+  statHV.hdf \
+  statLV.hdf \
+  H1-fit_coeffs.hdf \
+  L1-fit_coeffs.hdf \
+  V1-fit_coeffs.hdf \
 --sgchisq-snr-threshold 4 \
 --sgchisq-locations "mtotal>40:20-30,20-45,20-60,20-75,20-90,20-105,20-120" \
 --enable-background-estimation \
@@ -201,10 +215,11 @@ python -m mpi4py `which pycbc_live` \
 --snr-opt-di-popsize 100 \
 --snr-opt-include-candidate " \
 --sngl-ifar-est-dist conservative \
---single-newsnr-threshold 9 \
+--single-ranking-threshold 9 \
 --single-duration-threshold 7 \
 --single-reduced-chisq-threshold 2 \
---single-fit-file single_trigger_fits.hdf \
+--single-fit-file single_significance_fits.hdf \
+--single-maximum-ifar 100 \
 --psd-variation \
 --verbose
diff --git a/pycbc/events/single.py b/pycbc/events/single.py
index 6525602ea02..2df8fb8f4e6 100644
--- a/pycbc/events/single.py
+++ b/pycbc/events/single.py
@@ -1,9 +1,11 @@
 """ utilities for assigning FAR to single detector triggers """
 import logging
+import copy
 import h5py
 import numpy as np
-from pycbc.events import ranking, trigger_fits as fits
+
+from pycbc.events import trigger_fits as fits, stat
 from pycbc.types import MultiDetOptionAction
 from pycbc import conversions as conv
 from pycbc import bin_utils
@@ -13,35 +15,51 @@ class LiveSingle(object):
     def __init__(self, ifo,
-                 newsnr_threshold=10.0,
+                 ranking_threshold=10.0,
                  reduced_chisq_threshold=5,
                  duration_threshold=0,
                  fit_file=None,
                  sngl_ifar_est_dist=None,
-                 fixed_ifar=None):
+                 fixed_ifar=None,
+                 maximum_ifar=None,
+                 statistic=None,
+                 sngl_ranking=None,
+                 stat_files=None,
+                 **kwargs):
         self.ifo = ifo
         self.fit_file = fit_file
         self.sngl_ifar_est_dist = sngl_ifar_est_dist
         self.fixed_ifar = fixed_ifar
+        self.maximum_ifar = maximum_ifar
+
+        stat_class = stat.get_statistic(statistic)
+        self.stat_calculator = stat_class(
+            sngl_ranking,
+            stat_files,
+            ifos=[ifo],
+            **kwargs
+        )
 
         self.thresholds = {
-            "newsnr": newsnr_threshold,
+            "ranking": ranking_threshold,
             "reduced_chisq": reduced_chisq_threshold,
             "duration": duration_threshold}
 
     @staticmethod
     def insert_args(parser):
-        parser.add_argument('--single-newsnr-threshold', nargs='+',
+        parser.add_argument('--single-ranking-threshold', nargs='+',
                             type=float, action=MultiDetOptionAction,
-                            help='Newsnr min threshold for single triggers. '
-                                 'Can be given as a single value or as '
-                                 'detector-value pairs, e.g. H1:6 L1:7 V1:6.5')
+                            help='Single ranking threshold for '
+                                 'single-detector events. Can be given '
+                                 'as a single value or as detector-value '
+                                 'pairs, e.g. H1:6 L1:7 V1:6.5')
         parser.add_argument('--single-reduced-chisq-threshold', nargs='+',
                             type=float, action=MultiDetOptionAction,
                             help='Maximum reduced chi-squared threshold for '
-                                 'single triggers. Can be given as a single '
-                                 'value or as detector-value pairs, e.g. '
-                                 'H1:2 L1:2 V1:3')
+                                 'single triggers. Calculated after any PSD '
+                                 'variation reweighting is applied. Can be '
+                                 'given as a single value or as '
+                                 'detector-value pairs, e.g. H1:2 L1:2 V1:3')
         parser.add_argument('--single-duration-threshold', nargs='+',
                             type=float, action=MultiDetOptionAction,
                             help='Minimum duration threshold for single '
@@ -54,6 +72,12 @@ def insert_args(parser):
                                  'defined by command line. Can be given as '
                                  'a single value or as detector-value pairs, '
                                  'e.g. H1:0.001 L1:0.001 V1:0.0005')
+        parser.add_argument('--single-maximum-ifar', nargs='+',
+                            type=float, action=MultiDetOptionAction,
+                            help='A maximum possible value for IFAR for '
+                                 'single-detector events. Can be given as '
+                                 'a single value or as detector-value pairs, '
+                                 'e.g. H1:100 L1:1000 V1:50')
         parser.add_argument('--single-fit-file',
                             help='File which contains definitons of fit '
                                  'coefficients and counts for specific '
@@ -70,11 +94,12 @@ def verify_args(args, parser, ifos):
        sngl_opts = [args.single_reduced_chisq_threshold,
                      args.single_duration_threshold,
-                     args.single_newsnr_threshold,
+                     args.single_ranking_threshold,
                      args.sngl_ifar_est_dist]
+
        sngl_opts_str = ("--single-reduced-chisq-threshold, "
                          "--single-duration-threshold, "
-                         "--single-newsnr-threshold, "
+                         "--single-ranking-threshold, "
                          "--sngl-ifar-est-dist")
 
        if any(sngl_opts) and not all(sngl_opts):
@@ -87,12 +112,14 @@ def verify_args(args, parser, ifos):
                          "--enable-gracedb-upload to be set.")
 
        sngl_optional_opts = [args.single_fixed_ifar,
-                              args.single_fit_file]
+                              args.single_fit_file,
+                              args.single_maximum_ifar]
        sngl_optional_opts_str = ("--single-fixed-ifar, "
-                                  "--single-fit-file")
+                                  "--single-fit-file, "
+                                  "--single-maximum-ifar")
        if any(sngl_optional_opts) and not all(sngl_opts):
            parser.error("Optional singles options "
-                         f"({sngl_optional_opts_str}) given but no "
+                         f"({sngl_optional_opts_str}) given but not all "
                          f"required options ({sngl_opts_str}) are.")
 
        for ifo in ifos:
@@ -141,13 +168,27 @@ def verify_args(args, parser, ifos):
 
     @classmethod
     def from_cli(cls, args, ifo):
+        # Allow None inputs
+        stat_files = args.statistic_files or []
+        stat_keywords = args.statistic_keywords or []
+
+        # flatten the list of lists of filenames to a single list
+        # (may be empty)
+        stat_files = sum(stat_files, [])
+
+        kwargs = stat.parse_statistic_keywords_opt(stat_keywords)
         return cls(
-            ifo, newsnr_threshold=args.single_newsnr_threshold[ifo],
+            ifo, ranking_threshold=args.single_ranking_threshold[ifo],
             reduced_chisq_threshold=args.single_reduced_chisq_threshold[ifo],
             duration_threshold=args.single_duration_threshold[ifo],
             fixed_ifar=args.single_fixed_ifar,
+            maximum_ifar=args.single_maximum_ifar[ifo],
             fit_file=args.single_fit_file,
-            sngl_ifar_est_dist=args.sngl_ifar_est_dist[ifo]
+            sngl_ifar_est_dist=args.sngl_ifar_est_dist[ifo],
+            statistic=args.ranking_statistic,
+            sngl_ranking=args.sngl_ranking,
+            stat_files=stat_files,
+            **kwargs
         )
 
     def check(self, trigs, data_reader):
@@ -156,37 +197,56 @@ def check(self, trigs, data_reader):
         """
         Look for a single detector trigger that passes the thresholds in
         the current data.
         """
 
         # Apply cuts to trigs before clustering
-        # Cut on snr so that triggers which could not reach newsnr
-        # threshold do not have newsnr calculated
+        # Cut on snr so that triggers which could not reach the ranking
+        # threshold do not have ranking calculated
+        if 'psd_var_val' in trigs:
+            # We should apply the PSD variation rescaling, as this can
+            # re-weight the SNR to be above or below the threshold
+            trig_chisq = trigs['chisq'] / trigs['psd_var_val']
+            trig_snr = trigs['snr'] / (trigs['psd_var_val'] ** 0.5)
+        else:
+            trig_chisq = trigs['chisq']
+            trig_snr = trigs['snr']
+
         valid_idx = (trigs['template_duration'] >
                      self.thresholds['duration']) & \
-                    (trigs['chisq'] <
-                     self.thresholds['reduced_chisq']) & \
-                    (trigs['snr'] >
-                     self.thresholds['newsnr'])
+                    (trig_chisq <
+                     self.thresholds['reduced_chisq']) & \
+                    (trig_snr >
+                     self.thresholds['ranking'])
         if not np.any(valid_idx):
             return None
 
-        cutdurchi_trigs = {k: trigs[k][valid_idx] for k in trigs}
+        cut_trigs = {k: trigs[k][valid_idx] for k in trigs}
 
-        # This uses the pycbc live convention of chisq always meaning the
-        # reduced chisq.
-        nsnr_all = ranking.newsnr(cutdurchi_trigs['snr'],
-                                  cutdurchi_trigs['chisq'])
-        nsnr_idx = nsnr_all > self.thresholds['newsnr']
-        if not np.any(nsnr_idx):
+        # Convert back from the pycbc live convention of chisq always
+        # meaning the reduced chisq.
+        trigsc = copy.copy(cut_trigs)
+        trigsc['ifo'] = self.ifo
+        trigsc['chisq'] = cut_trigs['chisq'] * cut_trigs['chisq_dof']
+        trigsc['chisq_dof'] = (cut_trigs['chisq_dof'] + 2) / 2
+
+        # Calculate the ranking reweighted SNR for cutting
+        single_rank = self.stat_calculator.get_sngl_ranking(trigsc)
+        sngl_idx = single_rank > self.thresholds['ranking']
+        if not np.any(sngl_idx):
             return None
 
-        cutall_trigs = {k: cutdurchi_trigs[k][nsnr_idx]
+        cutall_trigs = {k: trigsc[k][sngl_idx]
                         for k in trigs}
 
-        # 'cluster' by taking the maximal newsnr value over the trigger set
-        i = nsnr_all[nsnr_idx].argmax()
+        # Calculate the ranking statistic
+        sngl_stat = self.stat_calculator.single(cutall_trigs)
+        rank = self.stat_calculator.rank_stat_single((self.ifo, sngl_stat))
+
+        # 'cluster' by taking the maximal statistic value over the trigger set
+        i = rank.argmax()
 
         # calculate the (inverse) false-alarm rate
-        nsnr = nsnr_all[nsnr_idx][i]
-        dur = cutall_trigs['template_duration'][i]
-        ifar = self.calculate_ifar(nsnr, dur)
+        ifar = self.calculate_ifar(
+            rank[i],
+            trigsc['template_duration'][i]
+        )
+
         if ifar is None:
             return None
 
@@ -194,7 +254,7 @@ def check(self, trigs, data_reader):
         candidate = {
             f'foreground/{self.ifo}/{k}': cutall_trigs[k][i] for k in trigs
         }
-        candidate['foreground/stat'] = nsnr
+        candidate['foreground/stat'] = rank[i]
         candidate['foreground/ifar'] = ifar
         candidate['HWINJ'] = data_reader.near_hwinj()
         return candidate
@@ -232,10 +292,14 @@ def calculate_ifar(self, sngl_ranking, duration):
             )
             return None
 
-        rate_louder = rate * fits.cum_fit('exponential', [sngl_ranking],
-                                          coeff, thresh)[0]
+        rate_louder = rate * fits.cum_fit(
+            'exponential',
+            [sngl_ranking],
+            coeff,
+            thresh
+        )[0]
 
         # apply a trials factor of the number of duration bins
         rate_louder *= len(rates)
 
-        return conv.sec_to_year(1. / rate_louder)
+        return min(conv.sec_to_year(1. / rate_louder), self.maximum_ifar)
diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py
index 5876bd81e9b..fb4bc8eee9a 100644
--- a/pycbc/events/stat.py
+++ b/pycbc/events/stat.py
@@ -682,7 +682,7 @@ def rank_stat_single(self, single_info,
         numpy.ndarray
             The array of single detector statistics
         """
-        return single_info[1]
+        return single_info[1]['snglstat']
 
     def rank_stat_coinc(self, sngls_list, slide, step, to_shift,
                         **kwargs):  # pylint:disable=unused-argument
@@ -711,8 +711,8 @@ def coinc_lim_for_thresh(self, sngls_list, thresh, limifo,
         if not self.has_hist:
             self.get_hist()
 
-        lim_stat = [b['snglstat'] for a, b in sngls_list if a == limifo][0]
-        s1 = thresh ** 2. - lim_stat ** 2.
+        fixed_statsq = sum([b['snglstat'] ** 2 for a, b in sngls_list])
+        s1 = thresh ** 2. - fixed_statsq
         # Assume best case scenario and use maximum signal rate
         s1 -= 2. * self.hist_max
         s1[s1 < 0] = 0
@@ -1015,6 +1015,7 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
         # for low-mass templates the exponential slope alpha \approx 6
         self.alpharef = 6.
         self.single_increasing = True
+        self.single_dtype = numpy.float32
 
     def use_alphamax(self):
         """
@@ -1065,9 +1066,9 @@ def rank_stat_single(self, single_info,
             The array of single detector statistics
         """
         if self.single_increasing:
-            sngl_multiifo = single_info[1]['snglstat']
+            sngl_multiifo = single_info[1]
         else:
-            sngl_multiifo = -1. * single_info[1]['snglstat']
+            sngl_multiifo = -1. * single_info[1]
         return sngl_multiifo
 
     def rank_stat_coinc(self, s, slide, step, to_shift,
@@ -2435,7 +2436,10 @@ def get_statistic_from_opts(opts, ifos):
         opts.statistic_keywords = []
 
     # flatten the list of lists of filenames to a single list (may be empty)
-    opts.statistic_files = sum(opts.statistic_files, [])
+    # if needed (e.g. not calling get_statistic_from_opts in a loop)
+    if len(opts.statistic_files) > 0 and \
+            isinstance(opts.statistic_files[0], list):
+        opts.statistic_files = sum(opts.statistic_files, [])
 
     extra_kwargs = parse_statistic_keywords_opt(opts.statistic_keywords)
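
A note on the output-format change above: fit_threshold and fit_function move from the fits file's root attributes onto each IFO group. A minimal sketch of reading the new layout back, mirroring the updated plot script; 'trigger_fits.hdf' is a placeholder name for a file written by the updated pycbc_live_single_significance_fits.

import h5py

# Placeholder path: assumes a fits file produced by the updated script
with h5py.File('trigger_fits.hdf', 'r') as fit_f:
    ifos = [k for k in fit_f.keys() if not k.startswith('bins')]
    # These two now live on each IFO group rather than the root attrs
    fit_thresh = {ifo: fit_f[ifo].attrs['fit_threshold'] for ifo in ifos}
    fit_func = {ifo: fit_f[ifo].attrs['fit_function'] for ifo in ifos}
    # The root keeps the ranking-statistic metadata used for axis labelling
    print(fit_f.attrs['ranking_statistic'], fit_thresh, fit_func)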
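The central behavioural change is that single-detector triggers are now ranked through the statistic classes (single() followed by rank_stat_single()) instead of a bare sngl-ranking call. A minimal sketch of that flow, assuming the 'quadsum' statistic with the 'newsnr' sngl-ranking (one of the combinations the plot script treats as equivalent to the bare sngl-ranking); the trigger values are made up.

import numpy as np
from pycbc.events import stat

# 'quadsum' with no statistic files reduces to the sngl-ranking itself
stat_class = stat.get_statistic('quadsum')
rank_method = stat_class('newsnr', [], ifos=['H1'])

# Made-up triggers; 'newsnr' needs snr, chisq and chisq_dof
triggers = {
    'snr': np.array([9.5, 12.0]),
    'chisq': np.array([40.0, 30.0]),
    'chisq_dof': np.array([20, 20]),
}
sds = rank_method.single(triggers)             # per-trigger statistic data
sngls_value = rank_method.rank_stat_single(('H1', sds))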
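The new --single-maximum-ifar option caps the IFAR that LiveSingle.calculate_ifar can assign. A worked sketch of that arithmetic with invented numbers; only cum_fit, sec_to_year and the trials factor over duration bins come from the patch, everything else is illustrative.

from pycbc import conversions as conv
from pycbc.events import trigger_fits as fits

# Invented inputs: rate of triggers above the fit threshold, fit
# coefficient, fit threshold, candidate ranking, duration-bin count
rate = 1e-4            # triggers above threshold per second of live time
coeff, thresh = 5.5, 9.0
sngl_ranking = 10.5
n_duration_bins = 5
maximum_ifar = 100.0   # years, as set by --single-maximum-ifar

rate_louder = rate * fits.cum_fit(
    'exponential', [sngl_ranking], coeff, thresh
)[0]
# Trials factor over the duration bins, as in calculate_ifar
rate_louder *= n_duration_bins
# The cap keeps a quiet detector from producing arbitrarily large IFARs
ifar = min(conv.sec_to_year(1.0 / rate_louder), maximum_ifar)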