From 85a9877bc732c6a8e579cfb2395640ef41706c39 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 26 Nov 2024 06:14:17 -0800 Subject: [PATCH 1/4] Some efficiency savings for pycbc_fit_sngls_over_multiparam --- .../pycbc_fit_sngls_over_multiparam | 63 +++++++++++-------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam index 45b46fa32e3..5183f8e8334 100755 --- a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam +++ b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam @@ -59,6 +59,7 @@ def smooth_templates(nabove, invalphan, ntotal, template_idx, ------------------- weights: ndarray Weighting factor to apply to the templates specified by template_idx + If None, then numpy.average will revert to numpy.mean Returns ------- @@ -68,7 +69,6 @@ def smooth_templates(nabove, invalphan, ntotal, template_idx, Third float: the smoothed total count in template value """ - if weights is None: weights = numpy.ones_like(template_idx) nabove_t_smoothed = numpy.average(nabove[template_idx], weights=weights) ntotal_t_smoothed = numpy.average(ntotal[template_idx], weights=weights) invalphan_mean = numpy.average(invalphan[template_idx], weights=weights) @@ -119,7 +119,7 @@ def smooth_distance_weighted(nabove, invalphan, ntotal, dists): Smooth templates weighted according to dists in a unit-width normal distribution, truncated at three sigma """ - idx_within_area = numpy.flatnonzero(dists < 3.) + idx_within_area = dists < 3. weights = norm.pdf(dists[idx_within_area]) return smooth_templates(nabove, invalphan, ntotal, idx_within_area, weights=weights) @@ -404,10 +404,12 @@ for param, slog in zip(args.fit_param, args.log_param): else: raise ValueError("invalid log param argument, use 'true', or 'false'") -nabove_smoothed = [] -alpha_smoothed = [] -ntotal_smoothed = [] -rang = numpy.arange(0, len(nabove)) +n_templates = len(nabove) +rang = numpy.arange(0, n_templates) + +nabove_smoothed = numpy.zeros_like(parvals[0]) +alpha_smoothed = numpy.zeros_like(parvals[0]) +ntotal_smoothed = numpy.zeros_like(parvals[0]) # Handle the one-dimensional case of tophat smoothing separately # as it is easier to optimize computational performance. @@ -459,38 +461,49 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): # Sort the values to be smoothed by parameter value logging.info("Smoothing ...") slices = [slice(l,r) for l, r in zip(lefts, rights)] + nabove_sort = nabove[par_sort] + invalphan_sort = invalphan[par_sort] + ntotal_sort = ntotal[par_sort] for i in rang: - report_percentage(i, rang.max()) + report_percentage(i, n_templates) slc = slices[i] d = dist(i, slc, parvals, args.smoothing_width) - smoothed_tuple = smooth(nabove[par_sort][slc], - invalphan[par_sort][slc], - ntotal[par_sort][slc], - d, - args.smoothing_method, - **kwarg_dict) - nabove_smoothed.append(smoothed_tuple[0]) - alpha_smoothed.append(smoothed_tuple[1]) - ntotal_smoothed.append(smoothed_tuple[2]) + smoothed_tuple = smooth( + nabove_sort[slc], + invalphan_sort[slc], + ntotal_sort[slc], + d, + args.smoothing_method, + **kwarg_dict + ) + nabove_smoothed[i] = smoothed_tuple[0] + alpha_smoothed[i] = smoothed_tuple[1] + ntotal_smoothed[i] = smoothed_tuple[2] # Undo the sorts unsort = numpy.argsort(par_sort) parvals = [p[unsort] for p in parvals] - nabove_smoothed = numpy.array(nabove_smoothed)[unsort] - alpha_smoothed = numpy.array(alpha_smoothed)[unsort] - ntotal_smoothed = numpy.array(ntotal_smoothed)[unsort] + nabove_smoothed = nabove_smoothed[unsort] + alpha_smoothed = alpha_smoothed[unsort] + ntotal_smoothed = ntotal_smoothed[unsort] else: logging.info("Smoothing ...") for i in rang: - report_percentage(i, rang.max()) + report_percentage(i, n_templates) d = dist(i, rang, parvals, args.smoothing_width) - smoothed_tuple = smooth(nabove, invalphan, ntotal, d, - args.smoothing_method, **kwarg_dict) - nabove_smoothed.append(smoothed_tuple[0]) - alpha_smoothed.append(smoothed_tuple[1]) - ntotal_smoothed.append(smoothed_tuple[2]) + smoothed_tuple = smooth( + nabove, + invalphan, + ntotal, + d, + args.smoothing_method, + **kwarg_dict + ) + nabove_smoothed[i] = smoothed_tuple[0] + alpha_smoothed[i] = smoothed_tuple[1] + ntotal_smoothed[i] = smoothed_tuple[2] logging.info("Writing output") outfile = HFile(args.output, 'w') From dc7eab74849d7b5442caf26e08d77752c464a2c8 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 11 Dec 2024 03:46:48 -0800 Subject: [PATCH 2/4] TD review comments --- .../pycbc_fit_sngls_over_multiparam | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam index 5183f8e8334..1c507c070dc 100755 --- a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam +++ b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam @@ -255,7 +255,7 @@ init_logging(args.verbose) analysis_time = 0 attr_dict = {} -# These end up as n_files * n_templates arrays +# These end up as n_files * num_templates arrays tid = numpy.array([], dtype=int) nabove = numpy.array([], dtype=int) ntotal = numpy.array([], dtype=int) @@ -323,7 +323,7 @@ invalphan = invalpha * nabove analysis_time /= len(args.template_fit_file) if len(args.template_fit_file) > 1: - # From the n_templates * n_files arrays, average within each template. + # From the num_templates * n_files arrays, average within each template. # To do this, we average the n_files occurrences which have the same tid # The linearity of the average means that we can do this in two steps @@ -404,12 +404,15 @@ for param, slog in zip(args.fit_param, args.log_param): else: raise ValueError("invalid log param argument, use 'true', or 'false'") -n_templates = len(nabove) -rang = numpy.arange(0, n_templates) +rang = numpy.arange(0, num_templates) -nabove_smoothed = numpy.zeros_like(parvals[0]) -alpha_smoothed = numpy.zeros_like(parvals[0]) -ntotal_smoothed = numpy.zeros_like(parvals[0]) +# Preallocate memory for smoothing results +# smoothed_vals is an array containing the smoothed important values +# regarding the template fits: +# smoothed_vals[:,0] is the number of triggers above the fit threshold +# smoothed_vals[:,1] is the fit coefficient (alpha) +# smoothed_vals[:,2] is the total number of trigger in the template +smoothed_vals = numpy.zeros((num_templates, 3)) # Handle the one-dimensional case of tophat smoothing separately # as it is easier to optimize computational performance. @@ -432,10 +435,10 @@ if len(parvals) == 1 and args.smoothing_method == 'smooth_tophat': num = right - left logging.info("Smoothing ...") - nabove_smoothed = (nasum[right] - nasum[left]) / num + smoothed_vals[:,0] = (nasum[right] - nasum[left]) / num invmean = (invsum[right] - invsum[left]) / num - alpha_smoothed = nabove_smoothed / invmean - ntotal_smoothed = (ntsum[right] - ntsum[left]) / num + smoothed_vals[:,1] = smoothed_vals[:,0] / invmean + smoothed_vals[:,2] = (ntsum[right] - ntsum[left]) / num elif numpy.isfinite(_smooth_cut[args.smoothing_method]): c = _smooth_cut[args.smoothing_method] @@ -455,7 +458,7 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): parvals[sort_dim] - cut_lengths[sort_dim]) rights = numpy.searchsorted(parvals[sort_dim], parvals[sort_dim] + cut_lengths[sort_dim]) - n_removed = len(parvals[0]) - rights + lefts + n_removed = num_templates - rights + lefts logging.info("Cutting between %d and %d templates for each smoothing", n_removed.min(), n_removed.max()) # Sort the values to be smoothed by parameter value @@ -464,8 +467,12 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): nabove_sort = nabove[par_sort] invalphan_sort = invalphan[par_sort] ntotal_sort = ntotal[par_sort] + # Preallocate memory for *param_vals[0]-sorted* smoothing results + nabove_smoothed = numpy.zeros(num_templates) + alpha_smoothed = numpy.zeros(num_templates) + ntotal_smoothed = numpy.zeros(num_templates) for i in rang: - report_percentage(i, n_templates) + report_percentage(i, num_templates) slc = slices[i] d = dist(i, slc, parvals, args.smoothing_width) @@ -484,16 +491,16 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): # Undo the sorts unsort = numpy.argsort(par_sort) parvals = [p[unsort] for p in parvals] - nabove_smoothed = nabove_smoothed[unsort] - alpha_smoothed = alpha_smoothed[unsort] - ntotal_smoothed = ntotal_smoothed[unsort] + smoothed_vals[:,0] = nabove_smoothed[unsort] + smoothed_vals[:,1] = alpha_smoothed[unsort] + smoothed_vals[:,2] = ntotal_smoothed[unsort] else: logging.info("Smoothing ...") for i in rang: - report_percentage(i, n_templates) + report_percentage(i, num_templates) d = dist(i, rang, parvals, args.smoothing_width) - smoothed_tuple = smooth( + smoothed_vals[i,:] = smooth( nabove, invalphan, ntotal, @@ -501,16 +508,13 @@ else: args.smoothing_method, **kwarg_dict ) - nabove_smoothed[i] = smoothed_tuple[0] - alpha_smoothed[i] = smoothed_tuple[1] - ntotal_smoothed[i] = smoothed_tuple[2] logging.info("Writing output") outfile = HFile(args.output, 'w') outfile['template_id'] = tid -outfile['count_above_thresh'] = nabove_smoothed -outfile['fit_coeff'] = alpha_smoothed -outfile['count_in_template'] = ntotal_smoothed +outfile['count_above_thresh'] = smoothed_vals[:,0] +outfile['fit_coeff'] = smoothed_vals[:,1] +outfile['count_in_template'] = smoothed_vals[:,2] if median_sigma is not None: outfile['median_sigma'] = median_sigma From c8da9ef9c41aa51fc2d9e961f3ad48acda323c5a Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Thu, 12 Dec 2024 01:34:10 -0800 Subject: [PATCH 3/4] remove triplicate allocation --- bin/all_sky_search/pycbc_fit_sngls_over_multiparam | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam index 1c507c070dc..ccd9805ebb7 100755 --- a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam +++ b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam @@ -467,16 +467,12 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): nabove_sort = nabove[par_sort] invalphan_sort = invalphan[par_sort] ntotal_sort = ntotal[par_sort] - # Preallocate memory for *param_vals[0]-sorted* smoothing results - nabove_smoothed = numpy.zeros(num_templates) - alpha_smoothed = numpy.zeros(num_templates) - ntotal_smoothed = numpy.zeros(num_templates) for i in rang: report_percentage(i, num_templates) slc = slices[i] d = dist(i, slc, parvals, args.smoothing_width) - smoothed_tuple = smooth( + smoothed_vals[i,:] = smooth( nabove_sort[slc], invalphan_sort[slc], ntotal_sort[slc], @@ -484,16 +480,11 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): args.smoothing_method, **kwarg_dict ) - nabove_smoothed[i] = smoothed_tuple[0] - alpha_smoothed[i] = smoothed_tuple[1] - ntotal_smoothed[i] = smoothed_tuple[2] # Undo the sorts unsort = numpy.argsort(par_sort) parvals = [p[unsort] for p in parvals] - smoothed_vals[:,0] = nabove_smoothed[unsort] - smoothed_vals[:,1] = alpha_smoothed[unsort] - smoothed_vals[:,2] = ntotal_smoothed[unsort] + smoothed_vals = smoothed_vals[unsort,:] else: logging.info("Smoothing ...") From 9e41dabcb2ca1d90f29c2e6f8a252c6f38f96d16 Mon Sep 17 00:00:00 2001 From: Thomas Dent Date: Thu, 12 Dec 2024 13:47:55 +0100 Subject: [PATCH 4/4] Minor pep8 / comment / ordering tweaks --- .../pycbc_fit_sngls_over_multiparam | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam index ccd9805ebb7..60adab5deeb 100755 --- a/bin/all_sky_search/pycbc_fit_sngls_over_multiparam +++ b/bin/all_sky_search/pycbc_fit_sngls_over_multiparam @@ -90,7 +90,6 @@ def smooth_tophat(nabove, invalphan, ntotal, dists): ntotal, idx_within_area) - # This is the default number of triggers required for n_closest smoothing _default_total_trigs = 500 @@ -172,6 +171,7 @@ def report_percentage(i, length): if not pc % 10 and pc_last % 10: logging.info(f"Template {i} out of {length} ({pc:.0f}%)") + parser = argparse.ArgumentParser(usage="", description="Smooth (regress) the dependence of coefficients describing " "single-ifo background trigger distributions on a template " @@ -407,11 +407,10 @@ for param, slog in zip(args.fit_param, args.log_param): rang = numpy.arange(0, num_templates) # Preallocate memory for smoothing results -# smoothed_vals is an array containing the smoothed important values -# regarding the template fits: +# smoothed_vals is an array containing smoothed template fit values : # smoothed_vals[:,0] is the number of triggers above the fit threshold -# smoothed_vals[:,1] is the fit coefficient (alpha) -# smoothed_vals[:,2] is the total number of trigger in the template +# smoothed_vals[:,1] is the fit coefficient 'alpha' +# smoothed_vals[:,2] is the total number of triggers in the template smoothed_vals = numpy.zeros((num_templates, 3)) # Handle the one-dimensional case of tophat smoothing separately @@ -437,7 +436,7 @@ if len(parvals) == 1 and args.smoothing_method == 'smooth_tophat': logging.info("Smoothing ...") smoothed_vals[:,0] = (nasum[right] - nasum[left]) / num invmean = (invsum[right] - invsum[left]) / num - smoothed_vals[:,1] = smoothed_vals[:,0] / invmean + smoothed_vals[:,1] = smoothed_vals[:, 0] / invmean smoothed_vals[:,2] = (ntsum[right] - ntsum[left]) / num elif numpy.isfinite(_smooth_cut[args.smoothing_method]): @@ -461,12 +460,13 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): n_removed = num_templates - rights + lefts logging.info("Cutting between %d and %d templates for each smoothing", n_removed.min(), n_removed.max()) + # Sort the values to be smoothed by parameter value logging.info("Smoothing ...") - slices = [slice(l,r) for l, r in zip(lefts, rights)] nabove_sort = nabove[par_sort] invalphan_sort = invalphan[par_sort] ntotal_sort = ntotal[par_sort] + slices = [slice(l, r) for l, r in zip(lefts, rights)] for i in rang: report_percentage(i, num_templates) slc = slices[i] @@ -484,14 +484,14 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]): # Undo the sorts unsort = numpy.argsort(par_sort) parvals = [p[unsort] for p in parvals] - smoothed_vals = smoothed_vals[unsort,:] + smoothed_vals = smoothed_vals[unsort, :] else: logging.info("Smoothing ...") for i in rang: report_percentage(i, num_templates) d = dist(i, rang, parvals, args.smoothing_width) - smoothed_vals[i,:] = smooth( + smoothed_vals[i, :] = smooth( nabove, invalphan, ntotal, @@ -503,9 +503,9 @@ else: logging.info("Writing output") outfile = HFile(args.output, 'w') outfile['template_id'] = tid -outfile['count_above_thresh'] = smoothed_vals[:,0] -outfile['fit_coeff'] = smoothed_vals[:,1] -outfile['count_in_template'] = smoothed_vals[:,2] +outfile['count_above_thresh'] = smoothed_vals[:, 0] +outfile['fit_coeff'] = smoothed_vals[:, 1] +outfile['count_in_template'] = smoothed_vals[:, 2] if median_sigma is not None: outfile['median_sigma'] = median_sigma