diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index 0368bb05371..0d03582b12a 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -26,7 +26,7 @@ def newsnr(snr, reduced_x2, q=6., n=2.): reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. Previous implementation in glue/ligolw/lsctables.py """ - nsnr = numpy.array(snr, ndmin=1, dtype=numpy.float64) + nsnr = numpy.array(numpy.abs(snr), ndmin=1, dtype=numpy.float64) reduced_x2 = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) # newsnr is only different from snr if reduced chisq > 1 diff --git a/pycbc/psd/__init__.py b/pycbc/psd/__init__.py index 99ef4bdb9a6..66bf710cf2d 100644 --- a/pycbc/psd/__init__.py +++ b/pycbc/psd/__init__.py @@ -15,6 +15,9 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import copy +import numpy +import logging +from pycbc.io import HFile from ligo import segments from pycbc.psd.read import * from pycbc.psd.analytical import * @@ -27,6 +30,9 @@ from pycbc.types import copy_opts_for_single_ifo from pycbc.types import required_opts, required_opts_multi_ifo from pycbc.types import ensure_one_opt, ensure_one_opt_multi_ifo +from pycbc import DYN_RANGE_FAC + +logger = logging.getLogger('pycbc.psd') def from_cli(opt, length, delta_f, low_frequency_cutoff, strain=None, dyn_range_factor=1, precision=None): @@ -241,8 +247,14 @@ def insert_psd_option_group(parser, output=True, include_data_options=True): psd_options.add_argument("--psdvar-high-freq", type=float, metavar="HERTZ", help="Maximum frequency to consider in strain " "bandpass.") - - if include_data_options : + psd_options.add_argument("--precomputed-psd-file", metavar='FILE', type=str, + help='Given HDF5 MERGE PSD file as the option, ' + 'appropriate PSD for each strain segment will be ' + 'chosen and use which is precomputed. 
PSD from the ' + 'strain data wil not be computed when this option ' + 'is provided.') + + if include_data_options: psd_options.add_argument("--psd-estimation", help="Measure PSD from the data, using " "given average method.", @@ -266,7 +278,10 @@ def insert_psd_option_group(parser, output=True, include_data_options=True): if output: psd_options.add_argument("--psd-output", help="(Optional) Write PSD to specified file") - + psd_options.add_argument("--overlapping-psd-output", + help="(Optional) Write PSD to specified file in the format " + "of calculate_psd output. --output-psd only saves psd of " + "only one segment") return psd_options def insert_psd_option_group_multi_ifo(parser): @@ -499,6 +514,32 @@ def generate_overlapping_psds(opt, gwstrain, flen, delta_f, flow, psd = from_cli(opt, flen, delta_f, flow, strain=strain_part, dyn_range_factor=dyn_range_factor, precision=precision) psds_and_times.append( (start_idx, end_idx, psd) ) + + if hasattr(opt, 'overlapping_psd_output') and opt.overlapping_psd_output: + logging.info(f"Saving overlapping PSDs for segments to " + f"{opt.overlapping_psd_output}") + f = HFile(opt.overlapping_psd_output, 'w') + ifo = opt.channel_name[0:2] + psd_group = f.create_group(ifo + '/psds') + start, end = [], [] + for inc, pnt in enumerate(psds_and_times): + start_idx, end_idx, psd = pnt + key = str(inc) + start_time = gwstrain.start_time + start_idx/gwstrain.sample_rate + end_time = gwstrain.start_time + end_idx/gwstrain.sample_rate + start.append(int(start_time)) + end.append(int(end_time)) + psd_group.create_dataset(key, data=psd.numpy(), compression='gzip', + compression_opts=9, shuffle=True) + psd_group[key].attrs['epoch'] = int(start_time) + psd_group[key].attrs['delta_f'] = psd.delta_f + + f[ifo + '/start_time'] = numpy.array(start, dtype=numpy.uint32) + f[ifo + '/end_time'] = numpy.array(end, dtype=numpy.uint32) + f.attrs['low_frequency_cutoff'] = opt.low_frequency_cutoff + f.attrs['dynamic_range_factor'] = DYN_RANGE_FAC + 
f.close() + return psds_and_times def associate_psds_to_segments(opt, fd_segments, gwstrain, flen, delta_f, flow, @@ -534,26 +575,45 @@ def associate_psds_to_segments(opt, fd_segments, gwstrain, flen, delta_f, flow, that precision. If 'double' the PSD will be converted to float64, if not already in that precision. """ - psds_and_times = generate_overlapping_psds(opt, gwstrain, flen, delta_f, - flow, dyn_range_factor=dyn_range_factor, - precision=precision) - - for fd_segment in fd_segments: - best_psd = None - psd_overlap = 0 - inp_seg = segments.segment(fd_segment.seg_slice.start, - fd_segment.seg_slice.stop) - for start_idx, end_idx, psd in psds_and_times: - psd_seg = segments.segment(start_idx, end_idx) - if psd_seg.intersects(inp_seg): - curr_overlap = abs(inp_seg & psd_seg) - if curr_overlap > psd_overlap: - psd_overlap = curr_overlap - best_psd = psd - if best_psd is None: - err_msg = "No PSDs found intersecting segment!" - raise ValueError(err_msg) - fd_segment.psd = best_psd + if opt.precomputed_psd_file: + logging.info(f"Reading Precomuted PSDs from {opt.precomputed_psd_file}") + tpsd = PrecomputedTimeVaryingPSD(opt, + length=len(fd_segments[0].data), + delta_f=fd_segments[0].delta_f, + sample_rate=fd_segments[0].sample_rate) + + for fd_segment in fd_segments: + inp_seg = segments.segment(fd_segment.start_time, + fd_segment.end_time) + best_psd = tpsd.assosiate_psd_to_inspiral_segment(inp_seg, + delta_f=fd_segment.delta_f) + if best_psd is None: + err_msg = "No PSDs found intersecting segment!" 
class PrecomputedTimeVaryingPSD(object):
    """Select precomputed PSDs from an HDF5 file by time segment.

    The file is expected to hold, under its first top-level group (used as
    the detector name), the datasets ``start_time`` and ``end_time`` giving
    the span of each stored PSD, one frequency series per PSD under
    ``psds/<index>``, and a file attribute ``low_frequency_cutoff``.

    Parameters
    ----------
    opt : object
        Options object providing ``precomputed_psd_file``,
        ``low_frequency_cutoff``, ``psd_inverse_length`` and
        ``invpsd_trunc_method``.
    length : int or None
        Number of samples each returned PSD is padded or truncated to.
        ``None`` leaves the stored length untouched.
    delta_f : float
        Frequency resolution recorded on the instance.
    sample_rate : float
        Sample rate used to convert ``psd_inverse_length`` into a number
        of samples for the inverse spectrum truncation.

    Raises
    ------
    ValueError
        If the file lists no PSD segments.
    """

    def __init__(self, opt, length, delta_f, sample_rate):
        self.opt = opt
        self.file_name = opt.precomputed_psd_file
        self.f_low = opt.low_frequency_cutoff
        self.length = length
        self.delta_f = delta_f
        self.sample_rate = sample_rate
        self.psd_inverse_length = opt.psd_inverse_length
        self.invpsd_trunc_method = opt.invpsd_trunc_method

        with h5py.File(self.file_name, 'r') as f:
            # The first top-level group is taken to be the detector.
            detector = tuple(f.keys())[0]
            self.start_times = f[detector + '/start_time'][:]
            self.end_times = f[detector + '/end_time'][:]
            self.file_f_low = f.attrs['low_frequency_cutoff']

        if len(self.start_times) == 0:
            # Fail with a readable message instead of the opaque error
            # numpy raises when reducing an empty array.
            raise ValueError("PSD file {} contains no PSD segments".format(
                self.file_name))

        self.begin = self.start_times.min()
        self.end = self.end_times.max()
        self.detector = detector

    def assosiate_psd_to_inspiral_segment(self, inp_seg, delta_f=None):
        """Return the stored PSD overlapping ``inp_seg`` the most.

        Every stored PSD segment is considered. Among PSDs with equal
        overlap, the one whose start time is closest to the start of
        ``inp_seg`` is used. Segments that merely touch do not count as
        overlapping.

        Parameters
        ----------
        inp_seg : sequence of two times
            ``(start, end)`` of the segment needing a PSD. Both values must
            be convertible with ``float()``.
        delta_f : float, optional
            Passed on to ``get_psd`` to request a frequency resolution.

        Returns
        -------
        The selected PSD, inverse-spectrum truncated when
        ``psd_inverse_length`` is set.

        Raises
        ------
        ValueError
            If ``inp_seg`` lies outside the time range covered by the file
            or no stored PSD overlaps it.
        """
        seg_start, seg_end = inp_seg[0], inp_seg[1]
        if seg_start > self.end or seg_end < self.begin:
            err_msg = "PSD file doesn't contain require times. \n"
            err_msg += "PSDs are within range ({}, {})".format(
                self.begin, self.end)
            raise ValueError(err_msg)

        # Work in float64: the stored times are unsigned integers, and
        # unsigned arithmetic would wrap around for negative differences.
        starts = numpy.asarray(self.start_times, dtype=numpy.float64)
        ends = numpy.asarray(self.end_times, dtype=numpy.float64)
        lo = float(seg_start)
        hi = float(seg_end)

        # Length of the intersection with each stored segment; values
        # <= 0 mean the two segments do not overlap.
        overlaps = numpy.minimum(ends, hi) - numpy.maximum(starts, lo)
        if overlaps.size == 0 or overlaps.max() <= 0:
            err_msg = "No PSDs found intersecting segment!"
            raise ValueError(err_msg)

        # Several PSDs can fully contain a short segment; prefer the one
        # starting closest to it.
        candidates = numpy.flatnonzero(overlaps == overlaps.max())
        nearest = numpy.argmin(numpy.abs(starts[candidates] - lo))
        best_psd = self.get_psd(int(candidates[nearest]), delta_f)

        if self.psd_inverse_length:
            best_psd = pycbc.psd.inverse_spectrum_truncation(
                best_psd,
                int(self.psd_inverse_length * self.sample_rate),
                low_frequency_cutoff=self.f_low,
                trunc_method=self.invpsd_trunc_method)
        return best_psd

    def get_psd(self, index, delta_f=None):
        """Load the PSD stored at ``index`` and conform it to this instance.

        Parameters
        ----------
        index : int
            Position of the PSD within the file's ``psds`` group.
        delta_f : float, optional
            If given and different from the stored resolution, the PSD is
            interpolated to it.

        Returns
        -------
        The loaded frequency series, resized to ``self.length`` when that
        is set, with bins below the file's low-frequency cutoff set to
        infinity when a lower cutoff was requested.
        """
        group = self.detector + '/psds/' + str(index)
        psd = load_frequencyseries(self.file_name, group=group)
        if delta_f is not None and psd.delta_f != delta_f:
            psd = pycbc.psd.interpolate(psd, delta_f)
        if self.length is not None and self.length != len(psd):
            psd2 = FrequencySeries(zeros(self.length, dtype=psd.dtype),
                                   delta_f=psd.delta_f)
            if self.length > len(psd):
                # Bins beyond the stored PSD are filled with infinity.
                psd2[:] = numpy.inf
                psd2[0:len(psd)] = psd
            else:
                psd2[:] = psd[0:self.length]
            psd = psd2
        if self.f_low is not None and self.f_low < self.file_f_low:
            # avoid using the PSD below the f_low given in the file
            k = int(self.file_f_low / psd.delta_f)
            psd[0:k] = numpy.inf

        return psd