Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save overlapping PSDs with inspiral and use precomputed psds for inspiral #4930

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pycbc/events/ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def newsnr(snr, reduced_x2, q=6., n=2.):
reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for
definition. Previous implementation in glue/ligolw/lsctables.py
"""
nsnr = numpy.array(snr, ndmin=1, dtype=numpy.float64)
nsnr = numpy.array(numpy.abs(snr), ndmin=1, dtype=numpy.float64)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change needed in the PR?

reduced_x2 = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64)

# newsnr is only different from snr if reduced chisq > 1
Expand Down
106 changes: 83 additions & 23 deletions pycbc/psd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import copy
import numpy
import logging
from pycbc.io import HFile
from ligo import segments
from pycbc.psd.read import *
from pycbc.psd.analytical import *
Expand All @@ -27,6 +30,9 @@
from pycbc.types import copy_opts_for_single_ifo
from pycbc.types import required_opts, required_opts_multi_ifo
from pycbc.types import ensure_one_opt, ensure_one_opt_multi_ifo
from pycbc import DYN_RANGE_FAC

logger = logging.getLogger('pycbc.psd')

def from_cli(opt, length, delta_f, low_frequency_cutoff,
strain=None, dyn_range_factor=1, precision=None):
Expand Down Expand Up @@ -241,8 +247,14 @@ def insert_psd_option_group(parser, output=True, include_data_options=True):
psd_options.add_argument("--psdvar-high-freq", type=float, metavar="HERTZ",
help="Maximum frequency to consider in strain "
"bandpass.")

if include_data_options :
psd_options.add_argument("--precomputed-psd-file", metavar='FILE', type=str,
help='Given HDF5 MERGE PSD file as the option, '
'appropriate PSD for each strain segment will be '
'chosen and use which is precomputed. PSD from the '
'strain data wil not be computed when this option '
'is provided.')

if include_data_options:
psd_options.add_argument("--psd-estimation",
help="Measure PSD from the data, using "
"given average method.",
Expand All @@ -266,7 +278,10 @@ def insert_psd_option_group(parser, output=True, include_data_options=True):
if output:
psd_options.add_argument("--psd-output",
help="(Optional) Write PSD to specified file")

psd_options.add_argument("--overlapping-psd-output",
help="(Optional) Write PSD to specified file in the format "
"of calculate_psd output. --output-psd only saves psd of "
"only one segment")
return psd_options

def insert_psd_option_group_multi_ifo(parser):
Expand Down Expand Up @@ -499,6 +514,32 @@ def generate_overlapping_psds(opt, gwstrain, flen, delta_f, flow,
psd = from_cli(opt, flen, delta_f, flow, strain=strain_part,
dyn_range_factor=dyn_range_factor, precision=precision)
psds_and_times.append( (start_idx, end_idx, psd) )

if hasattr(opt, 'overlapping_psd_output') and opt.overlapping_psd_output:
logging.info(f"Saving overlapping PSDs for segments to "
f"{opt.overlapping_psd_output}")
f = HFile(opt.overlapping_psd_output, 'w')
ifo = opt.channel_name[0:2]
psd_group = f.create_group(ifo + '/psds')
start, end = [], []
for inc, pnt in enumerate(psds_and_times):
start_idx, end_idx, psd = pnt
key = str(inc)
start_time = gwstrain.start_time + start_idx/gwstrain.sample_rate
end_time = gwstrain.start_time + end_idx/gwstrain.sample_rate
start.append(int(start_time))
end.append(int(end_time))
psd_group.create_dataset(key, data=psd.numpy(), compression='gzip',
compression_opts=9, shuffle=True)
psd_group[key].attrs['epoch'] = int(start_time)
psd_group[key].attrs['delta_f'] = psd.delta_f

f[ifo + '/start_time'] = numpy.array(start, dtype=numpy.uint32)
f[ifo + '/end_time'] = numpy.array(end, dtype=numpy.uint32)
f.attrs['low_frequency_cutoff'] = opt.low_frequency_cutoff
f.attrs['dynamic_range_factor'] = DYN_RANGE_FAC
f.close()

return psds_and_times

def associate_psds_to_segments(opt, fd_segments, gwstrain, flen, delta_f, flow,
Expand Down Expand Up @@ -534,26 +575,45 @@ def associate_psds_to_segments(opt, fd_segments, gwstrain, flen, delta_f, flow,
that precision. If 'double' the PSD will be converted to float64, if
not already in that precision.
"""
psds_and_times = generate_overlapping_psds(opt, gwstrain, flen, delta_f,
flow, dyn_range_factor=dyn_range_factor,
precision=precision)

for fd_segment in fd_segments:
best_psd = None
psd_overlap = 0
inp_seg = segments.segment(fd_segment.seg_slice.start,
fd_segment.seg_slice.stop)
for start_idx, end_idx, psd in psds_and_times:
psd_seg = segments.segment(start_idx, end_idx)
if psd_seg.intersects(inp_seg):
curr_overlap = abs(inp_seg & psd_seg)
if curr_overlap > psd_overlap:
psd_overlap = curr_overlap
best_psd = psd
if best_psd is None:
err_msg = "No PSDs found intersecting segment!"
raise ValueError(err_msg)
fd_segment.psd = best_psd
if opt.precomputed_psd_file:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it make sense to obtain the precomputed PSD through the pycbc.psd.from_cli method instead? That way it could be invoked more broadly.

logging.info(f"Reading Precomuted PSDs from {opt.precomputed_psd_file}")
tpsd = PrecomputedTimeVaryingPSD(opt,
length=len(fd_segments[0].data),
delta_f=fd_segments[0].delta_f,
sample_rate=fd_segments[0].sample_rate)

for fd_segment in fd_segments:
inp_seg = segments.segment(fd_segment.start_time,
fd_segment.end_time)
best_psd = tpsd.assosiate_psd_to_inspiral_segment(inp_seg,
delta_f=fd_segment.delta_f)
if best_psd is None:
err_msg = "No PSDs found intersecting segment!"
raise ValueError(err_msg)
fd_segment.psd = best_psd

else:
psds_and_times = generate_overlapping_psds(opt, gwstrain, flen, delta_f,
flow, dyn_range_factor=dyn_range_factor,
precision=precision)

for fd_segment in fd_segments:
best_psd = None
psd_overlap = 0
inp_seg = segments.segment(fd_segment.seg_slice.start,
fd_segment.seg_slice.stop)
for start_idx, end_idx, psd in psds_and_times:
psd_seg = segments.segment(start_idx, end_idx)
if psd_seg.intersects(inp_seg):
curr_overlap = abs(inp_seg & psd_seg)
if curr_overlap > psd_overlap:
psd_overlap = curr_overlap
best_psd = psd
if best_psd is None:
err_msg = "No PSDs found intersecting segment!"
raise ValueError(err_msg)
fd_segment.psd = best_psd


def associate_psds_to_single_ifo_segments(opt, fd_segments, gwstrain, flen,
delta_f, flow, ifo,
Expand Down
94 changes: 93 additions & 1 deletion pycbc/psd/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@

import logging
import numpy
import h5py
import scipy.interpolate
from pycbc.types import FrequencySeries

from ligo import segments
from pycbc.types import FrequencySeries, load_frequencyseries, zeros, float32
import pycbc.psd

logger = logging.getLogger('pycbc.psd.read')

def from_numpy_arrays(freq_data, noise_data, length, delta_f, low_freq_cutoff):
Expand Down Expand Up @@ -79,7 +84,7 @@ def from_numpy_arrays(freq_data, noise_data, length, delta_f, low_freq_cutoff):
psd = numpy.zeros(length, dtype=numpy.float64)

vals = numpy.log(numpy.arange(kmin, length) * delta_f)
psd[kmin:] = numpy.exp(psd_interp(vals))
psd[kmin:] = numpy.exp(psd_interp(vals))

return FrequencySeries(psd, delta_f=delta_f)

Expand Down Expand Up @@ -187,3 +192,90 @@ def from_xml(filename, length, delta_f, low_freq_cutoff, ifo_string=None,

return from_numpy_arrays(freq_data, noise_data, length, delta_f,
low_freq_cutoff)


class PrecomputedTimeVaryingPSD(object):
def __init__(self, opt, length, delta_f, sample_rate):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bhooshan-gadre I think this is a nice way to do this, but I think it might also be useful to be able to invoke this class by hand. Would it not make sense to have a from_cli class method that takes an option/arg input, and then a regular init method that could be invoked by hand?

self.opt = opt
self.file_name = opt.precomputed_psd_file
self.f_low = opt.low_frequency_cutoff
self.length = length
self.delta_f = delta_f
self.sample_rate = sample_rate
self.psd_inverse_length = opt.psd_inverse_length
self.invpsd_trunc_method = opt.invpsd_trunc_method

with h5py.File(self.file_name, 'r') as f:
detector = tuple(f.keys())[0]
self.start_times = f[detector + '/start_time'][:]
self.end_times = f[detector + '/end_time'][:]
self.file_f_low = f.attrs['low_frequency_cutoff']

self.begin = self.start_times.min()
self.end = self.end_times.max()
self.detector = detector

def assosiate_psd_to_inspiral_segment(self, inp_seg, delta_f=None):
    """Pick the stored PSD that best overlaps an analysis segment.

    Up to two stored PSDs whose start times are closest to the start of
    ``inp_seg`` are considered as candidates. Of those that intersect
    ``inp_seg``, the one with the largest overlap is loaded via
    ``self.get_psd``; on a tie the first candidate is kept.

    Parameters
    ----------
    inp_seg : ligo.segments.segment
        Segment (start, end) of the data being analysed.
        NOTE(review): presumably GPS seconds, matching the
        ``start_time``/``end_time`` datasets of the PSD file -- confirm.
    delta_f : float, optional
        Frequency resolution forwarded to ``self.get_psd``.

    Returns
    -------
    psd
        The object returned by ``self.get_psd`` for the chosen index,
        passed through ``pycbc.psd.inverse_spectrum_truncation`` when
        ``self.psd_inverse_length`` is set.

    Raises
    ------
    ValueError
        If ``inp_seg`` lies entirely outside the time range covered by
        the file, or if no candidate PSD intersects it.
    """
    if inp_seg[0] > self.end or inp_seg[1] < self.begin:
        err_msg = "PSD file doesn't contain require times. \n"
        err_msg += "PSDs are within range ({}, {})".format(
            self.begin, self.end)
        raise ValueError(err_msg)

    # The start times may be stored as unsigned integers; compute the
    # distances in float64 so the subtraction can never wrap around.
    distance = numpy.abs(
        numpy.asarray(self.start_times, dtype=numpy.float64)
        - float(inp_seg[0]))

    # argpartition with kth=2 needs more than two entries; for shorter
    # files every stored PSD (possibly just one) is a candidate.
    if distance.size > 2:
        candidates = numpy.argpartition(distance, 2)[:2]
    else:
        candidates = numpy.argsort(distance)

    best_idx = None
    best_overlap = 0
    for idx in candidates:
        psd_seg = segments.segment(self.start_times[idx],
                                   self.end_times[idx])
        if not inp_seg.intersects(psd_seg):
            continue
        overlap = abs(psd_seg & inp_seg)
        # Strict comparison: an equally good later candidate does not
        # displace the earlier one.
        if best_idx is None or overlap > best_overlap:
            best_idx = idx
            best_overlap = overlap

    if best_idx is None:
        err_msg = "No PSDs found intersecting segment!"
        raise ValueError(err_msg)

    # Read only the winning PSD from disk.
    best_psd = self.get_psd(best_idx, delta_f)

    if self.psd_inverse_length:
        best_psd = pycbc.psd.inverse_spectrum_truncation(
            best_psd,
            int(self.psd_inverse_length * self.sample_rate),
            low_frequency_cutoff=self.f_low,
            trunc_method=self.invpsd_trunc_method)
    return best_psd

def get_psd(self, index, delta_f=None):
    """Load one stored PSD and adapt it to the configured analysis.

    Parameters
    ----------
    index : int
        Key of the dataset under ``<detector>/psds/`` in the PSD file.
    delta_f : float, optional
        If given and different from the stored PSD's ``delta_f``, the
        PSD is passed through ``pycbc.psd.interpolate`` with this value.

    Returns
    -------
    FrequencySeries
        The PSD, resized to ``self.length`` samples when that is set,
        and with the bins below the file's low-frequency cutoff set to
        infinity when ``self.f_low`` is lower than that cutoff.
    """
    dataset_path = self.detector + '/psds/' + str(index)
    psd = load_frequencyseries(self.file_name, group=dataset_path)

    if delta_f is not None and psd.delta_f != delta_f:
        psd = pycbc.psd.interpolate(psd, delta_f)

    if self.length is not None and self.length != len(psd):
        stored_len = len(psd)
        resized = FrequencySeries(zeros(self.length, dtype=psd.dtype),
                                  delta_f=psd.delta_f)
        if self.length > stored_len:
            # Pad with infinity above the stored band.
            resized[:] = numpy.inf
            resized[0:stored_len] = psd
        else:
            # Stored PSD is longer than requested: keep the leading part.
            resized[:] = psd[0:self.length]
        psd = resized

    if self.f_low is not None and self.f_low < self.file_f_low:
        # avoid using the PSD below the f_low given in the file
        cutoff_bin = int(self.file_f_low / psd.delta_f)
        psd[0:cutoff_bin] = numpy.inf

    return psd
Loading