diff --git a/bin/hveto b/bin/hveto index 1edeec9..e2f5cc5 100755 --- a/bin/hveto +++ b/bin/hveto @@ -37,6 +37,7 @@ import sys from socket import getfqdn from getpass import getuser from distutils.spawn import find_executable +from pathlib import Path try: import configparser @@ -48,8 +49,7 @@ from numpy import unique from matplotlib import use use('agg') -from glue.lal import Cache - +from gwpy.io.cache import read_cache from gwpy.time import to_gps from gwpy.segments import (Segment, SegmentList, DataQualityFlag, DataQualityDict) @@ -101,8 +101,8 @@ parser.add_argument('-a', '--auxiliary-cache', action='append', default=[], '\' for L1:GDS-CALIB_STRAIN triggers') parser.add_argument('-S', '--analysis-segments', action='append', default=[], type=abs_path, - help='path to LIGO_LW XML file containing segments for ' - 'the analysis flag (name in segment_definer table ' + help='path to file containing segments for ' + 'the analysis flag (name in data file ' 'must match analysis-flag in config file)') parser.add_argument('-w', '--omega-scans', type=int, metavar='NSCAN', help='generate a workflow of omega scans for each round, ' @@ -237,7 +237,7 @@ pchannel = cp.get('primary', 'channel') # read auxiliary cache if args.auxiliary_cache: - acache = Cache.fromfilenames(args.auxiliary_cache) + acache = read_cache(args.auxiliary_cache) else: acache = None @@ -301,7 +301,7 @@ htmlv['config'] = inifile # read primary cache if args.primary_cache: - pcache = Cache.fromfilenames(args.primary_cache) + pcache = read_cache(args.primary_cache) else: pcache = None @@ -362,8 +362,8 @@ def _get_aux_triggers(channel): auxcache = None else: ifo, name = channel.split(':') - desc = name.replace('-', '_') - auxcache = acache.sieve(ifos=ifo, description='%s*' % desc) + match = "{}-{}".format(ifo, name.replace('-', '_')) + auxcache = [e for e in cache if Path(e).name.startswith(match)] # get triggers try: trigs = get_triggers(channel, auxetg, analysis.active, snr=minsnr, @@ -560,11 +560,7 @@ cum. deadtime : %s""" % ( ifo, round.n, start, duration)) write_ascii_segments(segfile, round.vetoes) logger.debug("Round %d vetoes written to %s" % (round.n, segfile)) - segxml = os.path.join(segdir, '%s-HVETO_VETO_SEGS_ROUND_%d-%d-%d.xml' % ( - ifo, round.n, start, duration)) - flag.write(segxml, overwrite=True) - logger.debug("Round %d vetoes written to %s" % (round.n, segxml)) - round.files['VETO_SEGS'] = (segfile, segxml) + round.files['VETO_SEGS'] = (segfile,) # write triggers trigfile = os.path.join(trigdir, '%s-HVETO_%%s_TRIGS_ROUND_%d-%d-%d.txt' % (ifo, round.n, start, duration)) @@ -694,7 +690,7 @@ cum. deadtime : %s""" % ( # write file with all segments segfile = os.path.join( - segdir, '%s-HVETO_SEGMENTS-%d-%d.xml.gz' % (ifo, start, duration)) + segdir, '%s-HVETO_SEGMENTS-%d-%d.h5' % (ifo, start, duration)) segments.write(segfile, overwrite=True) logger.debug("Segment summary written to %s" % segfile) diff --git a/bin/hveto-cache-events b/bin/hveto-cache-events index e6a71e2..28b9a49 100644 --- a/bin/hveto-cache-events +++ b/bin/hveto-cache-events @@ -29,18 +29,13 @@ import argparse import os import warnings import multiprocessing +from pathlib import Path -from lal.utils import CacheEntry +import h5py -from glue.lal import Cache -from glue.ligolw.ligolw import (Document, LIGO_LW, LIGOLWContentHandler) -from glue.ligolw.lsctables import ProcessTable -from glue.ligolw.utils import (write_filename as write_ligolw, - load_filename as load_ligolw) -from glue.ligolw.utils.process import (register_to_xmldoc as - append_process_table) +from astropy.table import vstack -from gwpy.io import ligolw as io_ligolw +from gwpy.io.cache import (cache_segments, file_segment, read_cache) from gwpy.time import to_gps from gwpy.segments import (Segment, SegmentList, DataQualityFlag, DataQualityDict) @@ -48,11 +43,10 @@ from gwpy.segments import (Segment, SegmentList, from hveto import (__version__, log, config) from hveto.triggers import (get_triggers, find_auxiliary_channels, find_trigger_files) +from hveto.utils import write_lal_cache __author__ = 'Duncan Macleod ' -Cache.entry_class = CacheEntry # remove deprecationwarning - IFO = os.getenv('IFO') logger = log.Logger('hveto-cache-events') @@ -61,7 +55,7 @@ logger = log.Logger('hveto-cache-events') # -- parse command line ------------------------------------------------------- def abs_path(p): - return os.path.abspath(os.path.expanduser(p)) + return Path(p).expanduser().resolve() parser = argparse.ArgumentParser(description=__doc__) @@ -97,7 +91,7 @@ parser.add_argument('--append', action='store_true', default=False, 'start from scratch (default)') pout = parser.add_argument_group('Output options') -pout.add_argument('-o', '--output-directory', default=os.curdir, +pout.add_argument('-o', '--output-directory', default=os.curdir, type=abs_path, help='path of output directory, default: %(default)s') args = parser.parse_args() @@ -107,16 +101,6 @@ start = int(args.gpsstart) end = int(args.gpsend) duration = end - start -# format process params for LIGO_LW -procparams = {k.replace('_', '-'): v for k, v in vars(args).items() if v} -for gpskey in ('gpsstart', 'gpsend'): - procparams[gpskey] = int(procparams[gpskey]) -for listkey in ('config-file', 'primary-cache', 'auxiliary-cache'): - try: - procparams[listkey] = ','.join(procparams[listkey]) - except KeyError: - pass - logger.info("-- Welcome to Hveto --") logger.info("GPS start time: %d" % start) logger.info("GPS end time: %d" % end) @@ -126,21 +110,15 @@ logger.info("Interferometer: %s" % ifo) # read configuration cp = config.HvetoConfigParser(ifo=args.ifo) -cp.read(args.config_file) +cp.read(map(str, args.config_file)) logger.info("Parsed configuration file(s)") # format output directory -outdir = abs_path(args.output_directory) -if not os.path.isdir(outdir): - os.makedirs(outdir) -os.chdir(outdir) -logger.info("Working directory: %s" % outdir) -trigdir = 'triggers' -if not os.path.isdir(trigdir): - os.makedirs(trigdir) - -os.chdir(trigdir) -trigdir = os.getcwd() +outdir = args.output_directory +outdir.mkdir(parents=True, exist_ok=True) +logger.info("Working directory: {}".format(outdir)) +trigdir = outdir / 'triggers' +trigdir.mkdir(parents=True, exist_ok=True) # get segments aflag = cp.get('segments', 'analysis-flag') @@ -166,24 +144,25 @@ logger.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime" snrs = cp.getfloats('hveto', 'snr-thresholds') minsnr = min(snrs) -# -- utility methods ---------------------------------------------------------- - -contenthandler = LIGOLWContentHandler +# -- utility methods ---------------------------------------------------------- -def create_filename(channel): +def create_path(channel): ifo, name = channel.split(':', 1) name = name.replace('-', '_') - return os.path.join(trigdir, - '%s-%s-%d-%d.xml.gz' % (ifo, name, start, duration)) + return trigdir / "{}-{}-{}-{}.h5".format(ifo, name, start, duration) def read_and_cache_events(channel, etg, cache=None, trigfind_kw={}, **read_kw): - cfile = create_filename(channel) + cfile = create_path(channel) # read existing cached triggers and work out new segments to query - if args.append and os.path.isfile(cfile): - previous = DataQualityFlag.read(cfile, format='ligolw').coalesce() + if args.append and cfile.is_file(): + previous = DataQualityFlag.read( + str(cfile), + path='segments', + format='hdf5', + ).coalesce() new = analysis - previous else: new = analysis.copy() @@ -191,23 +170,26 @@ def read_and_cache_events(channel, etg, cache=None, trigfind_kw={}, if cache is None: cache = find_trigger_files(channel, etg, new.active, **trigfind_kw) else: - cache = cache.sieve(segmentlist=new.active) + cache = list(filter( + lambda e: new.active.intersects_segment(file_segment(e)), + cache, + )) # restrict 'active' segments to when we have data try: - new.active &= cache.to_segmentlistdict().values()[0] + new.active &= cache_segments(cache) except IndexError: new.active = type(new.active)() # find new triggers try: - trigs = get_triggers(channel, auxetg, new.active, cache=cache, + trigs = get_triggers(channel, etg, new.active, cache=cache, raw=True, **read_kw) # catch error and continue except ValueError as e: warnings.warn('%s: %s' % (type(e).__name__, str(e))) else: - a = write_events(channel, trigs, new) + path = write_events(channel, trigs, new) try: - return CacheEntry.from_T050017(a), len(trigs) + return path, len(trigs) except TypeError: # None return @@ -216,39 +198,32 @@ def write_events(channel, tab, segments): """Write events to file with a given filename """ # get filename - filename = create_filename(channel) + path = create_path(channel) + h5f = h5py.File(str(path), 'a') - # read existing document - if args.append and os.path.isfile(filename): - xmldoc = io_ligolw.read_ligolw(filename) - # or, create document + # read existing table from file + try: + old = tab.read(h5f["triggers"], format="hdf5") + except KeyError: + pass else: - xmldoc = Document() - xmldoc.appendChild(LIGO_LW()) + tab = vstack(old, tab) - # append process table - with multiprocessing.Lock(): - ProcessTable.next_id = type(ProcessTable.next_id)(0) - process = append_process_table(xmldoc, os.path.basename(__file__), - procparams) + # append event table + tab.write(h5f, path="triggers", append=True, overwrite=True) - # append segment tables + # write segments try: - segments.write(xmldoc, format='ligolw', append=True, - attrs={'process_id': process.process_id}) - except TypeError as exc: - if 'process_id' in str(exc): - segments.write(xmldoc, format='ligolw', append=True) - else: - raise - - # append event table - if len(tab): - tab.write(xmldoc, append=True) + oldsegs = DataQualityFlag.read(h5f, path="segments", format="hdf5") + except KeyError: + pass + else: + segments = oldsegs + segments + segments.write(h5f, path="segments", append=True, overwrite=True) # write file to disk - write_ligolw(xmldoc, filename, gz=True) - return filename + h5f.close() + return path # -- load channels ------------------------------------------------------------ @@ -258,7 +233,7 @@ pchannel = cp.get('primary', 'channel') # read auxiliary cache if args.auxiliary_cache: - acache = Cache.fromfilenames(args.auxiliary_cache) + acache = [e for c in args.auxiliary_cache for e in read_cache(str(c))] else: acache = None @@ -295,7 +270,7 @@ logger.debug("Read list of %d auxiliary channels" % len(auxchannels)) # remove unsafe channels nunsafe = 0 -for i in xrange(len(auxchannels) -1, -1, -1): +for i in range(len(auxchannels) -1, -1, -1): if auxchannels[i] in unsafe: logger.warning("Auxiliary channel %r identified as unsafe and has " "been removed" % auxchannels[i]) @@ -311,7 +286,7 @@ logger.info("Reading events for primary channel...") # read primary cache if args.primary_cache: - pcache = Cache.fromfilenames(args.primary_cache) + pcache = [e for c in args.primary_cache for e in read_cache(str(c))] else: pcache = None @@ -333,7 +308,7 @@ except TypeError: n = 0 if n: logger.info("Cached %d new events for %s" % (n, pchannel)) -elif args.append and os.path.isfile(e.path): +elif args.append and e.is_file(): logger.info("Cached 0 new events for %s" % pchannel) else: message = "No events found for %r in %d seconds of livetime" % ( @@ -341,12 +316,11 @@ else: logger.critical(message) # write primary to local cache -pcache = Cache([e]) -with open('%s-HVETO_PRIMARY_CACHE-%d-%d.lcf' - % (ifo, start, duration), 'w') as f: - pcache.tofile(f) -pname = os.path.join(trigdir, f.name) -logger.info('Primary cache written to %s' % pname) +pname = trigdir / '{}-HVETO_PRIMARY_CACHE-{}-{}.lcf'.format( + ifo, start, duration, +) +write_lal_cache(str(pname), [e]) +logger.info('Primary cache written to {}'.format(pname)) # -- load auxiliary triggers -------------------------------------------------- @@ -362,8 +336,9 @@ def read_and_write_aux_triggers(channel): auxcache = None else: ifo, name = channel.split(':') - desc = name.replace('-', '_') - auxcache = acache.sieve(ifos=ifo, description='%s*' % desc) + match = "{}-{}".format(ifo, name.replace('-', '_')) + auxcache = [e for e in acache if Path(e).name.startswith(match)] + out = read_and_cache_events(channel, auxetg, cache=auxcache, snr=minsnr, frange=auxfreq, trigfind_kw=atrigfindkw, **areadkw) @@ -394,15 +369,15 @@ if args.nproc > 1: else: results = map(read_and_write_aux_triggers, auxchannels) -acache = Cache(x for x in results if x is not None) -with open('%s-HVETO_AUXILIARY_CACHE-%d-%d.lcf' - % (ifo, start, duration), 'w') as f: - acache.tofile(f) -aname = os.path.join(trigdir, f.name) -logger.info('Auxiliary cache written to %s' % aname) +acache = [x for x in results if x is not None] +aname = trigdir / '{}-HVETO_AUXILIARY_CACHE-{}-{}.lcf'.format( + ifo, start, duration, +) +write_lal_cache(str(aname), [e for e in results if e is not None]) +logger.info('Auxiliary cache written to {}'.format(aname)) # -- finish ------------------------------------------------------------------- logger.info('Done, you can use these cache files in an hveto analysis by ' - 'passing the following arguments:\n --primary-cache %s ' - '--auxiliary-cache %s' % (pname, aname)) + 'passing the following arguments:\n --primary-cache {} ' + '--auxiliary-cache {}'.format(pname, aname)) diff --git a/docs/conf.py b/docs/conf.py index 2239502..6397ec0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -38,7 +38,6 @@ 'sphinx.ext.autosummary', 'numpydoc', 'sphinxcontrib.programoutput', - 'sphinxcontrib.epydoc', ] # Add any paths that contain templates here, relative to this directory. @@ -306,8 +305,3 @@ 'matplotlib': ('http://matplotlib.sourceforge.net/', None), 'gwpy': ('http://gwpy.github.io/docs/latest/', None), } - -# Epydoc extension config for GLUE -epydoc_mapping = { - 'http://software.ligo.org/docs/glue/': [r'glue(\.|$)'], -} diff --git a/hveto/config.py b/hveto/config.py index b876dac..2d50fd0 100755 --- a/hveto/config.py +++ b/hveto/config.py @@ -258,7 +258,7 @@ def __init__(self, ifo=None, defaults=dict(), **kwargs): def set_hveto_defaults(self): for section in self.HVETO_DEFAULTS: self.add_section(section) - for key, val in self.HVETO_DEFAULTS[section].iteritems(): + for key, val in self.HVETO_DEFAULTS[section].items(): if key.endswith('channels') and isinstance(val, (tuple, list)): self.set(section, key, '\n'.join(list(val))) elif isinstance(val, tuple): @@ -294,4 +294,4 @@ def getparams(self, section, prefix): def comma_separated_floats(string): - return map(float, string.split(',')) + return tuple(map(float, string.split(','))) diff --git a/hveto/core.py b/hveto/core.py index 986839c..cc5819a 100755 --- a/hveto/core.py +++ b/hveto/core.py @@ -163,7 +163,7 @@ def find_max_significance(primary, auxiliary, channel, snrs, windows, livetime): the parameters and segments generated by the (snr, dt) with the highest significance """ - rec = vstack_tables([primary] + auxiliary.values()) + rec = vstack_tables([primary] + list(auxiliary.values())) coincs = find_all_coincidences(rec, channel, snrs, windows) winner = HvetoWinner(name='unknown', significance=-1) sigs = dict((c, 0) for c in auxiliary) @@ -361,6 +361,6 @@ def veto_all(auxiliary, segmentlist): for details on the veto algorithm itself """ channels = auxiliary.keys() - t = vstack_tables(auxiliary.values()) + t = vstack_tables(list(auxiliary.values())) keep, _ = veto(t, segmentlist) return dict((c, keep[keep['channel'] == c]) for c in channels) diff --git a/hveto/html.py b/hveto/html.py index 0503e06..f26f364 100755 --- a/hveto/html.py +++ b/hveto/html.py @@ -24,7 +24,6 @@ import sys import os.path import datetime -import subprocess from functools import wraps from getpass import getuser @@ -32,7 +31,9 @@ from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter -from glue import markup +from MarkupPy import markup + +from gwdetchar.io.html import package_table from ._version import get_versions @@ -427,7 +428,7 @@ def scaffold_plots(plots, nperrow=2): Returns ------- - page : `~glue.markup.page` + page : `~MarkupPy.markup.page` the markup object containing the scaffolded HTML """ page = markup.page() @@ -459,7 +460,7 @@ def write_footer(about=None, date=None): Returns ------- - page : `~glue.markup.page` + page : `~MarkupPy.markup.page` the markup object containing the footer HTML """ page = markup.page() @@ -506,7 +507,7 @@ def write_summary( Returns ------- - page : `~glue.markup.page` + page : `~MarkupPy.markup.page` the formatted markup object containing the analysis summary table, and images """ @@ -570,7 +571,7 @@ def write_round(round): Returns ------- - page : `~glue.markup.page` + page : `~MarkupPy.markup.page` the formatted HTML for this round """ page = markup.page() @@ -744,13 +745,21 @@ def write_about_page(configfile): formatter = HtmlFormatter(noclasses=True) # set up page page = markup.page() + + # command line page.h2('On the command line') page.p('This page was generated with the command line call shown below.') commandline = highlight(' '.join(sys.argv), blexer, formatter) page.add(commandline) + + # configuration file page.h2('Configuration') with open(configfile, 'r') as fobj: inifile = fobj.read() contents = highlight(inifile, ilexer, formatter) page.add(contents) + + # runtime environment + page.add(package_table()) + return page diff --git a/hveto/plot.py b/hveto/plot.py index 803b2e7..8e063a8 100755 --- a/hveto/plot.py +++ b/hveto/plot.py @@ -344,8 +344,10 @@ def veto_scatter( 'handlelength': 1, 'handletextpad': .5 } - legargs.update(dict((x[7:], axargs.pop(x)) for x in axargs.keys() - if x.startswith('legend_'))) + legargs.update(dict( + (x[7:], axargs.pop(x)) for x in list(axargs.keys()) if + x.startswith('legend_') + )) ax.legend(**legargs) # finalize for axis in ['x', 'y']: diff --git a/hveto/tests/test_utils.py b/hveto/tests/test_utils.py index a612eec..98f894d 100755 --- a/hveto/tests/test_utils.py +++ b/hveto/tests/test_utils.py @@ -19,11 +19,25 @@ """Tests for `hveto.utils` """ +import tempfile + import pytest +from gwpy.io.cache import read_cache + from .. import utils +def test_write_lal_cache(tmpdir): + cache = [ + "/test/path/X-TEST-0-1.txt", + "/test/path/X-TEST-2-3.txt", + ] + target = tmpdir.join("cache.lcf") + utils.write_lal_cache(str(target), cache) + assert read_cache(str(target)) == cache + + @pytest.mark.parametrize('n, out', [ (1, [[1, 2, 3, 4, 5]]), (2, [[1, 2, 3], [4, 5]]), diff --git a/hveto/triggers.py b/hveto/triggers.py index 30f71bc..dbb5e55 100755 --- a/hveto/triggers.py +++ b/hveto/triggers.py @@ -122,6 +122,15 @@ def find_trigger_files(channel, etg, segments, **kwargs): gwtrigfind.find_trigger_urls for details on file discovery """ + # format arguments + etg = _sanitize_name(etg) + try: + readfmt = kwargs.pop("format", DEFAULT_FORMAT[etg]) + except KeyError: + raise ValueError("unsupported ETG {!r}".format(etg)) + for key, val in DEFAULT_TRIGFIND_OPTIONS.get((etg, readfmt), {}).items(): + kwargs.setdefault(key, val) + cache = [] for start, end in segments: try: diff --git a/hveto/utils.py b/hveto/utils.py index 0b406cf..327352d 100755 --- a/hveto/utils.py +++ b/hveto/utils.py @@ -19,12 +19,14 @@ """General utilities for hveto """ -from __future__ import division +from __future__ import (division, print_function) import sys import os.path from math import ceil +from gwdatafind.utils import filename_metadata + try: # python 3.x from io import StringIO from html.parser import HTMLParser @@ -38,6 +40,21 @@ __credits__ = 'Alex Urban ' +def write_lal_cache(target, paths): + # if not an open file, open it + if isinstance(target, str): + with open(target, "w") as fobj: + write_lal_cache(fobj, paths) + return target + + # write to file + for path in paths: + obs, tag, segment = filename_metadata(path) + print(obs, tag, segment[0], abs(segment), path, file=target) + + return target + + # -- class for HTML parsing --------------------------------------------------- class HvetoHTMLParser(HTMLParser): diff --git a/requirements.txt b/requirements.txt index 6d18993..3145c2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ dqsegdb gitpython -gwdetchar +gwdetchar >= 0.3.0 gwpy >= 0.14.0 gwtrigfind jinja2 -lscsoft-glue >= 2.0.0 lxml +MarkupPy >= 1.14 matplotlib >= 1.5 numpy >= 1.10 -pykerberos -pytest >= 3.0.0 +pathlib ; python_version < '3' +pytest >= 3.1.0 scipy pygments diff --git a/setup.cfg b/setup.cfg index 6703595..8ec7785 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,19 +40,19 @@ python_requires = >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* setup_requires = setuptools >=30.3.0 install_requires = - gwdetchar + gwdetchar >= 0.3.0 gwpy >=0.14.0 gwtrigfind jinja2 - lscsoft-glue >=2.0.0 lxml + MarkupPy >=1.14 matplotlib >=1.5 numpy >=1.10 - pykerberos + pathlib ; python_version < '3' scipy pygments tests_require = - pytest >=3.0.0 + pytest >=3.1.0 mock ; python_version < '3' [options.extras_require]