Skip to content

Commit

Permalink
Merge pull request #431 from desihub/db-integration-test
Browse files Browse the repository at this point in the history
Load DB in integration test
  • Loading branch information
sbailey authored Sep 29, 2017
2 parents 19ae2da + 0712f16 commit c4032a1
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 142 deletions.
210 changes: 131 additions & 79 deletions py/desispec/database/datachallenge.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,33 +173,35 @@ class ObsList(SchemaMixin, Base):
"""Representation of the obslist table.
"""

mjd = Column(Float, nullable=False)
exptime = Column(Float, nullable=False)
program = Column(String, nullable=False)
passnum = Column(Integer, nullable=False)
tileid = Column(Integer, primary_key=True, autoincrement=False)
passnum = Column(Integer, nullable=False)
ra = Column(Float, nullable=False)
dec = Column(Float, nullable=False)
moonfrac = Column(Float, nullable=False)
moondist = Column(Float, nullable=False)
moonalt = Column(Float, nullable=False)
night = Column(String, nullable=False)
mjd = Column(Float, nullable=False)
exptime = Column(Float, nullable=False)
seeing = Column(Float, nullable=False)
airmass = Column(Float, nullable=False)
# program = Column(String, nullable=False)
moonfrac = Column(Float, nullable=False)
moonalt = Column(Float, nullable=False)
moonsep = Column(Float, nullable=False)
# dateobs = Column(DateTime(timezone=True), nullable=False)

def __repr__(self):
return ("<ObsList(mjd={0.mjd:f}, " +
"exptime={0.exptime:f}, " +
"program='{0.program}', " +
"passnum={0.passnum:d}, " +
return ("<ObsList(" +
"tileid={0.tileid:d}, " +
"passnum={0.passnum:d}, " +
"ra={0.ra:f}, dec={0.dec:f}, " +
"ebmv={0.ebmv:f}, " +
"night='{0.night}', " +
"mjd={0.mjd:f}, " +
"exptime={0.exptime:f}, " +
"seeing={0.seeing:f}, " +
"airmass={0.airmass:f}," +
"moonfrac={0.moonfrac:f}, " +
"moondist={0.moondist:f}, " +
"moonalt={0.moonalt:f}, " +
"seeing={0.seeing:f}, " +
"airmass={0.airmass:f})>").format(self)
"moonsep={0.moonsep:f}" +
")>").format(self)


class ZCat(SchemaMixin, Base):
Expand Down Expand Up @@ -391,6 +393,8 @@ def load_zcat(datapath, run1d='dc17a2', q3c=False):
log = get_logger()
zbestpath = join(datapath, 'spectro', 'redux', run1d, 'spectra-64',
'*', '*', 'zbest-64-*.fits')
# zbestpath = join(datapath, 'spectra-64',
# '*', '*', 'zbest-64-*.fits')
log.info("Using zbest file search path: %s.", zbestpath)
zbest_files = glob(zbestpath)
if len(zbest_files) == 0:
Expand All @@ -406,12 +410,16 @@ def load_zcat(datapath, run1d='dc17a2', q3c=False):
data = hdulist[1].data
log.info("Read data from %s.", f)
good_targetids = data['TARGETID'] != 0
q = dbSession.query(ZCat).filter(ZCat.targetid.in_(data['TARGETID'].tolist())).all()
if len(q) != 0:
log.warning("Duplicate TARGETID found in %s.", f)
for z in q:
log.warning("Duplicate TARGETID = %d.", z.targetid)
good_targetids = good_targetids & (data['TARGETID'] != z.targetid)
#
# If there are too many targetids, the in_ clause will blow up.
# Disabling this test, and crossing fingers.
#
# q = dbSession.query(ZCat).filter(ZCat.targetid.in_(data['TARGETID'].tolist())).all()
# if len(q) != 0:
# log.warning("Duplicate TARGETID found in %s.", f)
# for z in q:
# log.warning("Duplicate TARGETID = %d.", z.targetid)
# good_targetids = good_targetids & (data['TARGETID'] != z.targetid)
data_list = [data[col][good_targetids].tolist()
for col in data.names if col != 'COEFF']
data_names = [col.lower() for col in data.names if col != 'COEFF']
Expand Down Expand Up @@ -460,7 +468,7 @@ def load_fiberassign(datapath, maxpass=4, q3c=False, latest_epoch=False):
from astropy.io import fits
from desiutil.log import get_logger
log = get_logger()
fiberpath = join(datapath, 'fiberassign', 'output',
fiberpath = join(datapath, 'fiberassign',
'tile_*.fits')
log.info("Using tile file search path: %s.", fiberpath)
tile_files = glob(fiberpath)
Expand Down Expand Up @@ -540,61 +548,26 @@ def q3c_index(table):
return


def main():
"""Entry point for command-line script.
def setup_db(options):
"""Initialize the database connection.
Parameters
----------
options : :class:`argparse.Namespace`
Parsed command-line options.
Returns
-------
:class:`int`
An integer suitable for passing to :func:`sys.exit`.
:class:`bool`
``True`` if the configured database is a PostgreSQL database.
"""
global engine, schemaname
from os import remove
from os.path import basename, exists, join
from sys import argv
from argparse import ArgumentParser
from pkg_resources import resource_filename
from pytz import utc
from desiutil.log import get_logger, DEBUG, INFO
#
# command-line arguments
#
prsr = ArgumentParser(description=("Load a data challenge simulation into a " +
"database."),
prog=basename(argv[0]))
prsr.add_argument('-c', '--clobber', action='store_true', dest='clobber',
help='Delete any existing file(s) before loading.')
prsr.add_argument('-f', '--filename', action='store', dest='dbfile',
default='quicksurvey.db', metavar='FILE',
help="Store data in FILE.")
prsr.add_argument('-H', '--hostname', action='store', dest='hostname',
metavar='HOSTNAME',
help='If specified, connect to a PostgreSQL database on HOSTNAME.')
prsr.add_argument('-m', '--max-rows', action='store', dest='maxrows',
type=int, default=0, metavar='M',
help="Load up to M rows in the tables (default is all rows).")
prsr.add_argument('-r', '--rows', action='store', dest='chunksize',
type=int, default=50000, metavar='N',
help="Load N rows at a time (default %(default)s).")
prsr.add_argument('-s', '--schema', action='store', dest='schema',
metavar='SCHEMA',
help='Set the schema name in the PostgreSQL database.')
prsr.add_argument('-U', '--username', action='store', dest='username',
metavar='USERNAME', default='desidev_admin',
help="If specified, connect to a PostgreSQL database with USERNAME.")
prsr.add_argument('-v', '--verbose', action='store_true', dest='verbose',
help='Print extra information.')
prsr.add_argument('datapath', metavar='DIR', help='Load the data in DIR.')
options = prsr.parse_args()
#
# Logging
#
if options.verbose:
log = get_logger(DEBUG, timestamp=True)
else:
log = get_logger(INFO, timestamp=True)
from desiutil.log import get_logger
log = get_logger()
#
# Schema.
# Schema creation
#
if options.schema:
schemaname = options.schema
Expand Down Expand Up @@ -635,6 +608,85 @@ def main():
tab.schema = schemaname
Base.metadata.create_all(engine)
log.info("Finished creating tables.")
return postgresql


def get_options(*args):
    """Parse command-line options.

    Parameters
    ----------
    args : iterable
        If arguments are passed, use them instead of ``sys.argv``.

    Returns
    -------
    :class:`argparse.Namespace`
        The parsed options.
    """
    from sys import argv
    from os.path import basename
    from argparse import ArgumentParser
    prsr = ArgumentParser(description=("Load a data challenge simulation into a "
                                       "database."),
                          prog=basename(argv[0]))
    prsr.add_argument('-c', '--clobber', action='store_true', dest='clobber',
                      help='Delete any existing file(s) before loading.')
    prsr.add_argument('-f', '--filename', action='store', dest='dbfile',
                      default='quicksurvey.db', metavar='FILE',
                      help="Store data in FILE.")
    prsr.add_argument('-H', '--hostname', action='store', dest='hostname',
                      metavar='HOSTNAME',
                      help='If specified, connect to a PostgreSQL database on HOSTNAME.')
    prsr.add_argument('-m', '--max-rows', action='store', dest='maxrows',
                      type=int, default=0, metavar='M',
                      help="Load up to M rows in the tables (default is all rows).")
    prsr.add_argument('-r', '--rows', action='store', dest='chunksize',
                      type=int, default=50000, metavar='N',
                      help="Load N rows at a time (default %(default)s).")
    prsr.add_argument('-s', '--schema', action='store', dest='schema',
                      metavar='SCHEMA',
                      help='Set the schema name in the PostgreSQL database.')
    prsr.add_argument('-U', '--username', action='store', dest='username',
                      metavar='USERNAME', default='desidev_admin',
                      help="If specified, connect to a PostgreSQL database with USERNAME.")
    prsr.add_argument('-v', '--verbose', action='store_true', dest='verbose',
                      help='Print extra information.')
    prsr.add_argument('datapath', metavar='DIR', help='Load the data in DIR.')
    # An empty ``args`` tuple becomes None, which makes argparse fall back
    # to the process command line; otherwise the explicit arguments are used.
    return prsr.parse_args(args or None)


def main():
"""Entry point for command-line script.
Returns
-------
:class:`int`
An integer suitable for passing to :func:`sys.exit`.
"""
from os.path import join
# from pkg_resources import resource_filename
from pytz import utc
from desiutil.log import get_logger, DEBUG, INFO
#
# command-line arguments
#
options = get_options()
#
# Logging
#
if options.verbose:
log = get_logger(DEBUG, timestamp=True)
else:
log = get_logger(INFO, timestamp=True)
#
# Initialize DB
#
postgresql = setup_db(options)
#
# Load configuration
#
Expand Down Expand Up @@ -665,16 +717,16 @@ def main():
'convert': None,
'q3c': postgresql,
'chunksize': options.chunksize,
'maxrows': options.maxrows},
{'filepath': join(options.datapath, 'twopct.ecsv'),
'tcls': ObsList,
'hdu': 1,
'expand': {'PASS': 'passnum'},
# 'convert': {'dateobs': lambda x: convert_dateobs(x, tzinfo=utc)},
'convert': None,
'q3c': postgresql,
'chunksize': options.chunksize,
'maxrows': options.maxrows},]
# {'filepath': join(options.datapath, 'survey', 'exposures.fits'),
# 'tcls': ObsList,
# 'hdu': 1,
# 'expand': {'PASS': 'passnum'},
# # 'convert': {'dateobs': lambda x: convert_dateobs(x, tzinfo=utc)},
# 'convert': None,
# 'q3c': postgresql,
# 'chunksize': options.chunksize,
# 'maxrows': options.maxrows},]
# {'filepath': join(options.datapath, 'output', 'dark', '4', 'zcat.fits'),
# 'tcls': ZCat,
# 'hdu': 1,
Expand Down Expand Up @@ -704,7 +756,7 @@ def main():
q = dbSession.query(ZCat).first()
if q is None:
log.info("Loading ZCat from %s.", options.datapath)
load_zcat(options.datapath)
load_zcat(options.datapath, run1d='mini')
log.info("Finished loading ZCat.")
else:
log.info("ZCat table already loaded.")
Expand Down
Loading

0 comments on commit c4032a1

Please sign in to comment.