Skip to content

Commit

Permalink
Merge pull request #3017 from weaverba137/sdss-dr18-urls
Browse files Browse the repository at this point in the history
Update download URLs for SDSS DR18
  • Loading branch information
bsipocz committed Jun 28, 2024
2 parents 5d42c1f + b014ad0 commit 95c9216
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 45 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ vizier
- Change the type of raised error when the catalog is not found in ``Vizier.get_catalog_metadata``
from ``IndexError`` to ``EmptyResponseError`` [#2980]

sdss
^^^^

- Support new SDSS-V DR18 access URLs. [#3017]

simbad
^^^^^^

Expand Down
64 changes: 46 additions & 18 deletions astroquery/sdss/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
Access Sloan Digital Sky Survey database online.
"""
import re
import warnings
import numpy as np
import sys
Expand All @@ -11,12 +12,12 @@
from astropy.table import Table, Column
from astropy.utils.exceptions import AstropyWarning

from ..query import BaseQuery
from . import conf
from ..utils import commons, async_to_sync, prepend_docstr_nosections
from ..exceptions import RemoteServiceError, NoResultsWarning
from .field_names import (photoobj_defs, specobj_defs,
crossid_defs, get_field_info)
from astroquery.query import BaseQuery
from astroquery.sdss import conf
from astroquery.utils import commons, async_to_sync, prepend_docstr_nosections
from astroquery.exceptions import RemoteServiceError, NoResultsWarning
from astroquery.sdss.field_names import (photoobj_defs, specobj_defs,
crossid_defs, get_field_info)

__all__ = ['SDSS', 'SDSSClass']
__doctest_skip__ = ['SDSSClass.*']
Expand All @@ -28,6 +29,7 @@
@async_to_sync
class SDSSClass(BaseQuery):
TIMEOUT = conf.timeout
PARSE_BOSS_RUN2D = re.compile(r'v(?P<major>[0-9]+)_(?P<minor>[0-9]+)_(?P<bugfix>[0-9]+)')
MAX_CROSSID_RADIUS = 3.0 * u.arcmin
QUERY_URL_SUFFIX_DR_OLD = '/dr{dr}/en/tools/search/x_sql.asp'
QUERY_URL_SUFFIX_DR_10 = '/dr{dr}/en/tools/search/x_sql.aspx'
Expand All @@ -39,8 +41,9 @@ class SDSSClass(BaseQuery):
'{rerun}/{run}/{camcol}/'
'frame-{band}-{run:06d}-{camcol}-'
'{field:04d}.fits.bz2')
SPECTRA_URL_SUFFIX = ('{base}/dr{dr}/sdss/spectro/redux/'
'{run2d}/spectra/{plate:0>4d}/'
# Note: {plate:0>4d} does allow 5-digit plates, while still zero-padding 3-digit plates.
SPECTRA_URL_SUFFIX = ('{base}/dr{dr}/{redux_path}/'
'{run2d}/{spectra_path}/{plate:0>4d}/'
'spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits')

TEMPLATES_URL = 'http://classic.sdss.org/dr7/algorithms/spectemplates/spDR2'
Expand Down Expand Up @@ -737,12 +740,39 @@ def get_spectra_async(self, *, coordinates=None, radius=2. * u.arcsec,
run2d = str(row['run2d'])
else:
run2d = row['run2d']
format_args = dict()
format_args['base'] = conf.sas_baseurl
format_args['dr'] = data_release
format_args['redux_path'] = 'sdss/spectro/redux'
format_args['run2d'] = run2d
format_args['spectra_path'] = 'spectra'
format_args['mjd'] = row['mjd']
try:
format_args['plate'] = row['plate']
format_args['fiber'] = row['fiberID']
except KeyError:
format_args['fieldid'] = row['fieldID']
format_args['catalogid'] = row['catalogID']
if data_release > 15 and run2d not in ('26', '103', '104'):
linkstr = linkstr.replace('/spectra/', '/spectra/full/')
link = linkstr.format(
base=conf.sas_baseurl, dr=data_release,
run2d=run2d, plate=row['plate'],
fiber=row['fiberID'], mjd=row['mjd'])
#
# Still want this applied to data_release > 17.
#
format_args['spectra_path'] = 'spectra/full'
if data_release > 17:
#
# This change will fix everything except run2d==v6_0_4 in DR18,
# which is handled by the if major > 5 block below.
#
format_args['redux_path'] = 'spectro/sdss/redux'
match_run2d = self.PARSE_BOSS_RUN2D.match(run2d)
if match_run2d is not None:
major = int(match_run2d.group('major'))
if major > 5:
linkstr = linkstr.replace('/{plate:0>4d}/', '/{fieldid:0>4d}p/{mjd:5d}/')
linkstr = linkstr.replace('spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits',
'spec-{fieldid:0>4d}-{mjd:5d}-{catalogid:0>11d}.fits')

link = linkstr.format(**format_args)
results.append(commons.FileContainer(link,
encoding='binary',
remote_timeout=timeout,
Expand Down Expand Up @@ -903,6 +933,8 @@ def get_images_async(self, coordinates=None, radius=2. * u.arcsec,
instrument = 'boss'
if data_release > 12:
instrument = 'eboss'
if data_release > 17:
instrument = 'prior-surveys/sdss4-dr17-eboss'
link = linkstr.format(base=conf.sas_baseurl, run=row['run'],
dr=data_release, instrument=instrument,
rerun=row['rerun'], camcol=row['camcol'],
Expand Down Expand Up @@ -1267,7 +1299,7 @@ def _get_crossid_url(self, data_release):
self._last_url = url
return url

def _rectangle_sql(self, ra, dec, width, height=None, cosdec=False):
def _rectangle_sql(self, ra, dec, width, height=None):
"""
Generate SQL for a rectangular query centered on ``ra``, ``dec``.
Expand All @@ -1284,10 +1316,6 @@ def _rectangle_sql(self, ra, dec, width, height=None, cosdec=False):
Width of rectangle in degrees.
height : float, optional
Height of rectangle in degrees. If not specified, ``width`` is used.
cosdec : bool, optional
If ``True`` apply ``cos(dec)`` correction to the rectangle.
Otherwise, rectangles become increasingly triangle-like
near the poles.
Returns
-------
Expand Down
4 changes: 2 additions & 2 deletions astroquery/sdss/field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from astropy.utils.data import get_pkg_data_contents
from astropy.utils.exceptions import AstropyUserWarning

from . import conf
from ..utils.mocks import MockResponse
from astroquery.sdss import conf
from astroquery.utils.mocks import MockResponse

__all__ = ['get_field_info', 'photoobj_defs', 'specobj_defs', 'crossid_defs']

Expand Down
100 changes: 96 additions & 4 deletions astroquery/sdss/tests/test_sdss.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from astropy.utils.exceptions import AstropyWarning
import pytest

from ... import sdss
from astroquery.sdss import conf
from astroquery import sdss
from astroquery.exceptions import TimeoutError
from astroquery.utils import commons
from astroquery.utils.mocks import MockResponse
from ...exceptions import TimeoutError
from ...utils import commons

# Actual spectra/data are a bit heavy to include in astroquery, so we don't try
# to deal with them. It would be nice to find a few very small examples.
Expand Down Expand Up @@ -114,7 +115,7 @@ def data_path(filename):
coords_column = Column(coords_list, name='coordinates')

# List of all data releases.
dr_list = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)
dr_list = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)


# We are not testing queries for DR11 because it is not easily available to
Expand Down Expand Up @@ -145,6 +146,47 @@ def url_tester_crossid(data_release):
assert sdss.SDSS._last_url == baseurl


def url_tester_images(data_release, rerun, run, camcol, band, field):
    """Return the image download URL expected for one frame.

    The URL is built from ``sdss.SDSS.IMAGING_URL_SUFFIX`` and
    ``conf.sas_baseurl``.  Only the ``instrument`` path component depends
    on the data release; every other argument is passed straight through
    to the template.

    Parameters
    ----------
    data_release : int
        Data release number; selects the ``instrument`` path component.
    rerun, run, camcol, band, field
        Values substituted into the URL template under the same names.

    Returns
    -------
    str
        The formatted URL.
    """
    # Test the highest threshold first so each release maps to one branch.
    if data_release > 17:
        instrument = 'prior-surveys/sdss4-dr17-eboss'
    elif data_release > 12:
        instrument = 'eboss'
    else:
        instrument = 'boss'
    template = sdss.SDSS.IMAGING_URL_SUFFIX
    return template.format(base=conf.sas_baseurl,
                           dr=data_release,
                           instrument=instrument,
                           rerun=rerun,
                           run=run,
                           camcol=camcol,
                           field=field,
                           band=band)


# Build the spectrum download URL that the tests expect for one row, starting
# from sdss.SDSS.SPECTRA_URL_SUFFIX and conf.sas_baseurl.
#
# data_release selects the path components, run2d is the reduction version
# string, and plate/mjd/fiber identify the spectrum.  When the field-based
# layout is selected (eFEDS flag below), ``plate`` is substituted as
# ``fieldid`` and ``fiber`` as ``catalogid``.
#
# Returns the formatted URL string.
def url_tester_spectra(data_release, run2d, plate, mjd, fiber):
linkstr = sdss.SDSS.SPECTRA_URL_SUFFIX
# Set to True when the template is rewritten to the field/MJD/catalog layout.
eFEDS = False
# Default path components; overridden below depending on the data release.
redux_path = 'sdss/spectro/redux'
spectra_path = 'spectra'
# The 'full' sub-directory is used unless run2d is one of the three listed values.
if data_release > 15 and run2d not in ('26', '103', '104'):
spectra_path = 'spectra/full'
if data_release > 17:
redux_path = 'spectro/sdss/redux'
# NOTE(review): confirm whether the run2d check below is meant to apply only
# when data_release > 17 or to every data release.
match_run2d = sdss.SDSS.PARSE_BOSS_RUN2D.match(run2d)
# run2d values that do not match the pattern (e.g. '26') keep the plate layout.
if match_run2d is not None:
major = int(match_run2d.group('major'))
if major > 5:
eFEDS = True
# Swap the plate directory and the file name for their
# fieldid/mjd/catalogid counterparts.
linkstr = linkstr.replace('/{plate:0>4d}/', '/{fieldid:0>4d}p/{mjd:5d}/')
linkstr = linkstr.replace('spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits',
'spec-{fieldid:0>4d}-{mjd:5d}-{catalogid:0>11d}.fits')
# The rewritten template has different placeholder names, so the same
# positional inputs are passed under different keywords.
if eFEDS:
url = linkstr.format(base=conf.sas_baseurl, dr=data_release,
redux_path=redux_path, run2d=run2d, spectra_path=spectra_path,
fieldid=plate, catalogid=fiber, mjd=mjd)
else:
url = linkstr.format(base=conf.sas_baseurl, dr=data_release,
redux_path=redux_path, run2d=run2d, spectra_path=spectra_path,
plate=plate, fiber=fiber, mjd=mjd)
return url


def compare_xid_data(xid, data):
for col in xid.colnames:
if xid[col].dtype.type is np.str_:
Expand Down Expand Up @@ -660,6 +702,56 @@ def test_get_images_coordinates_payload(patch_request, dr):
assert query_payload['photoScope'] == 'nearPrim'


@pytest.mark.parametrize("dr", dr_list)
def test_get_images_async_url(patch_request, patch_get_readable_fileobj, dr):
    """Check the image URLs produced by ``get_images_async`` for each release.

    A small table of run/camcol/field/rerun values is passed as ``matches``
    and every returned target is compared with ``url_tester_images``.
    """
    bands = 'ugriz'
    matches = Table()
    matches['run'] = [1, 12, 123, 1234]
    matches['camcol'] = [1, 2, 4, 6]
    matches['field'] = [10, 100, 1000, 10000]
    matches['rerun'] = [301, 301, 301, 301]
    download_urls = sdss.SDSS.get_images_async(matches=matches, band=bands,
                                               data_release=dr)
    # Results are indexed row by row, with all bands of one row adjacent.
    for row_index, row in enumerate(matches):
        offset = len(bands) * row_index
        for band_index, band in enumerate(bands):
            expected = url_tester_images(dr, row['rerun'], row['run'],
                                         row['camcol'], band, row['field'])
            assert download_urls[offset + band_index]._target == expected


@pytest.mark.parametrize("dr", dr_list)
def test_get_spectra_async_url(patch_request, patch_get_readable_fileobj, dr):
    """Check the spectrum URLs produced by ``get_spectra_async`` for each release.

    Plate/fiber based rows are checked for every data release.  For data
    releases above 17, rows identified by fieldID/catalogID with a
    ``v6_0_4`` run2d are checked as well.
    """
    def assert_urls(matches, id_column, fiber_column):
        # Compare every returned target with the independently built URL.
        download_urls = sdss.SDSS.get_spectra_async(matches=matches,
                                                    data_release=dr)
        # Surplus or missing results must fail the test rather than pass
        # silently.
        assert len(download_urls) == len(matches)
        for container, row in zip(download_urls, matches):
            assert container._target == url_tester_spectra(dr, row['run2d'],
                                                           row[id_column],
                                                           row['mjd'],
                                                           row[fiber_column])

    matches = Table()
    matches['plate'] = [12, 123, 1234, 1234, 5432, 12345]
    matches['fiberID'] = [10, 100, 621, 123, 456, 986]
    matches['mjd'] = [54321, 54321, 54321, 65432, 76543, 87654]
    matches['run2d'] = ['26', '26', '26', 'v5_12_2', 'v5_12_2', 'v5_12_2']
    assert_urls(matches, 'plate', 'fiberID')

    if dr > 17:
        matches = Table()
        matches['fieldID'] = [15170, 15265]
        matches['mjd'] = [59292, 59316]
        matches['catalogID'] = [4570401475, 4592713531]
        matches['run2d'] = ['v6_0_4', 'v6_0_4']
        assert_urls(matches, 'fieldID', 'catalogID')


@pytest.mark.parametrize("dr", dr_list)
def test_spectra_plate_mjd_payload(patch_request, dr):
expect = ("SELECT DISTINCT "
Expand Down
64 changes: 43 additions & 21 deletions astroquery/sdss/tests/test_sdss_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@

from urllib.error import URLError

from ... import sdss
from ...exceptions import TimeoutError
# Timeout is the superclass of both ReadTimeout and ConnectTimeout
from requests.exceptions import Timeout

from astroquery import sdss
from astroquery.exceptions import TimeoutError

# DR11 is a quasi-internal data release that does not have SkyServer support.
dr_list = (8, 9, 10, 12, 13, 14, 15, 16, 17)
dr_list = (8, 9, 10, 12, 13, 14, 15, 16, 17, 18)
dr_warn_list = (8, 9)


Expand Down Expand Up @@ -57,19 +60,38 @@ def test_sdss_spectrum(self, dr):
xid = sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec, spectro=True, data_release=dr)

assert isinstance(xid, Table)
sdss.SDSS.get_spectra(matches=xid, data_release=dr)
downloaded_files = sdss.SDSS.get_spectra(matches=xid, data_release=dr)
assert len(downloaded_files) == len(xid)

def test_sdss_spectrum_plate_mjd_fiber(self):
"""These plates are only available in recent data releases.
"""These plates are only available in relatively recent data releases.
"""
downloaded_files = sdss.SDSS.get_spectra(plate=9403, mjd=58018, fiberID=485, data_release=16)
assert len(downloaded_files) == 1
downloaded_files = sdss.SDSS.get_spectra(plate=10909, mjd=58280, fiberID=485, data_release=16)
assert len(downloaded_files) == 1

def test_sdss_spectrum_field_mjd_catalog(self):
"""These eFEDS spectra are only available in data releases >= 18.
https://data.sdss.org/sas/dr18/spectro/sdss/redux/v6_0_4/spectra/full/15170p/59292/spec-15170-59292-04570401475.fits
https://data.sdss.org/sas/dr18/spectro/sdss/redux/v6_0_4/spectra/full/15265p/59316/spec-15265-59316-04592713531.fits
"""
sdss.SDSS.get_spectra(plate=9403, mjd=58018, fiberID=485, data_release=16)
sdss.SDSS.get_spectra(plate=10909, mjd=58280, fiberID=485, data_release=16)
matches = Table()
matches['fieldID'] = [15170, 15265]
matches['mjd'] = [59292, 59316]
matches['catalogID'] = [4570401475, 4592713531]
matches['run2d'] = ['v6_0_4', 'v6_0_4']
downloaded_files = sdss.SDSS.get_spectra(matches=matches, data_release=18, cache=False)
assert len(downloaded_files) == 2

def test_sdss_spectrum_mjd(self):
sdss.SDSS.get_spectra(plate=2345, fiberID=572)
downloaded_files = sdss.SDSS.get_spectra(plate=2345, fiberID=572)
assert len(downloaded_files) == 1

def test_sdss_spectrum_coords(self):
sdss.SDSS.get_spectra(coordinates=self.coords)
downloaded_files = sdss.SDSS.get_spectra(coordinates=self.coords)
assert len(downloaded_files) == 1

def test_sdss_sql(self):
query = """
Expand All @@ -88,16 +110,20 @@ class = 'galaxy'
def test_sdss_image(self):
xid = sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec)
assert isinstance(xid, Table)
sdss.SDSS.get_images(matches=xid)
downloaded_files = sdss.SDSS.get_images(matches=xid)
assert len(downloaded_files) == len(xid)

def test_sdss_template(self):
sdss.SDSS.get_spectral_template('qso')
downloaded_files = sdss.SDSS.get_spectral_template('qso')
assert len(downloaded_files) == 1

def test_sdss_image_run(self):
sdss.SDSS.get_images(run=1904, camcol=3, field=164)
downloaded_files = sdss.SDSS.get_images(run=1904, camcol=3, field=164)
assert len(downloaded_files) == 1

def test_sdss_image_coord(self):
sdss.SDSS.get_images(coordinates=self.coords)
downloaded_files = sdss.SDSS.get_images(coordinates=self.coords)
assert len(downloaded_files) == 1

def test_sdss_specobj(self):
colnames = ['ra', 'dec', 'objid', 'run', 'rerun', 'camcol', 'field',
Expand Down Expand Up @@ -157,17 +183,13 @@ def test_sdss_photoobj(self):
else:
assert xid[i][c] == row[c]

@pytest.mark.xfail(reason=("Timeout isn't raised since switching to "
"self._request, fix it before merging #586"))
def test_query_timeout(self):
with pytest.raises(TimeoutError):
sdss.SDSS.query_region(self.coords, timeout=self.mintimeout)
with pytest.raises(Timeout):
sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec, cache=False, timeout=self.mintimeout)

@pytest.mark.xfail(reason=("Timeout isn't raised since switching to "
"self._request, fix it before merging #586"))
def test_spectra_timeout(self):
with pytest.raises(TimeoutError):
sdss.SDSS.get_spectra(coordinates=self.coords, timeout=self.mintimeout)
with pytest.raises(Timeout):
sdss.SDSS.get_spectra(coordinates=self.coords, cache=False, timeout=self.mintimeout)

def test_query_non_default_field(self):
# A regression test for #469
Expand Down

0 comments on commit 95c9216

Please sign in to comment.