Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update download URLs for SDSS DR18 #3017

Merged
merged 1 commit into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ vizier
- Change the type of raised error when the catalog is not found in ``Vizier.get_catalog_metadata``
from ``IndexError`` to ``EmptyResponseError`` [#2980]

sdss
^^^^

- Support new SDSS-V DR18 access URLs. [#3017]
weaverba137 marked this conversation as resolved.
Show resolved Hide resolved

simbad
^^^^^^

Expand Down
64 changes: 46 additions & 18 deletions astroquery/sdss/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
Access Sloan Digital Sky Survey database online.
"""
import re
import warnings
import numpy as np
import sys
Expand All @@ -11,12 +12,12 @@
from astropy.table import Table, Column
from astropy.utils.exceptions import AstropyWarning

from ..query import BaseQuery
from . import conf
from ..utils import commons, async_to_sync, prepend_docstr_nosections
from ..exceptions import RemoteServiceError, NoResultsWarning
from .field_names import (photoobj_defs, specobj_defs,
crossid_defs, get_field_info)
from astroquery.query import BaseQuery
from astroquery.sdss import conf
from astroquery.utils import commons, async_to_sync, prepend_docstr_nosections
from astroquery.exceptions import RemoteServiceError, NoResultsWarning
from astroquery.sdss.field_names import (photoobj_defs, specobj_defs,
crossid_defs, get_field_info)

__all__ = ['SDSS', 'SDSSClass']
__doctest_skip__ = ['SDSSClass.*']
Expand All @@ -28,6 +29,7 @@
@async_to_sync
class SDSSClass(BaseQuery):
TIMEOUT = conf.timeout
PARSE_BOSS_RUN2D = re.compile(r'v(?P<major>[0-9]+)_(?P<minor>[0-9]+)_(?P<bugfix>[0-9]+)')
MAX_CROSSID_RADIUS = 3.0 * u.arcmin
QUERY_URL_SUFFIX_DR_OLD = '/dr{dr}/en/tools/search/x_sql.asp'
QUERY_URL_SUFFIX_DR_10 = '/dr{dr}/en/tools/search/x_sql.aspx'
Expand All @@ -39,8 +41,9 @@ class SDSSClass(BaseQuery):
'{rerun}/{run}/{camcol}/'
'frame-{band}-{run:06d}-{camcol}-'
'{field:04d}.fits.bz2')
SPECTRA_URL_SUFFIX = ('{base}/dr{dr}/sdss/spectro/redux/'
'{run2d}/spectra/{plate:0>4d}/'
# Note: {plate:0>4d} does allow 5-digit plates, while still zero-padding 3-digit plates.
SPECTRA_URL_SUFFIX = ('{base}/dr{dr}/{redux_path}/'
'{run2d}/{spectra_path}/{plate:0>4d}/'
'spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits')

TEMPLATES_URL = 'http://classic.sdss.org/dr7/algorithms/spectemplates/spDR2'
Expand Down Expand Up @@ -737,12 +740,39 @@ def get_spectra_async(self, *, coordinates=None, radius=2. * u.arcsec,
run2d = str(row['run2d'])
else:
run2d = row['run2d']
format_args = dict()
format_args['base'] = conf.sas_baseurl
format_args['dr'] = data_release
format_args['redux_path'] = 'sdss/spectro/redux'
format_args['run2d'] = run2d
format_args['spectra_path'] = 'spectra'
format_args['mjd'] = row['mjd']
try:
format_args['plate'] = row['plate']
format_args['fiber'] = row['fiberID']
except KeyError:
format_args['fieldid'] = row['fieldID']
format_args['catalogid'] = row['catalogID']
if data_release > 15 and run2d not in ('26', '103', '104'):
linkstr = linkstr.replace('/spectra/', '/spectra/full/')
link = linkstr.format(
base=conf.sas_baseurl, dr=data_release,
run2d=run2d, plate=row['plate'],
fiber=row['fiberID'], mjd=row['mjd'])
#
# Still want this applied to data_release > 17.
#
format_args['spectra_path'] = 'spectra/full'
if data_release > 17:
#
# This change will fix everything except run2d==v6_0_4 in DR18,
# which is handled by the if major > 5 block below.
#
format_args['redux_path'] = 'spectro/sdss/redux'
match_run2d = self.PARSE_BOSS_RUN2D.match(run2d)
if match_run2d is not None:
major = int(match_run2d.group('major'))
if major > 5:
linkstr = linkstr.replace('/{plate:0>4d}/', '/{fieldid:0>4d}p/{mjd:5d}/')
linkstr = linkstr.replace('spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits',
'spec-{fieldid:0>4d}-{mjd:5d}-{catalogid:0>11d}.fits')

link = linkstr.format(**format_args)
results.append(commons.FileContainer(link,
encoding='binary',
remote_timeout=timeout,
Expand Down Expand Up @@ -903,6 +933,8 @@ def get_images_async(self, coordinates=None, radius=2. * u.arcsec,
instrument = 'boss'
if data_release > 12:
instrument = 'eboss'
if data_release > 17:
instrument = 'prior-surveys/sdss4-dr17-eboss'
link = linkstr.format(base=conf.sas_baseurl, run=row['run'],
dr=data_release, instrument=instrument,
rerun=row['rerun'], camcol=row['camcol'],
Expand Down Expand Up @@ -1267,7 +1299,7 @@ def _get_crossid_url(self, data_release):
self._last_url = url
return url

def _rectangle_sql(self, ra, dec, width, height=None, cosdec=False):
def _rectangle_sql(self, ra, dec, width, height=None):
"""
Generate SQL for a rectangular query centered on ``ra``, ``dec``.

Expand All @@ -1284,10 +1316,6 @@ def _rectangle_sql(self, ra, dec, width, height=None, cosdec=False):
Width of rectangle in degrees.
height : float, optional
Height of rectangle in degrees. If not specified, ``width`` is used.
cosdec : bool, optional
If ``True`` apply ``cos(dec)`` correction to the rectangle.
Otherwise, rectangles become increasingly triangle-like
near the poles.

Returns
-------
Expand Down
4 changes: 2 additions & 2 deletions astroquery/sdss/field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from astropy.utils.data import get_pkg_data_contents
from astropy.utils.exceptions import AstropyUserWarning

from . import conf
from ..utils.mocks import MockResponse
from astroquery.sdss import conf
from astroquery.utils.mocks import MockResponse

__all__ = ['get_field_info', 'photoobj_defs', 'specobj_defs', 'crossid_defs']

Expand Down
100 changes: 96 additions & 4 deletions astroquery/sdss/tests/test_sdss.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from astropy.utils.exceptions import AstropyWarning
import pytest

from ... import sdss
from astroquery.sdss import conf
from astroquery import sdss
from astroquery.exceptions import TimeoutError
from astroquery.utils import commons
from astroquery.utils.mocks import MockResponse
weaverba137 marked this conversation as resolved.
Show resolved Hide resolved
from ...exceptions import TimeoutError
from ...utils import commons

# actual spectra/data are a bit heavy to include in astroquery, so we don't try
# to deal with them. Would be nice to find a few very small examples
Expand Down Expand Up @@ -114,7 +115,7 @@ def data_path(filename):
coords_column = Column(coords_list, name='coordinates')

# List of all data releases.
dr_list = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)
dr_list = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)


# We are not testing queries for DR11 because it is not easily available to
Expand Down Expand Up @@ -145,6 +146,47 @@ def url_tester_crossid(data_release):
assert sdss.SDSS._last_url == baseurl


def url_tester_images(data_release, rerun, run, camcol, band, field):
    """
    Build the imaging URL expected for a single frame.

    Parameters
    ----------
    data_release : int
        Data release number; it selects the ``instrument`` path component.
    rerun, run, camcol, field
        Frame identifiers substituted into the URL template.
    band : str
        Band substituted into the URL template.

    Returns
    -------
    str
        ``sdss.SDSS.IMAGING_URL_SUFFIX`` filled in with the values above and
        ``conf.sas_baseurl`` as the base.
    """
    # Check the highest threshold first: any release above 17 is also
    # above 12, so the most specific directory has to win.
    if data_release > 17:
        survey_dir = 'prior-surveys/sdss4-dr17-eboss'
    elif data_release > 12:
        survey_dir = 'eboss'
    else:
        survey_dir = 'boss'
    template_fields = dict(base=conf.sas_baseurl,
                           dr=data_release,
                           instrument=survey_dir,
                           run=run,
                           rerun=rerun,
                           camcol=camcol,
                           field=field,
                           band=band)
    return sdss.SDSS.IMAGING_URL_SUFFIX.format(**template_fields)


def url_tester_spectra(data_release, run2d, plate, mjd, fiber):
    """
    Build the spectrum URL expected for a single spectrum.

    Parameters
    ----------
    data_release : int
        Data release number; it selects the reduction and spectra directories.
    run2d : str
        Reduction version. A value of the form ``vN_M_K`` with ``N > 5``
        switches the URL to the field/catalog naming scheme.
    plate : int
        Plate number, or the field ID when the field/catalog scheme applies.
    mjd : int
        Modified Julian Date of the observation.
    fiber : int
        Fiber number, or the catalog ID when the field/catalog scheme applies.

    Returns
    -------
    str
        ``sdss.SDSS.SPECTRA_URL_SUFFIX`` (possibly rewritten for the
        field/catalog scheme) filled in with the values above.
    """
    template = sdss.SDSS.SPECTRA_URL_SUFFIX

    # Directory layout depends on the data release and the reduction version.
    uses_full_dir = data_release > 15 and run2d not in ('26', '103', '104')
    spectra_path = 'spectra/full' if uses_full_dir else 'spectra'
    if data_release > 17:
        redux_path = 'spectro/sdss/redux'
    else:
        redux_path = 'sdss/spectro/redux'

    url_fields = dict(base=conf.sas_baseurl,
                      dr=data_release,
                      redux_path=redux_path,
                      run2d=run2d,
                      spectra_path=spectra_path,
                      mjd=mjd)

    parsed = sdss.SDSS.PARSE_BOSS_RUN2D.match(run2d)
    if parsed is not None and int(parsed.group('major')) > 5:
        # Field/catalog naming: the plate directory becomes '<fieldid>p/<mjd>'
        # and the file name carries the catalog ID instead of the fiber.
        template = template.replace('/{plate:0>4d}/', '/{fieldid:0>4d}p/{mjd:5d}/')
        template = template.replace('spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits',
                                    'spec-{fieldid:0>4d}-{mjd:5d}-{catalogid:0>11d}.fits')
        url_fields.update(fieldid=plate, catalogid=fiber)
    else:
        url_fields.update(plate=plate, fiber=fiber)
    return template.format(**url_fields)


def compare_xid_data(xid, data):
for col in xid.colnames:
if xid[col].dtype.type is np.str_:
Expand Down Expand Up @@ -660,6 +702,56 @@ def test_get_images_coordinates_payload(patch_request, dr):
assert query_payload['photoScope'] == 'nearPrim'


@pytest.mark.parametrize("dr", dr_list)
def test_get_images_async_url(patch_request, patch_get_readable_fileobj, dr):
    """Check the download URLs produced by ``get_images_async`` for every band."""
    bands = 'ugriz'
    matches = Table()
    matches['run'] = [1, 12, 123, 1234]
    matches['camcol'] = [1, 2, 4, 6]
    matches['field'] = [10, 100, 1000, 10000]
    matches['rerun'] = [301, 301, 301, 301]

    download_urls = sdss.SDSS.get_images_async(matches=matches, band=bands,
                                               data_release=dr)

    # The results are compared row by row, with all bands of one row adjacent
    # (index = 5 * row + band).
    expected_urls = [url_tester_images(dr, row['rerun'], row['run'],
                                       row['camcol'], band, row['field'])
                     for row in matches
                     for band in bands]
    for position, expected in enumerate(expected_urls):
        assert download_urls[position]._target == expected


@pytest.mark.parametrize("dr", dr_list)
def test_get_spectra_async_url(patch_request, patch_get_readable_fileobj, dr):
    """
    Check the download URLs produced by ``get_spectra_async``.

    Plate/fiber matches are checked for every data release; field/catalog
    matches (``run2d`` ``v6_0_4``) are checked only for releases above 17.
    """
    plate_matches = Table()
    plate_matches['plate'] = [12, 123, 1234, 1234, 5432, 12345]
    plate_matches['fiberID'] = [10, 100, 621, 123, 456, 986]
    plate_matches['mjd'] = [54321, 54321, 54321, 65432, 76543, 87654]
    plate_matches['run2d'] = ['26', '26', '26', 'v5_12_2', 'v5_12_2', 'v5_12_2']
    plate_urls = sdss.SDSS.get_spectra_async(matches=plate_matches,
                                             data_release=dr)
    for index, row in enumerate(plate_matches):
        # Row iteration and positional indexing must agree before the URLs
        # are compared by position.
        assert plate_matches[index]['plate'] == row['plate']
        expected = url_tester_spectra(dr, row['run2d'], row['plate'],
                                      row['mjd'], row['fiberID'])
        assert plate_urls[index]._target == expected

    if dr <= 17:
        return

    # These rows carry fieldID/catalogID columns instead of plate/fiberID.
    field_matches = Table()
    field_matches['fieldID'] = [15170, 15265]
    field_matches['mjd'] = [59292, 59316]
    field_matches['catalogID'] = [4570401475, 4592713531]
    field_matches['run2d'] = ['v6_0_4', 'v6_0_4']
    field_urls = sdss.SDSS.get_spectra_async(matches=field_matches,
                                             data_release=dr)
    for index, row in enumerate(field_matches):
        expected = url_tester_spectra(dr, row['run2d'], row['fieldID'],
                                      row['mjd'], row['catalogID'])
        assert field_urls[index]._target == expected


@pytest.mark.parametrize("dr", dr_list)
def test_spectra_plate_mjd_payload(patch_request, dr):
expect = ("SELECT DISTINCT "
Expand Down
64 changes: 43 additions & 21 deletions astroquery/sdss/tests/test_sdss_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@

from urllib.error import URLError

from ... import sdss
from ...exceptions import TimeoutError
# Timeout is the superclass of both ReadTimeout and ConnectTimeout
from requests.exceptions import Timeout

from astroquery import sdss
from astroquery.exceptions import TimeoutError

# DR11 is a quasi-internal data release that does not have SkyServer support.
dr_list = (8, 9, 10, 12, 13, 14, 15, 16, 17)
dr_list = (8, 9, 10, 12, 13, 14, 15, 16, 17, 18)
dr_warn_list = (8, 9)


Expand Down Expand Up @@ -57,19 +60,38 @@ def test_sdss_spectrum(self, dr):
xid = sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec, spectro=True, data_release=dr)

assert isinstance(xid, Table)
sdss.SDSS.get_spectra(matches=xid, data_release=dr)
downloaded_files = sdss.SDSS.get_spectra(matches=xid, data_release=dr)
assert len(downloaded_files) == len(xid)

def test_sdss_spectrum_plate_mjd_fiber(self):
"""These plates are only available in recent data releases.
"""These plates are only available in relatively recent data releases.
"""
downloaded_files = sdss.SDSS.get_spectra(plate=9403, mjd=58018, fiberID=485, data_release=16)
assert len(downloaded_files) == 1
downloaded_files = sdss.SDSS.get_spectra(plate=10909, mjd=58280, fiberID=485, data_release=16)
assert len(downloaded_files) == 1

def test_sdss_spectrum_field_mjd_catalog(self):
"""These eFEDS spectra are only available in data releases >= 18.

https://data.sdss.org/sas/dr18/spectro/sdss/redux/v6_0_4/spectra/full/15170p/59292/spec-15170-59292-04570401475.fits
https://data.sdss.org/sas/dr18/spectro/sdss/redux/v6_0_4/spectra/full/15265p/59316/spec-15265-59316-04592713531.fits
"""
sdss.SDSS.get_spectra(plate=9403, mjd=58018, fiberID=485, data_release=16)
sdss.SDSS.get_spectra(plate=10909, mjd=58280, fiberID=485, data_release=16)
matches = Table()
matches['fieldID'] = [15170, 15265]
matches['mjd'] = [59292, 59316]
matches['catalogID'] = [4570401475, 4592713531]
matches['run2d'] = ['v6_0_4', 'v6_0_4']
downloaded_files = sdss.SDSS.get_spectra(matches=matches, data_release=18, cache=False)
assert len(downloaded_files) == 2

def test_sdss_spectrum_mjd(self):
sdss.SDSS.get_spectra(plate=2345, fiberID=572)
downloaded_files = sdss.SDSS.get_spectra(plate=2345, fiberID=572)
assert len(downloaded_files) == 1

def test_sdss_spectrum_coords(self):
sdss.SDSS.get_spectra(coordinates=self.coords)
downloaded_files = sdss.SDSS.get_spectra(coordinates=self.coords)
assert len(downloaded_files) == 1

def test_sdss_sql(self):
query = """
Expand All @@ -88,16 +110,20 @@ class = 'galaxy'
def test_sdss_image(self):
xid = sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec)
assert isinstance(xid, Table)
sdss.SDSS.get_images(matches=xid)
downloaded_files = sdss.SDSS.get_images(matches=xid)
assert len(downloaded_files) == len(xid)

def test_sdss_template(self):
sdss.SDSS.get_spectral_template('qso')
downloaded_files = sdss.SDSS.get_spectral_template('qso')
assert len(downloaded_files) == 1

def test_sdss_image_run(self):
sdss.SDSS.get_images(run=1904, camcol=3, field=164)
downloaded_files = sdss.SDSS.get_images(run=1904, camcol=3, field=164)
assert len(downloaded_files) == 1

def test_sdss_image_coord(self):
sdss.SDSS.get_images(coordinates=self.coords)
downloaded_files = sdss.SDSS.get_images(coordinates=self.coords)
assert len(downloaded_files) == 1

def test_sdss_specobj(self):
colnames = ['ra', 'dec', 'objid', 'run', 'rerun', 'camcol', 'field',
Expand Down Expand Up @@ -157,17 +183,13 @@ def test_sdss_photoobj(self):
else:
assert xid[i][c] == row[c]

@pytest.mark.xfail(reason=("Timeout isn't raised since switching to "
"self._request, fix it before merging #586"))
def test_query_timeout(self):
with pytest.raises(TimeoutError):
sdss.SDSS.query_region(self.coords, timeout=self.mintimeout)
with pytest.raises(Timeout):
sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec, cache=False, timeout=self.mintimeout)

@pytest.mark.xfail(reason=("Timeout isn't raised since switching to "
"self._request, fix it before merging #586"))
def test_spectra_timeout(self):
with pytest.raises(TimeoutError):
sdss.SDSS.get_spectra(coordinates=self.coords, timeout=self.mintimeout)
with pytest.raises(Timeout):
sdss.SDSS.get_spectra(coordinates=self.coords, cache=False, timeout=self.mintimeout)

def test_query_non_default_field(self):
# A regression test for #469
Expand Down
Loading