From b014ad083c159f8c9cd397d7fa12f6bbe569347c Mon Sep 17 00:00:00 2001 From: Benjamin Alan Weaver Date: Wed, 29 May 2024 15:06:13 -0700 Subject: [PATCH] Update module for DR18 --- CHANGES.rst | 5 ++ astroquery/sdss/core.py | 64 ++++++++++---- astroquery/sdss/field_names.py | 4 +- astroquery/sdss/tests/test_sdss.py | 100 +++++++++++++++++++++- astroquery/sdss/tests/test_sdss_remote.py | 64 +++++++++----- 5 files changed, 192 insertions(+), 45 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 13c9e8a3db..3fd0292457 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -42,6 +42,11 @@ vizier - Change the type of raised error when the catalog is not found in ``Vizier.get_catalog_metadata`` from ``IndexError`` to ``EmptyResponseError`` [#2980] +sdss +^^^^ + +- Support new SDSS-V DR18 access URLs. [#3017] + simbad ^^^^^^ diff --git a/astroquery/sdss/core.py b/astroquery/sdss/core.py index fdee3ebad7..56ea50db5a 100644 --- a/astroquery/sdss/core.py +++ b/astroquery/sdss/core.py @@ -2,6 +2,7 @@ """ Access Sloan Digital Sky Survey database online. """ +import re import warnings import numpy as np import sys @@ -11,12 +12,12 @@ from astropy.table import Table, Column from astropy.utils.exceptions import AstropyWarning -from ..query import BaseQuery -from . 
import conf -from ..utils import commons, async_to_sync, prepend_docstr_nosections -from ..exceptions import RemoteServiceError, NoResultsWarning -from .field_names import (photoobj_defs, specobj_defs, - crossid_defs, get_field_info) +from astroquery.query import BaseQuery +from astroquery.sdss import conf +from astroquery.utils import commons, async_to_sync, prepend_docstr_nosections +from astroquery.exceptions import RemoteServiceError, NoResultsWarning +from astroquery.sdss.field_names import (photoobj_defs, specobj_defs, + crossid_defs, get_field_info) __all__ = ['SDSS', 'SDSSClass'] __doctest_skip__ = ['SDSSClass.*'] @@ -28,6 +29,7 @@ @async_to_sync class SDSSClass(BaseQuery): TIMEOUT = conf.timeout + PARSE_BOSS_RUN2D = re.compile(r'v(?P<major>[0-9]+)_(?P<minor>[0-9]+)_(?P<patch>[0-9]+)') MAX_CROSSID_RADIUS = 3.0 * u.arcmin QUERY_URL_SUFFIX_DR_OLD = '/dr{dr}/en/tools/search/x_sql.asp' QUERY_URL_SUFFIX_DR_10 = '/dr{dr}/en/tools/search/x_sql.aspx' @@ -39,8 +41,9 @@ class SDSSClass(BaseQuery): '{rerun}/{run}/{camcol}/' 'frame-{band}-{run:06d}-{camcol}-' '{field:04d}.fits.bz2') - SPECTRA_URL_SUFFIX = ('{base}/dr{dr}/sdss/spectro/redux/' - '{run2d}/spectra/{plate:0>4d}/' + # Note: {plate:0>4d} does allow 5-digit plates, while still zero-padding 3-digit plates. + SPECTRA_URL_SUFFIX = ('{base}/dr{dr}/{redux_path}/' + '{run2d}/{spectra_path}/{plate:0>4d}/' 'spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits') TEMPLATES_URL = 'http://classic.sdss.org/dr7/algorithms/spectemplates/spDR2' @@ -737,12 +740,39 @@ def get_spectra_async(self, *, coordinates=None, radius=2.
* u.arcsec, run2d = str(row['run2d']) else: run2d = row['run2d'] + format_args = dict() + format_args['base'] = conf.sas_baseurl + format_args['dr'] = data_release + format_args['redux_path'] = 'sdss/spectro/redux' + format_args['run2d'] = run2d + format_args['spectra_path'] = 'spectra' + format_args['mjd'] = row['mjd'] + try: + format_args['plate'] = row['plate'] + format_args['fiber'] = row['fiberID'] + except KeyError: + format_args['fieldid'] = row['fieldID'] + format_args['catalogid'] = row['catalogID'] if data_release > 15 and run2d not in ('26', '103', '104'): - linkstr = linkstr.replace('/spectra/', '/spectra/full/') - link = linkstr.format( - base=conf.sas_baseurl, dr=data_release, - run2d=run2d, plate=row['plate'], - fiber=row['fiberID'], mjd=row['mjd']) + # + # Still want this applied to data_release > 17. + # + format_args['spectra_path'] = 'spectra/full' + if data_release > 17: + # + # This change will fix everything except run2d==v6_0_4 in DR18, + # which is handled by the if major > 5 block below. + # + format_args['redux_path'] = 'spectro/sdss/redux' + match_run2d = self.PARSE_BOSS_RUN2D.match(run2d) + if match_run2d is not None: + major = int(match_run2d.group('major')) + if major > 5: + linkstr = linkstr.replace('/{plate:0>4d}/', '/{fieldid:0>4d}p/{mjd:5d}/') + linkstr = linkstr.replace('spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits', + 'spec-{fieldid:0>4d}-{mjd:5d}-{catalogid:0>11d}.fits') + + link = linkstr.format(**format_args) results.append(commons.FileContainer(link, encoding='binary', remote_timeout=timeout, @@ -903,6 +933,8 @@ def get_images_async(self, coordinates=None, radius=2. 
* u.arcsec, instrument = 'boss' if data_release > 12: instrument = 'eboss' + if data_release > 17: + instrument = 'prior-surveys/sdss4-dr17-eboss' link = linkstr.format(base=conf.sas_baseurl, run=row['run'], dr=data_release, instrument=instrument, rerun=row['rerun'], camcol=row['camcol'], @@ -1267,7 +1299,7 @@ def _get_crossid_url(self, data_release): self._last_url = url return url - def _rectangle_sql(self, ra, dec, width, height=None, cosdec=False): + def _rectangle_sql(self, ra, dec, width, height=None): """ Generate SQL for a rectangular query centered on ``ra``, ``dec``. @@ -1284,10 +1316,6 @@ def _rectangle_sql(self, ra, dec, width, height=None, cosdec=False): Width of rectangle in degrees. height : float, optional Height of rectangle in degrees. If not specified, ``width`` is used. - cosdec : bool, optional - If ``True`` apply ``cos(dec)`` correction to the rectangle. - Otherwise, rectangles become increasingly triangle-like - near the poles. Returns ------- diff --git a/astroquery/sdss/field_names.py b/astroquery/sdss/field_names.py index 53d417652e..aedec57f59 100644 --- a/astroquery/sdss/field_names.py +++ b/astroquery/sdss/field_names.py @@ -6,8 +6,8 @@ from astropy.utils.data import get_pkg_data_contents from astropy.utils.exceptions import AstropyUserWarning -from . import conf -from ..utils.mocks import MockResponse +from astroquery.sdss import conf +from astroquery.utils.mocks import MockResponse __all__ = ['get_field_info', 'photoobj_defs', 'specobj_defs', 'crossid_defs'] diff --git a/astroquery/sdss/tests/test_sdss.py b/astroquery/sdss/tests/test_sdss.py index 10e90995e1..c766944b34 100644 --- a/astroquery/sdss/tests/test_sdss.py +++ b/astroquery/sdss/tests/test_sdss.py @@ -15,10 +15,11 @@ from astropy.utils.exceptions import AstropyWarning import pytest -from ... 
import sdss +from astroquery.sdss import conf +from astroquery import sdss +from astroquery.exceptions import TimeoutError +from astroquery.utils import commons from astroquery.utils.mocks import MockResponse -from ...exceptions import TimeoutError -from ...utils import commons # actual spectra/data are a bit heavy to include in astroquery, so we don't try # to deal with them. Would be nice to find a few very small examples @@ -114,7 +115,7 @@ def data_path(filename): coords_column = Column(coords_list, name='coordinates') # List of all data releases. -dr_list = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17) +dr_list = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18) # We are not testing queries for DR11 because it is not easily available to @@ -145,6 +146,47 @@ def url_tester_crossid(data_release): assert sdss.SDSS._last_url == baseurl +def url_tester_images(data_release, rerun, run, camcol, band, field): + instrument = 'boss' + if data_release > 12: + instrument = 'eboss' + if data_release > 17: + instrument = 'prior-surveys/sdss4-dr17-eboss' + url = sdss.SDSS.IMAGING_URL_SUFFIX.format(base=conf.sas_baseurl, run=run, + dr=data_release, instrument=instrument, + rerun=rerun, camcol=camcol, + field=field, band=band) + return url + + +def url_tester_spectra(data_release, run2d, plate, mjd, fiber): + linkstr = sdss.SDSS.SPECTRA_URL_SUFFIX + eFEDS = False + redux_path = 'sdss/spectro/redux' + spectra_path = 'spectra' + if data_release > 15 and run2d not in ('26', '103', '104'): + spectra_path = 'spectra/full' + if data_release > 17: + redux_path = 'spectro/sdss/redux' + match_run2d = sdss.SDSS.PARSE_BOSS_RUN2D.match(run2d) + if match_run2d is not None: + major = int(match_run2d.group('major')) + if major > 5: + eFEDS = True + linkstr = linkstr.replace('/{plate:0>4d}/', '/{fieldid:0>4d}p/{mjd:5d}/') + linkstr = linkstr.replace('spec-{plate:0>4d}-{mjd}-{fiber:04d}.fits', + 'spec-{fieldid:0>4d}-{mjd:5d}-{catalogid:0>11d}.fits') + if eFEDS: + 
url = linkstr.format(base=conf.sas_baseurl, dr=data_release, + redux_path=redux_path, run2d=run2d, spectra_path=spectra_path, + fieldid=plate, catalogid=fiber, mjd=mjd) + else: + url = linkstr.format(base=conf.sas_baseurl, dr=data_release, + redux_path=redux_path, run2d=run2d, spectra_path=spectra_path, + plate=plate, fiber=fiber, mjd=mjd) + return url + + def compare_xid_data(xid, data): for col in xid.colnames: if xid[col].dtype.type is np.str_: @@ -660,6 +702,56 @@ def test_get_images_coordinates_payload(patch_request, dr): assert query_payload['photoScope'] == 'nearPrim' +@pytest.mark.parametrize("dr", dr_list) +def test_get_images_async_url(patch_request, patch_get_readable_fileobj, dr): + matches = Table() + matches['run'] = [1, 12, 123, 1234] + matches['camcol'] = [1, 2, 4, 6] + matches['field'] = [10, 100, 1000, 10000] + matches['rerun'] = [301, 301, 301, 301] + download_urls = sdss.SDSS.get_images_async(matches=matches, band='ugriz', + data_release=dr) + for i, row in enumerate(matches): + for j, band in enumerate('ugriz'): + k = 5*i + j + assert download_urls[k]._target == url_tester_images(dr, + row['rerun'], + row['run'], + row['camcol'], + band, + row['field']) + + +@pytest.mark.parametrize("dr", dr_list) +def test_get_spectra_async_url(patch_request, patch_get_readable_fileobj, dr): + matches = Table() + matches['plate'] = [12, 123, 1234, 1234, 5432, 12345] + matches['fiberID'] = [10, 100, 621, 123, 456, 986] + matches['mjd'] = [54321, 54321, 54321, 65432, 76543, 87654] + matches['run2d'] = ['26', '26', '26', 'v5_12_2', 'v5_12_2', 'v5_12_2'] + download_urls = sdss.SDSS.get_spectra_async(matches=matches, + data_release=dr) + for i, row in enumerate(matches): + assert matches[i]['plate'] == row['plate'] + assert download_urls[i]._target == url_tester_spectra(dr, row['run2d'], + row['plate'], + row['mjd'], + row['fiberID']) + if dr > 17: + matches = Table() + matches['fieldID'] = [15170, 15265] + matches['mjd'] = [59292, 59316] + matches['catalogID'] = 
[4570401475, 4592713531] + matches['run2d'] = ['v6_0_4', 'v6_0_4'] + download_urls = sdss.SDSS.get_spectra_async(matches=matches, + data_release=dr) + for i, row in enumerate(matches): + assert download_urls[i]._target == url_tester_spectra(dr, row['run2d'], + row['fieldID'], + row['mjd'], + row['catalogID']) + + @pytest.mark.parametrize("dr", dr_list) def test_spectra_plate_mjd_payload(patch_request, dr): expect = ("SELECT DISTINCT " diff --git a/astroquery/sdss/tests/test_sdss_remote.py b/astroquery/sdss/tests/test_sdss_remote.py index 48afd0164b..7df397c593 100644 --- a/astroquery/sdss/tests/test_sdss_remote.py +++ b/astroquery/sdss/tests/test_sdss_remote.py @@ -10,11 +10,14 @@ from urllib.error import URLError -from ... import sdss -from ...exceptions import TimeoutError +# Timeout is the superclass of both ReadTimeout and ConnectTimeout +from requests.exceptions import Timeout + +from astroquery import sdss +from astroquery.exceptions import TimeoutError # DR11 is a quasi-internal data release that does not have SkyServer support. -dr_list = (8, 9, 10, 12, 13, 14, 15, 16, 17) +dr_list = (8, 9, 10, 12, 13, 14, 15, 16, 17, 18) dr_warn_list = (8, 9) @@ -57,19 +60,38 @@ def test_sdss_spectrum(self, dr): xid = sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec, spectro=True, data_release=dr) assert isinstance(xid, Table) - sdss.SDSS.get_spectra(matches=xid, data_release=dr) + downloaded_files = sdss.SDSS.get_spectra(matches=xid, data_release=dr) + assert len(downloaded_files) == len(xid) def test_sdss_spectrum_plate_mjd_fiber(self): - """These plates are only available in recent data releases. + """These plates are only available in relatively recent data releases. 
+ """ + downloaded_files = sdss.SDSS.get_spectra(plate=9403, mjd=58018, fiberID=485, data_release=16) + assert len(downloaded_files) == 1 + downloaded_files = sdss.SDSS.get_spectra(plate=10909, mjd=58280, fiberID=485, data_release=16) + assert len(downloaded_files) == 1 + + def test_sdss_spectrum_field_mjd_catalog(self): + """These eFEDS spectra are only available in data releases >= 18. + + https://data.sdss.org/sas/dr18/spectro/sdss/redux/v6_0_4/spectra/full/15170p/59292/spec-15170-59292-04570401475.fits + https://data.sdss.org/sas/dr18/spectro/sdss/redux/v6_0_4/spectra/full/15265p/59316/spec-15265-59316-04592713531.fits """ - sdss.SDSS.get_spectra(plate=9403, mjd=58018, fiberID=485, data_release=16) - sdss.SDSS.get_spectra(plate=10909, mjd=58280, fiberID=485, data_release=16) + matches = Table() + matches['fieldID'] = [15170, 15265] + matches['mjd'] = [59292, 59316] + matches['catalogID'] = [4570401475, 4592713531] + matches['run2d'] = ['v6_0_4', 'v6_0_4'] + downloaded_files = sdss.SDSS.get_spectra(matches=matches, data_release=18, cache=False) + assert len(downloaded_files) == 2 def test_sdss_spectrum_mjd(self): - sdss.SDSS.get_spectra(plate=2345, fiberID=572) + downloaded_files = sdss.SDSS.get_spectra(plate=2345, fiberID=572) + assert len(downloaded_files) == 1 def test_sdss_spectrum_coords(self): - sdss.SDSS.get_spectra(coordinates=self.coords) + downloaded_files = sdss.SDSS.get_spectra(coordinates=self.coords) + assert len(downloaded_files) == 1 def test_sdss_sql(self): query = """ @@ -88,16 +110,20 @@ class = 'galaxy' def test_sdss_image(self): xid = sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec) assert isinstance(xid, Table) - sdss.SDSS.get_images(matches=xid) + downloaded_files = sdss.SDSS.get_images(matches=xid) + assert len(downloaded_files) == len(xid) def test_sdss_template(self): - sdss.SDSS.get_spectral_template('qso') + downloaded_files = sdss.SDSS.get_spectral_template('qso') + assert len(downloaded_files) == 1 def 
test_sdss_image_run(self): - sdss.SDSS.get_images(run=1904, camcol=3, field=164) + downloaded_files = sdss.SDSS.get_images(run=1904, camcol=3, field=164) + assert len(downloaded_files) == 1 def test_sdss_image_coord(self): - sdss.SDSS.get_images(coordinates=self.coords) + downloaded_files = sdss.SDSS.get_images(coordinates=self.coords) + assert len(downloaded_files) == 1 def test_sdss_specobj(self): colnames = ['ra', 'dec', 'objid', 'run', 'rerun', 'camcol', 'field', @@ -157,17 +183,13 @@ def test_sdss_photoobj(self): else: assert xid[i][c] == row[c] - @pytest.mark.xfail(reason=("Timeout isn't raised since switching to " - "self._request, fix it before merging #586")) def test_query_timeout(self): - with pytest.raises(TimeoutError): - sdss.SDSS.query_region(self.coords, timeout=self.mintimeout) + with pytest.raises(Timeout): + sdss.SDSS.query_region(self.coords, width=2.0 * u.arcsec, cache=False, timeout=self.mintimeout) - @pytest.mark.xfail(reason=("Timeout isn't raised since switching to " - "self._request, fix it before merging #586")) def test_spectra_timeout(self): - with pytest.raises(TimeoutError): - sdss.SDSS.get_spectra(coordinates=self.coords, timeout=self.mintimeout) + with pytest.raises(Timeout): + sdss.SDSS.get_spectra(coordinates=self.coords, cache=False, timeout=self.mintimeout) def test_query_non_default_field(self): # A regression test for #469