Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get list of unique products #3096

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ mast
- Support for case-insensitive criteria keyword arguments in ``mast.Observations.query_criteria`` and
``mast.Catalogs.query_criteria``. [#3087]

- Added function ``mast.Observations.get_unique_product_list`` to return the unique data products associated with
the given observations. [#3096]

mpc
^^^

Expand Down
28 changes: 27 additions & 1 deletion astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,8 @@ def get_product_list_async(self, observations):
Note that obsid is NOT the same as obs_id, and inputting obs_id values will result in
an error. See column documentation `here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.

To return unique data products, use ``Observations.get_unique_product_list``.

Parameters
----------
observations : str or `~astropy.table.Row` or list/Table of same
Expand Down Expand Up @@ -959,8 +961,32 @@ def _remove_duplicate_products(self, data_products):
number_unique = len(unique_products)
if number_unique < number:
log.info(f"{number - number_unique} of {number} products were duplicates. "
f"Only downloading {number_unique} unique product(s).")
f"Only returning {number_unique} unique product(s).")

return unique_products

def get_unique_product_list(self, observations):
"""
Given a "Product Group Id" (column name obsid), returns a list of associated data products with
unique dataURIs. Note that obsid is NOT the same as obs_id, and inputting obs_id values will result in
an error. See column documentation `here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.

Parameters
----------
observations : str or `~astropy.table.Row` or list/Table of same
Row/Table of MAST query results (e.g. output from `query_object`)
or single/list of MAST Product Group Id(s) (obsid).
See description `here <https://masttest.stsci.edu/api/v0/_c_a_o_mfields.html>`__.

Returns
-------
unique_products : `~astropy.table.Table`
Table containing products with unique dataURIs.
"""
products = self.get_product_list(observations)
unique_products = self._remove_duplicate_products(products)
if len(unique_products) < len(products):
log.info("To return all products, use `Observations.get_product_list`")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This log message could be slightly more informative if you include the numbers, for example: "{len(products) - len(unique_products)} duplicate products removed; to return all products, use Observations.get_product_list".

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah never mind, the other log message captures that info! You're way ahead of me :)

return unique_products


Expand Down
28 changes: 27 additions & 1 deletion astroquery/mast/tests/test_mast_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from requests.models import Response

from astropy.table import Table
from astropy.table import Table, unique
from astropy.coordinates import SkyCoord
from astropy.io import fits
import astropy.units as u
Expand Down Expand Up @@ -380,6 +380,32 @@ def test_observations_get_product_list_tess_tica(self, caplog):
assert len(prods) > 0
assert (np.char.find(prods['obs_id'], '429031146') != -1).all()

def test_observations_get_unique_product_list(self, caplog):
# Check that no rows are filtered out when all products are unique
obsids = ['24832668']
products = Observations.get_product_list(obsids)
unique_products = Observations.get_unique_product_list(obsids)

# Should have the same length
assert len(products) == len(unique_products)
# No INFO messages should be logged
with caplog.at_level('INFO', logger='astroquery'):
assert caplog.text == ''

# Check that rows are filtered out when products are not unique
obsids.append('26421364')
products = Observations.get_product_list(obsids)
unique_products = Observations.get_unique_product_list(obsids)

# Unique product list should have fewer rows
assert len(products) > len(unique_products)
# Rows should be unique based on dataURI
assert (unique_products == unique(unique_products, keys='dataURI')).all()
# Check that INFO messages were logged
with caplog.at_level('INFO', logger='astroquery'):
assert 'products were duplicates' in caplog.text
assert 'To return all products' in caplog.text

def test_observations_filter_products(self):
observations = Observations.query_object("M8", radius=".04 deg")
obsLoc = np.where(observations["obs_id"] == 'ktwo200071160-c92_lc')
Expand Down
27 changes: 26 additions & 1 deletion docs/mast/mast_obsquery.rst
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,31 @@ Using "obs_id" instead of "obsid" from the previous example will result in the f
...
RemoteServiceError: Error converting data type varchar to bigint.

To return only the unique data products for one or more observations, use `~astroquery.mast.ObservationsClass.get_unique_product_list`.

.. doctest-remote-data::
>>> obs = Observations.query_criteria(obs_collection='HST',
... filters='F606W',
... instrument_name='ACS/WFC',
... proposal_id=['12062'],
... dataRights='PUBLIC')
>>> unique_products = Observations.get_unique_product_list(obs)
INFO: 180 of 370 products were duplicates. Only returning 190 unique product(s). [astroquery.mast.observations]
INFO: To return all products, use `Observations.get_product_list` [astroquery.mast.observations]
>>> print(unique_products[:10]['dataURI'])
dataURI
-------------------------------------------------------------------
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveo_drc.fits
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveo_drc.jpg
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveo_point-cat.ecsv
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveo_segment-cat.ecsv
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveo_trl.txt
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveoes_drc.fits
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveoes_drc.jpg
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveoes_flc.fits
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveoes_hlet.fits
mast:HST/product/hst_12062_eo_acs_wfc_f606w_jbeveoes_trl.txt

Filtering
---------

Expand Down Expand Up @@ -481,7 +506,7 @@ This approach is recommended for code brevity. Query criteria are supplied as ke
... proposal_id=['12062'],
... dataRights='PUBLIC',
... filter_products={'productSubGroupDescription': 'DRZ'})
INFO: 2 of 4 products were duplicates. Only downloading 2 unique product(s). [astroquery.mast.observations]
INFO: 2 of 4 products were duplicates. Only returning 2 unique product(s). [astroquery.mast.observations]
>>> print(s3_uris)
['s3://stpubdata/hst/public/jbev/jbeveo010/jbeveo010_drz.fits', 's3://stpubdata/hst/public/jbev/jbevet010/jbevet010_drz.fits']
>>> Observations.disable_cloud_dataset()
Expand Down
Loading