From 54229d47ccc3befebefb3cc09c6d97e860aa4fe4 Mon Sep 17 00:00:00 2001 From: Stuart Mumford Date: Tue, 23 Jan 2024 13:23:29 +0000 Subject: [PATCH] Refactor attr value fetching --- .pre-commit-config.yaml | 2 +- dkist/data/api_search_values.json | 1 + dkist/data/test/api_search_values.json | 1 - dkist/net/attrs_values.py | 124 +++++++++++++++++++++++++ dkist/net/client.py | 26 +----- dkist/utils/net.py | 74 --------------- 6 files changed, 129 insertions(+), 99 deletions(-) create mode 100644 dkist/data/api_search_values.json delete mode 100644 dkist/data/test/api_search_values.json create mode 100644 dkist/net/attrs_values.py delete mode 100644 dkist/utils/net.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fff3bebf2..ae8c51a85 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: - id: mixed-line-ending files: ".*.py" - id: end-of-file-fixer - exclude: ".*(.fits|.asdf)" + exclude: ".*(.fits|.asdf|.json)" - repo: https://github.com/pycqa/flake8 rev: 7.0.0 hooks: diff --git a/dkist/data/api_search_values.json b/dkist/data/api_search_values.json new file mode 100644 index 000000000..c43d8bd83 --- /dev/null +++ b/dkist/data/api_search_values.json @@ -0,0 +1 @@ 
+{"parameterValues":[{"parameterName":"createDateMin","values":{"minValue":"2022-12-08T19:07:55.038280","maxValue":"2024-01-23T03:21:27.034961"}},{"parameterName":"createDateMax","values":{"minValue":"2022-12-08T19:07:55.038280","maxValue":"2024-01-23T03:21:27.034961"}},{"parameterName":"endTimeMin","values":{"minValue":"2022-02-23T20:48:55.393500","maxValue":"2023-11-01T20:51:20.287000"}},{"parameterName":"endTimeMax","values":{"minValue":"2022-02-23T20:48:55.393500","maxValue":"2023-11-01T20:51:20.287000"}},{"parameterName":"exposureTimeMin","values":{"minValue":0.037,"maxValue":1380.2332394366197}},{"parameterName":"exposureTimeMax","values":{"minValue":0.037,"maxValue":1380.2332394366197}},{"parameterName":"instrumentNames","values":{"categoricalValues":["VBI","VISP"]}},{"parameterName":"qualityAverageFriedParameterMin","values":{"minValue":0.027724481746640606,"maxValue":2.6520787500175156e+30}},{"parameterName":"qualityAverageFriedParameterMax","values":{"minValue":0.027724481746640606,"maxValue":2.6520787500175156e+30}},{"parameterName":"qualityAveragePolarimetricAccuracyMin","values":{"minValue":0.7556396371714269,"maxValue":0.9845845208228297}},{"parameterName":"qualityAveragePolarimetricAccuracyMax","values":{"minValue":0.7556396371714269,"maxValue":0.9845845208228297}},{"parameterName":"startTimeMin","values":{"minValue":"2022-02-23T19:05:32.338002","maxValue":"2023-11-01T19:53:02.868500"}},{"parameterName":"startTimeMax","values":{"minValue":"2022-02-23T19:05:32.338002","maxValue":"2023-11-01T19:53:02.868500"}},{"parameterName":"targetTypes","values":{"categoricalValues":["quietsun","unknown","sunspot"]}},{"parameterName":"averageDatasetSpectralSamplingMin","values":{"minValue":0.000540156130946172,"maxValue":0.001631075310766238}},{"parameterName":"averageDatasetSpectralSamplingMax","values":{"minValue":0.000540156130946172,"maxValue":0.001631075310766238}},{"parameterName":"averageDatasetSpatialSamplingMin","values":{"minValue":0.0,"maxValue":12388.043
06084}},{"parameterName":"averageDatasetSpatialSamplingMax","values":{"minValue":0.0,"maxValue":12388.04306084}},{"parameterName":"averageDatasetTemporalSamplingMin","values":{"minValue":9.139999999997528,"maxValue":5263.145059399399}},{"parameterName":"averageDatasetTemporalSamplingMax","values":{"minValue":9.139999999997528,"maxValue":5263.145059399399}},{"parameterName":"highLevelSoftwareVersion","values":{"categoricalValues":["Pono_2.1.0","Pono_1.0.0","Alakai_5-1","Pono_3.1.0","Alakai_3-0","Alakai_4-0","Alakai_11.1.0","Alakai_6-0","Alakai_8-0","Alakai_10-0","Alakai_7-0"]}},{"parameterName":"workflowName","values":{"categoricalValues":["l0_to_l1_vbi_summit-calibrated","l0_to_l1_visp"]}},{"parameterName":"workflowVersion","values":{"categoricalValues":["1.4.11","2.10.1","2.0.2","2.7.3","1.4.1","1.1.5","1.2.0","2.10.2","2.7.4","2.6.1","1.2.1","2.7.5","1.1.7","2.0.1","0.16.0","1.4.8","2.9.0","2.3.1","2.3.0","2.10.0","1.1.10","2.7.2","1.0.0","2.7.0"]}},{"parameterName":"headerDataUnitCreationDateMin","values":{"minValue":"2022-12-08T17:25:51.965000","maxValue":"2024-01-23T03:17:38.126000"}},{"parameterName":"headerDataUnitCreationDateMax","values":{"minValue":"2022-12-08T17:25:51.965000","maxValue":"2024-01-23T03:17:38.126000"}},{"parameterName":"headerVersion","values":{"categoricalValues":["3.6.0","4.0.0","3.3.0","3.0.0","3.4.0","3.9.0","3.5.0","3.7.1","3.8.1"]}}]} \ No newline at end of file diff --git a/dkist/data/test/api_search_values.json b/dkist/data/test/api_search_values.json deleted file mode 100644 index f6cea5a63..000000000 --- a/dkist/data/test/api_search_values.json +++ /dev/null @@ -1 +0,0 @@ -{"parameterValues": [{"parameterName": "createDateMin", "values": {"minValue": "2022-02-23T19:05:32.338002", "maxValue": "2023-11-01T19:53:02.868500"}}, {"parameterName": "createDateMax", "values": {"minValue": "2022-02-23T19:05:32.338002", "maxValue": "2023-11-01T19:53:02.868500"}}, {"parameterName": "endTimeMin", "values": {"minValue": 
"2022-02-23T20:48:55.393500", "maxValue": "2023-11-01T20:51:20.287000"}}, {"parameterName": "endTimeMax", "values": {"minValue": "2022-02-23T20:48:55.393500", "maxValue": "2023-11-01T20:51:20.287000"}}, {"parameterName": "exposureTimeMin", "values": {"minValue": 0.037, "maxValue": 1380.2332394366197}}, {"parameterName": "exposureTimeMax", "values": {"minValue": 0.037, "maxValue": 1380.2332394366197}}, {"parameterName": "instrumentNames", "values": {"categoricalValues": ["VBI", "VISP"]}}, {"parameterName": "qualityAverageFriedParameterMin", "values": {"minValue": 0.027724481746640606, "maxValue": 2.6520787500175156e+30}}, {"parameterName": "qualityAverageFriedParameterMax", "values": {"minValue": 0.027724481746640606, "maxValue": 2.6520787500175156e+30}}, {"parameterName": "startTimeMin", "values": {"minValue": "2022-02-23T19:05:32.338002", "maxValue": "2023-11-01T19:53:02.868500"}}, {"parameterName": "startTimeMax", "values": {"minValue": "2022-02-23T19:05:32.338002", "maxValue": "2023-11-01T19:53:02.868500"}}, {"parameterName": "targetTypes", "values": {"categoricalValues": ["quietsun", "unknown", "sunspot"]}}, {"parameterName": "averageDatasetSpectralSamplingMin", "values": {"minValue": 0.000540156130946172, "maxValue": 0.001631075310766238}}, {"parameterName": "averageDatasetSpectralSamplingMax", "values": {"minValue": 0.000540156130946172, "maxValue": 0.001631075310766238}}, {"parameterName": "averageDatasetSpatialSamplingMin", "values": {"minValue": 0.0, "maxValue": 12388.04306084}}, {"parameterName": "averageDatasetSpatialSamplingMax", "values": {"minValue": 0.0, "maxValue": 12388.04306084}}, {"parameterName": "averageDatasetTemporalSamplingMin", "values": {"minValue": 9.139999999997528, "maxValue": 5263.145059399399}}, {"parameterName": "averageDatasetTemporalSamplingMax", "values": {"minValue": 9.139999999997528, "maxValue": 5263.145059399399}}, {"parameterName": "highLevelSoftwareVersion", "values": {"categoricalValues": ["Pono_2.1.0", "Pono_1.0.0", 
"Alakai_5-1", "Pono_3.1.0", "Alakai_3-0", "Alakai_4-0", "Alakai_11.1.0", "Alakai_6-0", "Alakai_8-0", "Alakai_10-0", "Alakai_7-0"]}}, {"parameterName": "workflowName", "values": {"categoricalValues": ["l0_to_l1_vbi_summit-calibrated", "l0_to_l1_visp"]}}, {"parameterName": "workflowVersion", "values": {"categoricalValues": ["2.7.4", "2.6.1", "1.2.1", "2.7.5", "1.1.7", "2.0.1", "0.16.0", "1.4.8", "2.3.1", "2.3.0", "1.1.10", "2.7.2", "1.0.0", "2.7.0", "1.4.11", "2.0.2", "2.7.3", "1.4.1", "1.1.5", "1.2.0"]}}, {"parameterName": "headerDataUnitCreationDateMin", "values": {"minValue": "2022-12-08T17:25:51.965000", "maxValue": "2023-11-15T07:56:17.263000"}}, {"parameterName": "headerDataUnitCreationDateMax", "values": {"minValue": "2022-12-08T17:25:51.965000", "maxValue": "2023-11-15T07:56:17.263000"}}, {"parameterName": "headerVersion", "values": {"categoricalValues": ["3.6.0", "3.3.0", "3.0.0", "3.4.0", "3.5.0", "3.7.1", "3.8.1"]}}]} diff --git a/dkist/net/attrs_values.py b/dkist/net/attrs_values.py new file mode 100644 index 000000000..a7ab1b408 --- /dev/null +++ b/dkist/net/attrs_values.py @@ -0,0 +1,124 @@ +"Functions for working with the net submodule" +import json +import urllib +import datetime as dt +import importlib.resources + +import platformdirs + +from sunpy.net import attrs as sattrs + +import dkist.data +from dkist import log +from dkist.net import attrs as dattrs + +__all__ = ["get_search_attrs_values"] + +# TODO: This should be in the config file +# Threshold age at which to refresh search values +MAX_AGE = dt.timedelta(days=7).total_seconds() + +# Map keys in dataset inventory to Fido attrs +INVENTORY_ATTR_MAP = { + # Only categorical data are supported currently + "categorical": { + "instrumentNames": sattrs.Instrument, + "targetTypes": dattrs.TargetType, + "workflowName": dattrs.WorkflowName, + "workflowVersion": dattrs.WorkflowVersion, + "headerVersion": dattrs.HeaderVersion, + "highLevelSoftwareVersion": dattrs.SummitSoftwareVersion, + }, +} + + +def 
get_file_age(path):
+    """
+    Return the number of seconds since ``path`` was last modified.
+    """
+    last_modified = dt.datetime.fromtimestamp(path.stat().st_mtime)
+    now = dt.datetime.now()
+    return (now - last_modified).total_seconds()
+
+
+def get_cached_json():
+    """
+    Return the path to a local copy of the JSON file, and if the file should be updated.
+
+    If a user-local copy has been downloaded that will always be used,
+    otherwise the copy shipped inside the package is returned.
+
+    Returns
+    -------
+    tuple
+        ``(path, update_needed)``. ``update_needed`` is `True` when there is
+        no user-local copy yet, or when the user-local copy is older than
+        ``MAX_AGE`` seconds.
+    """
+    package_file = importlib.resources.files(dkist.data) / "api_search_values.json"
+    user_file = platformdirs.user_data_path("dkist") / "api_search_values.json"
+
+    # No user copy has been downloaded yet: fall back to the packaged file
+    # and ask for a download.
+    if not user_file.exists():
+        return package_file, True
+
+    # A user copy exists: it only needs refreshing once it has gone stale.
+    return user_file, get_file_age(user_file) > MAX_AGE
+
+
+def fetch_values_to_file(filepath, *, timeout=1):
+    """
+    Download the search values from the dataset endpoint and write them to ``filepath``.
+
+    Any exception raised by the request or by writing the file is propagated
+    to the caller.
+    """
+    # ``import urllib`` on its own does not guarantee that the ``request``
+    # submodule has been loaded.
+    import urllib.request
+
+    # Import here to avoid uninitialised module
+    from dkist.net import conf as net_conf
+
+    url = net_conf.dataset_endpoint + net_conf.dataset_search_values_path
+    # Read the whole response before opening the destination, so a failed
+    # request does not truncate a file which is already there, and close the
+    # connection deterministically.
+    with urllib.request.urlopen(url, timeout=timeout) as response:
+        payload = response.read()
+
+    with open(filepath, "wb") as f:
+        f.write(payload)
+
+
+def attempt_local_update(*, timeout=1):
+    """
+    Attempt to update the local data copy of the values.
+
+    Returns
+    -------
+    bool
+        `True` if the values were downloaded and the saved file parses as
+        JSON, `False` otherwise. Failures are logged, not raised.
+    """
+    user_file = platformdirs.user_data_path("dkist") / "api_search_values.json"
+    # The parent of the user data directory may not exist yet either.
+    user_file.parent.mkdir(parents=True, exist_ok=True)
+
+    log.info("Fetching updated search values for the DKIST client.")
+
+    try:
+        fetch_values_to_file(user_file, timeout=timeout)
+    except Exception as err:
+        log.error("Failed to download new attrs values.")
+        log.debug(str(err))
+        # If an error has occurred then remove the local file so it isn't
+        # corrupted or invalid. The file may never have been created, so a
+        # missing file must not raise from inside this handler.
+        user_file.unlink(missing_ok=True)
+        return False
+
+    # Test that the file we just saved can be parsed as json
+    try:
+        with open(user_file, "r") as f:
+            json.load(f)
+    except Exception:
+        user_file.unlink(missing_ok=True)
+        return False
+
+    return True
+
+
+def get_search_attrs_values(*, allow_update=True, timeout=1):
+    """
+    Return the search values, updating if needed.
+
+    Parameters
+    ----------
+    allow_update
+        If `False` never attempt to download new values.
+    timeout
+        Timeout, in seconds, passed to the download request.
+
+    Returns
+    -------
+    dict
+        Maps each attr class in ``INVENTORY_ATTR_MAP["categorical"]`` to a
+        list of ``(value, "")`` tuples.
+    """
+    local_path, update_needed = get_cached_json()
+    if allow_update and update_needed:
+        if attempt_local_update(timeout=timeout):
+            # Resolve the path again so the freshly downloaded file is the
+            # one which is read below, not the path chosen before the update.
+            local_path, _ = get_cached_json()
+    if not update_needed:
+        log.debug("No update to attr values needed.")
+        log.debug(local_path.as_posix())
+
+    with open(local_path, "r") as f:
+        search_values = json.load(f)
+
+    search_values = {param["parameterName"]: param["values"] for param in search_values["parameterValues"]}
+
+    return_values = {}
+    for key, attr in INVENTORY_ATTR_MAP["categorical"].items():
+        return_values[attr] = [(name, "") for name in search_values[key]["categoricalValues"]]
+
+    return return_values
diff --git a/dkist/net/client.py b/dkist/net/client.py index 28d1f4918..f11065755 100644 --- a/dkist/net/client.py +++ b/dkist/net/client.py @@ -20,8 +20,8 @@ QueryResponseTable, convert_row_to_table) from sunpy.util.net import parse_header +from dkist.net.attrs_values import get_search_attrs_values from dkist.utils.inventory import INVENTORY_KEY_MAP -from dkist.utils.net import INVENTORY_ATTR_MAP, search_values from .
import attrs as dattrs from .attr_walker import walker @@ -269,36 +269,16 @@ def register_values(cls): """ return_values = { sattrs.Provider: [("DKIST", "Data provided by the DKIST Data Center")], - # instrumentNames - # Using these descriptions instead of auto-populating because they're more useful - sattrs.Instrument: [("VBI", "Visible Broadband Imager"), - ("VISP", "Visible Spectro-Polarimeter"), - ("VTF", "Visible Tunable Filter"), - ("Cryo-NIRSP", "Cryogenic Near Infrared SpectroPolarimiter"), - ("DL-NIRSP", "Diffraction-Limited Near-InfraRed Spectro-Polarimeter")], + # hasAllStokes sattrs.Physobs: [("stokes_parameters", "Stokes I, Q, U and V are provided in the dataset"), ("intensity", "Only Stokes I is provided in the dataset.")], # isEmbargoed dattrs.Embargoed: [("True", "Data is subject to access restrictions."), ("False", "Data is not subject to access restrictions.")], - # targetTypes - #dattrs.TargetType: [], # This should be a controlled list. - - # Time - Time attr allows times in the full range but start and end time are given separately by the DKIST API - sattrs.Time: [("time", f"Min: {search_values['startTimeMin']['minValue']}; max: {search_values['endTimeMax']['maxValue']}.")], # Completeness sattrs.Level: [("1", "DKIST data calibrated to level 1.")], } - # Auto-populate with additional keys from DKIST search API - for key in INVENTORY_ATTR_MAP["categorical"].keys(): - k = INVENTORY_ATTR_MAP["categorical"][key] - return_values[k["attr"]] = [(name, k["desc"]) for name in search_values[key]["categoricalValues"]] - - for key in INVENTORY_ATTR_MAP["range"].keys(): - k = INVENTORY_ATTR_MAP["range"][key] - return_values[k["attr"]] = [(key, k["desc"]+f" {search_values[key+'Min']['minValue']}-{search_values[key+'Max']['maxValue']}.")] - - return return_values + return {**return_values, **get_search_attrs_values()} diff --git a/dkist/utils/net.py b/dkist/utils/net.py deleted file mode 100644 index 6f783ca75..000000000 --- a/dkist/utils/net.py +++ /dev/null 
@@ -1,74 +0,0 @@ -"Functions for working with the net submodule" -import os -import json -import urllib -import logging -import datetime as dt -from pathlib import Path - -from ..net import attrs as dattrs - - -def get_api_response_location(): - # Location of DKIST package installation - dkist_data = Path(*Path(__file__).parts[:-2]) / 'data' - # This is definitely not the best way to do this but I can't get pytest to mock this function - # properly so here we are - if os.environ.get('IS_TEST_ENV'): - dkist_data = dkist_data / 'test' - return dkist_data / 'api_search_values.json' - - -INVENTORY_ATTR_MAP = { - "range": { - "averageDatasetSpatialSampling": {"attr": dattrs.SpatialSampling, - "desc": "The min/max allowable dataset spatial sampling."}, - "averageDatasetSpectralSampling": {"attr": dattrs.SpectralSampling, - "desc": "The min/max allowable dataset spectral sampling (nm)."}, - "averageDatasetTemporalSampling": {"attr": dattrs.TemporalSampling, - "desc": "The min/max allowable dataset temporal sampling."}, - "exposureTime": {"attr": dattrs.ExposureTime, - "desc": "The min/max allowable exposure time within a dataset, in milliseconds."}, - "qualityAverageFriedParameter": {"attr": dattrs.FriedParameter, - "desc": "The min/max allowable value of the average Fried Parameter within a dataset, in meters."}, - }, - "categorical": { - "targetTypes": {"attr": dattrs.TargetType, - "desc": "A list of target types that can be present within a dataset."}, - "workflowName": {"attr": dattrs.WorkflowName, - "desc": "Name of the workflow used to process the dataset."}, - "workflowVersion": {"attr": dattrs.WorkflowVersion, - "desc": "Version of the workflow used to process the dataset."}, - "headerVersion": {"attr": dattrs.HeaderVersion, - "desc": "Version of the header schema used in the dataset."}, - "highLevelSoftwareVersion": {"attr": dattrs.SummitSoftwareVersion, - "desc": "Version of the High Level Software (HLS) used on the summit during data collection."}, - } -} - - 
-# Location of DKIST package installation -search_api_response = get_api_response_location() -update_search_values = False -# Threshold age at which to refresh search values -max_age = dt.timedelta(days=7).total_seconds() -if not os.environ.get('DKIST_SKIP_UPDATE_SEARCH_VALUES'): - if not search_api_response.exists(): - update_search_values = True #pragma: no cover - else: - last_modified = dt.datetime.fromtimestamp(search_api_response.stat().st_mtime) - now = dt.datetime.now() - file_age = (now - last_modified).total_seconds() - if file_age > max_age: #pragma: no cover - update_search_values = True - -if update_search_values and not os.environ.get('IS_TEST_ENV'): #pragma: no cover - logging.info("Downloading valid search values") - data = urllib.request.urlopen('https://api.dkistdc.nso.edu/datasets/v1/searchValues') - with open(search_api_response, "w") as f: - search_values = json.dump(json.loads(data.read()), f) - -with open(search_api_response, "r") as f: - search_values = json.load(f) - -search_values = {param["parameterName"]: param["values"] for param in search_values["parameterValues"]}