Skip to content

Commit

Permalink
Refactor attr value fetching
Browse files Browse the repository at this point in the history
  • Loading branch information
Cadair committed Jan 23, 2024
1 parent 9bdfcef commit 54229d4
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repos:
- id: mixed-line-ending
files: ".*.py"
- id: end-of-file-fixer
exclude: ".*(.fits|.asdf)"
exclude: ".*(.fits|.asdf|.json)"
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
hooks:
Expand Down
1 change: 1 addition & 0 deletions dkist/data/api_search_values.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"parameterValues":[{"parameterName":"createDateMin","values":{"minValue":"2022-12-08T19:07:55.038280","maxValue":"2024-01-23T03:21:27.034961"}},{"parameterName":"createDateMax","values":{"minValue":"2022-12-08T19:07:55.038280","maxValue":"2024-01-23T03:21:27.034961"}},{"parameterName":"endTimeMin","values":{"minValue":"2022-02-23T20:48:55.393500","maxValue":"2023-11-01T20:51:20.287000"}},{"parameterName":"endTimeMax","values":{"minValue":"2022-02-23T20:48:55.393500","maxValue":"2023-11-01T20:51:20.287000"}},{"parameterName":"exposureTimeMin","values":{"minValue":0.037,"maxValue":1380.2332394366197}},{"parameterName":"exposureTimeMax","values":{"minValue":0.037,"maxValue":1380.2332394366197}},{"parameterName":"instrumentNames","values":{"categoricalValues":["VBI","VISP"]}},{"parameterName":"qualityAverageFriedParameterMin","values":{"minValue":0.027724481746640606,"maxValue":2.6520787500175156e+30}},{"parameterName":"qualityAverageFriedParameterMax","values":{"minValue":0.027724481746640606,"maxValue":2.6520787500175156e+30}},{"parameterName":"qualityAveragePolarimetricAccuracyMin","values":{"minValue":0.7556396371714269,"maxValue":0.9845845208228297}},{"parameterName":"qualityAveragePolarimetricAccuracyMax","values":{"minValue":0.7556396371714269,"maxValue":0.9845845208228297}},{"parameterName":"startTimeMin","values":{"minValue":"2022-02-23T19:05:32.338002","maxValue":"2023-11-01T19:53:02.868500"}},{"parameterName":"startTimeMax","values":{"minValue":"2022-02-23T19:05:32.338002","maxValue":"2023-11-01T19:53:02.868500"}},{"parameterName":"targetTypes","values":{"categoricalValues":["quietsun","unknown","sunspot"]}},{"parameterName":"averageDatasetSpectralSamplingMin","values":{"minValue":0.000540156130946172,"maxValue":0.001631075310766238}},{"parameterName":"averageDatasetSpectralSamplingMax","values":{"minValue":0.000540156130946172,"maxValue":0.001631075310766238}},{"parameterName":"averageDatasetSpatialSamplingMin","values":{"minValue":0.0,"maxValue":12388.0430
6084}},{"parameterName":"averageDatasetSpatialSamplingMax","values":{"minValue":0.0,"maxValue":12388.04306084}},{"parameterName":"averageDatasetTemporalSamplingMin","values":{"minValue":9.139999999997528,"maxValue":5263.145059399399}},{"parameterName":"averageDatasetTemporalSamplingMax","values":{"minValue":9.139999999997528,"maxValue":5263.145059399399}},{"parameterName":"highLevelSoftwareVersion","values":{"categoricalValues":["Pono_2.1.0","Pono_1.0.0","Alakai_5-1","Pono_3.1.0","Alakai_3-0","Alakai_4-0","Alakai_11.1.0","Alakai_6-0","Alakai_8-0","Alakai_10-0","Alakai_7-0"]}},{"parameterName":"workflowName","values":{"categoricalValues":["l0_to_l1_vbi_summit-calibrated","l0_to_l1_visp"]}},{"parameterName":"workflowVersion","values":{"categoricalValues":["1.4.11","2.10.1","2.0.2","2.7.3","1.4.1","1.1.5","1.2.0","2.10.2","2.7.4","2.6.1","1.2.1","2.7.5","1.1.7","2.0.1","0.16.0","1.4.8","2.9.0","2.3.1","2.3.0","2.10.0","1.1.10","2.7.2","1.0.0","2.7.0"]}},{"parameterName":"headerDataUnitCreationDateMin","values":{"minValue":"2022-12-08T17:25:51.965000","maxValue":"2024-01-23T03:17:38.126000"}},{"parameterName":"headerDataUnitCreationDateMax","values":{"minValue":"2022-12-08T17:25:51.965000","maxValue":"2024-01-23T03:17:38.126000"}},{"parameterName":"headerVersion","values":{"categoricalValues":["3.6.0","4.0.0","3.3.0","3.0.0","3.4.0","3.9.0","3.5.0","3.7.1","3.8.1"]}}]}
1 change: 0 additions & 1 deletion dkist/data/test/api_search_values.json

This file was deleted.

124 changes: 124 additions & 0 deletions dkist/net/attrs_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"Functions for working with the net submodule"
import json
import urllib
import datetime as dt
import importlib.resources

import platformdirs

from sunpy.net import attrs as sattrs

import dkist.data
from dkist import log
from dkist.net import attrs as dattrs

__all__ = ["get_search_attrs_values"]

# TODO: This should be in the config file
# Age, in seconds, beyond which the cached search values are considered stale.
MAX_AGE = dt.timedelta(weeks=1).total_seconds()

# Dataset inventory keys holding categorical values, paired with the Fido attr
# class that each key populates.
_CATEGORICAL_INVENTORY_ATTRS = {
    "instrumentNames": sattrs.Instrument,
    "targetTypes": dattrs.TargetType,
    "workflowName": dattrs.WorkflowName,
    "workflowVersion": dattrs.WorkflowVersion,
    "headerVersion": dattrs.HeaderVersion,
    "highLevelSoftwareVersion": dattrs.SummitSoftwareVersion,
}

# Map keys in dataset inventory to Fido attrs, grouped by the kind of value.
# Only categorical data are supported currently.
INVENTORY_ATTR_MAP = {
    "categorical": _CATEGORICAL_INVENTORY_ATTRS,
}


def get_file_age(path):
    """
    Return the number of seconds since ``path`` was last modified.

    Parameters
    ----------
    path
        A `pathlib.Path`-like object providing ``stat()``.

    Returns
    -------
    float
        Seconds elapsed between the file's modification time and now.
    """
    # Compare timezone-aware UTC datetimes: subtracting naive local times is
    # off by the size of any DST / UTC-offset change between the two instants.
    last_modified = dt.datetime.fromtimestamp(path.stat().st_mtime, tz=dt.timezone.utc)
    now = dt.datetime.now(tz=dt.timezone.utc)
    return (now - last_modified).total_seconds()


def get_cached_json():
    """
    Return the path to a local copy of the JSON file, and if the file should be updated.

    If a user-local copy has been downloaded that will always be used,
    otherwise the copy shipped inside the package is returned.

    Returns
    -------
    tuple
        ``(path, update_needed)``. ``update_needed`` is `True` when there is
        no user-local copy, or when the user-local copy is older than
        ``MAX_AGE`` seconds.
    """
    package_file = importlib.resources.files(dkist.data) / "api_search_values.json"
    user_file = platformdirs.user_data_path("dkist") / "api_search_values.json"

    if not user_file.exists():
        # Nothing has been downloaded yet: fall back to the packaged values
        # and ask the caller to fetch a fresh copy.
        return package_file, True

    # A downloaded copy exists; it only needs refreshing once it is stale.
    # (The age check must apply to the user file, not to the packaged one.)
    update_needed = get_file_age(user_file) > MAX_AGE
    return user_file, update_needed


def fetch_values_to_file(filepath, *, timeout=1):
    """
    Download the search values from the dataset endpoint and write them to ``filepath``.

    Parameters
    ----------
    filepath
        Destination file; it is overwritten with the raw response body.
    timeout
        Timeout, in seconds, passed to ``urllib.request.urlopen``.

    Raises
    ------
    Exception
        Any error raised while opening the URL, reading the response or
        writing the file propagates to the caller.
    """
    # ``import urllib`` alone does not guarantee the ``request`` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    # Import here to avoid uninitialised module
    from dkist.net import conf as net_conf

    url = net_conf.dataset_endpoint + net_conf.dataset_search_values_path
    # Read the whole body (and close the connection) before touching the
    # destination, so a failed download never truncates an existing file.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()

    with open(filepath, "wb") as f:
        f.write(payload)


def attempt_local_update(*, timeout=1):
    """
    Attempt to update the local data copy of the values.

    Parameters
    ----------
    timeout
        Timeout, in seconds, for the download request.

    Returns
    -------
    bool
        `True` if new values were downloaded and parse as JSON, `False`
        otherwise. On failure the user-local file is removed so that a
        corrupted or invalid file is never left behind.
    """
    user_file = platformdirs.user_data_path("dkist") / "api_search_values.json"
    # The user data directory's own parents may not exist on a fresh system.
    user_file.parent.mkdir(parents=True, exist_ok=True)

    log.info("Fetching updated search values for the DKIST client.")

    try:
        fetch_values_to_file(user_file, timeout=timeout)
    except Exception as err:
        # Deliberately broad: updating is best-effort and must never break
        # the caller, whatever the network or filesystem does.
        log.error("Failed to download new attrs values.")
        log.debug(str(err))
        # If an error has occurred then remove the local file so it isn't
        # corrupted or invalid. The file may never have been created, so a
        # missing file is not an error.
        user_file.unlink(missing_ok=True)
        return False

    # Test that the file we just saved can be parsed as json
    try:
        with open(user_file, "r") as f:
            json.load(f)
    except Exception:
        user_file.unlink(missing_ok=True)
        return False

    return True


def get_search_attrs_values(*, allow_update=True, timeout=1):
    """
    Return the search values, updating if needed.

    Parameters
    ----------
    allow_update
        If `True` (the default) and the cached values are missing or stale,
        try to download a fresh copy first.
    timeout
        Timeout, in seconds, for the download request.

    Returns
    -------
    dict
        Maps each attr class in ``INVENTORY_ATTR_MAP["categorical"]`` to a
        list of ``(value, "")`` tuples, one per categorical value.
    """
    local_path, update_needed = get_cached_json()
    if allow_update and update_needed:
        attempt_local_update(timeout=timeout)
        # The update may have created the user-local file, or removed a bad
        # one, so work out again which file should be read.
        local_path, _ = get_cached_json()
    if not update_needed:
        log.debug("No update to attr values needed.")
        log.debug(local_path.as_posix())

    with open(local_path, "r") as f:
        raw_values = json.load(f)

    # Index the list of parameter records by parameter name.
    search_values = {param["parameterName"]: param["values"] for param in raw_values["parameterValues"]}

    return {
        attr: [(name, "") for name in search_values[key]["categoricalValues"]]
        for key, attr in INVENTORY_ATTR_MAP["categorical"].items()
    }
26 changes: 3 additions & 23 deletions dkist/net/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
QueryResponseTable, convert_row_to_table)
from sunpy.util.net import parse_header

from dkist.net.attrs_values import get_search_attrs_values
from dkist.utils.inventory import INVENTORY_KEY_MAP
from dkist.utils.net import INVENTORY_ATTR_MAP, search_values

from . import attrs as dattrs
from .attr_walker import walker
Expand Down Expand Up @@ -269,36 +269,16 @@ def register_values(cls):
"""
return_values = {
sattrs.Provider: [("DKIST", "Data provided by the DKIST Data Center")],
# instrumentNames
# Using these descriptions instead of auto-populating because they're more useful
sattrs.Instrument: [("VBI", "Visible Broadband Imager"),
("VISP", "Visible Spectro-Polarimeter"),
("VTF", "Visible Tunable Filter"),
("Cryo-NIRSP", "Cryogenic Near Infrared SpectroPolarimiter"),
("DL-NIRSP", "Diffraction-Limited Near-InfraRed Spectro-Polarimeter")],

# hasAllStokes
sattrs.Physobs: [("stokes_parameters", "Stokes I, Q, U and V are provided in the dataset"),
("intensity", "Only Stokes I is provided in the dataset.")],
# isEmbargoed
dattrs.Embargoed: [("True", "Data is subject to access restrictions."),
("False", "Data is not subject to access restrictions.")],
# targetTypes
#dattrs.TargetType: [], # This should be a controlled list.

# Time - Time attr allows times in the full range but start and end time are given separately by the DKIST API
sattrs.Time: [("time", f"Min: {search_values['startTimeMin']['minValue']}; max: {search_values['endTimeMax']['maxValue']}.")],

# Completeness
sattrs.Level: [("1", "DKIST data calibrated to level 1.")],
}

# Auto-populate with additional keys from DKIST search API
for key in INVENTORY_ATTR_MAP["categorical"].keys():
k = INVENTORY_ATTR_MAP["categorical"][key]
return_values[k["attr"]] = [(name, k["desc"]) for name in search_values[key]["categoricalValues"]]

for key in INVENTORY_ATTR_MAP["range"].keys():
k = INVENTORY_ATTR_MAP["range"][key]
return_values[k["attr"]] = [(key, k["desc"]+f" {search_values[key+'Min']['minValue']}-{search_values[key+'Max']['maxValue']}.")]

return return_values
return {**return_values, **get_search_attrs_values()}
74 changes: 0 additions & 74 deletions dkist/utils/net.py

This file was deleted.

0 comments on commit 54229d4

Please sign in to comment.