Skip to content

Commit

Permalink
Refactor attr value fetching
Browse files Browse the repository at this point in the history
  • Loading branch information
Cadair committed Jan 23, 2024
1 parent 4019dd3 commit 2106b59
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repos:
- id: mixed-line-ending
files: ".*.py"
- id: end-of-file-fixer
exclude: ".*(.fits|.asdf)"
exclude: ".*(.fits|.asdf|.json)"
- repo: https://github.com/pycqa/flake8
rev: 7.0.0
hooks:
Expand Down
1 change: 1 addition & 0 deletions dkist/data/api_search_values.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"parameterValues":[{"parameterName":"createDateMin","values":{"minValue":"2022-12-08T19:07:55.038280","maxValue":"2024-01-23T03:21:27.034961"}},{"parameterName":"createDateMax","values":{"minValue":"2022-12-08T19:07:55.038280","maxValue":"2024-01-23T03:21:27.034961"}},{"parameterName":"endTimeMin","values":{"minValue":"2022-02-23T20:48:55.393500","maxValue":"2023-11-01T20:51:20.287000"}},{"parameterName":"endTimeMax","values":{"minValue":"2022-02-23T20:48:55.393500","maxValue":"2023-11-01T20:51:20.287000"}},{"parameterName":"exposureTimeMin","values":{"minValue":0.037,"maxValue":1380.2332394366197}},{"parameterName":"exposureTimeMax","values":{"minValue":0.037,"maxValue":1380.2332394366197}},{"parameterName":"instrumentNames","values":{"categoricalValues":["VBI","VISP"]}},{"parameterName":"qualityAverageFriedParameterMin","values":{"minValue":0.027724481746640606,"maxValue":2.6520787500175156e+30}},{"parameterName":"qualityAverageFriedParameterMax","values":{"minValue":0.027724481746640606,"maxValue":2.6520787500175156e+30}},{"parameterName":"qualityAveragePolarimetricAccuracyMin","values":{"minValue":0.7556396371714269,"maxValue":0.9845845208228297}},{"parameterName":"qualityAveragePolarimetricAccuracyMax","values":{"minValue":0.7556396371714269,"maxValue":0.9845845208228297}},{"parameterName":"startTimeMin","values":{"minValue":"2022-02-23T19:05:32.338002","maxValue":"2023-11-01T19:53:02.868500"}},{"parameterName":"startTimeMax","values":{"minValue":"2022-02-23T19:05:32.338002","maxValue":"2023-11-01T19:53:02.868500"}},{"parameterName":"targetTypes","values":{"categoricalValues":["quietsun","unknown","sunspot"]}},{"parameterName":"averageDatasetSpectralSamplingMin","values":{"minValue":0.000540156130946172,"maxValue":0.001631075310766238}},{"parameterName":"averageDatasetSpectralSamplingMax","values":{"minValue":0.000540156130946172,"maxValue":0.001631075310766238}},{"parameterName":"averageDatasetSpatialSamplingMin","values":{"minValue":0.0,"maxValue":12388.0430
6084}},{"parameterName":"averageDatasetSpatialSamplingMax","values":{"minValue":0.0,"maxValue":12388.04306084}},{"parameterName":"averageDatasetTemporalSamplingMin","values":{"minValue":9.139999999997528,"maxValue":5263.145059399399}},{"parameterName":"averageDatasetTemporalSamplingMax","values":{"minValue":9.139999999997528,"maxValue":5263.145059399399}},{"parameterName":"highLevelSoftwareVersion","values":{"categoricalValues":["Pono_2.1.0","Pono_1.0.0","Alakai_5-1","Pono_3.1.0","Alakai_3-0","Alakai_4-0","Alakai_11.1.0","Alakai_6-0","Alakai_8-0","Alakai_10-0","Alakai_7-0"]}},{"parameterName":"workflowName","values":{"categoricalValues":["l0_to_l1_vbi_summit-calibrated","l0_to_l1_visp"]}},{"parameterName":"workflowVersion","values":{"categoricalValues":["1.4.11","2.10.1","2.0.2","2.7.3","1.4.1","1.1.5","1.2.0","2.10.2","2.7.4","2.6.1","1.2.1","2.7.5","1.1.7","2.0.1","0.16.0","1.4.8","2.9.0","2.3.1","2.3.0","2.10.0","1.1.10","2.7.2","1.0.0","2.7.0"]}},{"parameterName":"headerDataUnitCreationDateMin","values":{"minValue":"2022-12-08T17:25:51.965000","maxValue":"2024-01-23T03:17:38.126000"}},{"parameterName":"headerDataUnitCreationDateMax","values":{"minValue":"2022-12-08T17:25:51.965000","maxValue":"2024-01-23T03:17:38.126000"}},{"parameterName":"headerVersion","values":{"categoricalValues":["3.6.0","4.0.0","3.3.0","3.0.0","3.4.0","3.9.0","3.5.0","3.7.1","3.8.1"]}}]}
1 change: 0 additions & 1 deletion dkist/data/test/api_search_values.json

This file was deleted.

124 changes: 124 additions & 0 deletions dkist/net/attrs_values.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"Functions for working with the net submodule"
import json
import urllib
import datetime as dt
import importlib.resources

import platformdirs

from sunpy.net import attrs as sattrs

import dkist.data
from dkist import log
from dkist.net import attrs as dattrs

__all__ = ["get_search_attrs_values"]

# TODO: This should be in the config file
# Threshold age at which to refresh search values.
# Stored as a number of seconds (7 days) so it can be compared directly with
# the value returned by ``get_file_age``.
MAX_AGE = dt.timedelta(days=7).total_seconds()

# Map keys in dataset inventory to Fido attrs.
# The outer key names the kind of value; each inner key is a ``parameterName``
# looked up in the search-values JSON and each inner value is the attr class
# that the values for that parameter are registered against.
INVENTORY_ATTR_MAP = {
    # Only categorical data are supported currently
    "categorical": {
        "instrumentNames": sattrs.Instrument,
        "targetTypes": dattrs.TargetType,
        "workflowName": dattrs.WorkflowName,
        "workflowVersion": dattrs.WorkflowVersion,
        "headerVersion": dattrs.HeaderVersion,
        "highLevelSoftwareVersion": dattrs.SummitSoftwareVersion,
    },
}


def get_file_age(path):
    """
    Return the number of seconds since ``path`` was last modified.

    Parameters
    ----------
    path
        A path-like object providing ``stat()`` (e.g. `pathlib.Path`).

    Returns
    -------
    float
        Seconds elapsed between the file's modification time and now.
    """
    # Use timezone-aware UTC datetimes on both sides.  Subtracting two naive
    # local datetimes gives a result that is wrong by the UTC-offset change
    # whenever a DST transition falls between the two instants.
    last_modified = dt.datetime.fromtimestamp(path.stat().st_mtime, tz=dt.timezone.utc)
    now = dt.datetime.now(tz=dt.timezone.utc)
    return (now - last_modified).total_seconds()


def get_cached_json():
    """
    Return the path to a local copy of the JSON file, and if the file should be updated.

    If a user-local copy has been downloaded that will always be used.

    Returns
    -------
    return_file
        The user-local ``api_search_values.json`` if it exists, otherwise the
        copy shipped inside the ``dkist.data`` package.
    update_needed : bool
        `True` when there is no user-local copy, or when the user-local copy
        is older than ``MAX_AGE`` seconds.
    """
    filename = "api_search_values.json"
    package_file = importlib.resources.files(dkist.data) / filename
    user_file = platformdirs.user_data_path("dkist") / filename

    if not user_file.exists():
        # Nothing has been downloaded yet: fall back to the packaged copy and
        # ask the caller to fetch a fresh one.
        return package_file, True

    # A user copy exists, so it is the one whose age decides whether a refresh
    # is due.  (The staleness check previously only ran when the user copy was
    # missing, so an existing copy was never refreshed.)
    return user_file, get_file_age(user_file) > MAX_AGE


def fetch_values_to_file(filepath, *, timeout=1):
    """
    Download the search values and write the raw response bytes to ``filepath``.

    Parameters
    ----------
    filepath
        Destination file; it is opened in binary write mode.
    timeout
        Timeout passed to `urllib.request.urlopen`.

    Raises
    ------
    Exception
        Any error raised while opening the URL, reading the response or
        writing the file is propagated to the caller.
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule has been loaded, so import it explicitly.
    import urllib.request

    # Import here to avoid uninitialised module
    from dkist.net import conf as net_conf

    url = net_conf.dataset_endpoint + net_conf.dataset_search_values_path
    # Read the whole body before touching the destination so a failed request
    # does not truncate an existing file, and close the response when done.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()

    with open(filepath, "wb") as f:
        f.write(payload)

Check warning on line 70 in dkist/net/attrs_values.py

View check run for this annotation

Codecov / codecov/patch

dkist/net/attrs_values.py#L69-L70

Added lines #L69 - L70 were not covered by tests


def attempt_local_update(*, timeout=1):
    """
    Attempt to update the local data copy of the values.

    Parameters
    ----------
    timeout
        Timeout forwarded to ``fetch_values_to_file``.

    Returns
    -------
    bool
        `True` if the values were downloaded and the saved file parses as
        JSON, `False` otherwise.  On failure the user-local file is removed.
    """
    user_file = platformdirs.user_data_path("dkist") / "api_search_values.json"
    # parents=True: the enclosing user data directory may not exist yet either.
    user_file.parent.mkdir(parents=True, exist_ok=True)

    log.info("Fetching updated search values for the DKIST client.")

    try:
        fetch_values_to_file(user_file, timeout=timeout)
    except Exception as err:
        log.error("Failed to download new attrs values.")
        log.debug(str(err))
        # If an error has occurred then remove the local file so it isn't
        # corrupted or invalid.  The download can fail before the file is
        # ever created, so a missing file must not raise here.
        user_file.unlink(missing_ok=True)
        return False

    # Test that the file we just saved can be parsed as json
    try:
        with open(user_file, "r", encoding="utf-8") as f:
            json.load(f)
    except Exception as err:
        log.error("Downloaded attrs values file is not valid JSON.")
        log.debug(str(err))
        user_file.unlink(missing_ok=True)
        return False

    return True

Check warning on line 101 in dkist/net/attrs_values.py

View check run for this annotation

Codecov / codecov/patch

dkist/net/attrs_values.py#L101

Added line #L101 was not covered by tests


def get_search_attrs_values(*, allow_update=True, timeout=1):
    """
    Return the search values, updating if needed.

    Parameters
    ----------
    allow_update
        If `True` (default) and the cached values are missing or stale, try
        to download a fresh copy before reading.
    timeout
        Timeout forwarded to ``attempt_local_update``.

    Returns
    -------
    dict
        Maps each attr class in ``INVENTORY_ATTR_MAP["categorical"]`` to a
        list of ``(value, "")`` tuples built from the ``categoricalValues``
        listed for that parameter in the JSON file.
    """
    local_path, update_needed = get_cached_json()
    if allow_update and update_needed:
        attempt_local_update(timeout=timeout)
        # Resolve the path again: a successful update creates the user-local
        # copy, and a failed one deletes it, so the path chosen before the
        # attempt may be outdated or may no longer exist.
        local_path, _ = get_cached_json()
    if not update_needed:
        log.debug("No update to attr values needed.")
        log.debug(local_path.as_posix())

    with open(local_path, "r", encoding="utf-8") as f:
        search_values = json.load(f)

    # Re-key the list of parameter records by parameter name for direct lookup.
    search_values = {param["parameterName"]: param["values"] for param in search_values["parameterValues"]}

    return {
        attr: [(name, "") for name in search_values[key]["categoricalValues"]]
        for key, attr in INVENTORY_ATTR_MAP["categorical"].items()
    }

Check warning on line 124 in dkist/net/attrs_values.py

View check run for this annotation

Codecov / codecov/patch

dkist/net/attrs_values.py#L124

Added line #L124 was not covered by tests
26 changes: 3 additions & 23 deletions dkist/net/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
QueryResponseTable, convert_row_to_table)
from sunpy.util.net import parse_header

from dkist.net.attrs_values import get_search_attrs_values
from dkist.utils.inventory import INVENTORY_KEY_MAP
from dkist.utils.net import INVENTORY_ATTR_MAP, search_values

from . import attrs as dattrs
from .attr_walker import walker
Expand Down Expand Up @@ -269,36 +269,16 @@ def register_values(cls):
"""
return_values = {
sattrs.Provider: [("DKIST", "Data provided by the DKIST Data Center")],
# instrumentNames
# Using these descriptions instead of auto-populating because they're more useful
sattrs.Instrument: [("VBI", "Visible Broadband Imager"),
("VISP", "Visible Spectro-Polarimeter"),
("VTF", "Visible Tunable Filter"),
("Cryo-NIRSP", "Cryogenic Near Infrared SpectroPolarimiter"),
("DL-NIRSP", "Diffraction-Limited Near-InfraRed Spectro-Polarimeter")],

# hasAllStokes
sattrs.Physobs: [("stokes_parameters", "Stokes I, Q, U and V are provided in the dataset"),
("intensity", "Only Stokes I is provided in the dataset.")],
# isEmbargoed
dattrs.Embargoed: [("True", "Data is subject to access restrictions."),
("False", "Data is not subject to access restrictions.")],
# targetTypes
#dattrs.TargetType: [], # This should be a controlled list.

# Time - Time attr allows times in the full range but start and end time are given separately by the DKIST API
sattrs.Time: [("time", f"Min: {search_values['startTimeMin']['minValue']}; max: {search_values['endTimeMax']['maxValue']}.")],

# Completeness
sattrs.Level: [("1", "DKIST data calibrated to level 1.")],
}

# Auto-populate with additional keys from DKIST search API
for key in INVENTORY_ATTR_MAP["categorical"].keys():
k = INVENTORY_ATTR_MAP["categorical"][key]
return_values[k["attr"]] = [(name, k["desc"]) for name in search_values[key]["categoricalValues"]]

for key in INVENTORY_ATTR_MAP["range"].keys():
k = INVENTORY_ATTR_MAP["range"][key]
return_values[k["attr"]] = [(key, k["desc"]+f" {search_values[key+'Min']['minValue']}-{search_values[key+'Max']['maxValue']}.")]

return return_values
return {**return_values, **get_search_attrs_values()}
74 changes: 0 additions & 74 deletions dkist/utils/net.py

This file was deleted.

0 comments on commit 2106b59

Please sign in to comment.