diff --git a/dmci/api/worker.py b/dmci/api/worker.py
index 6263b39..7d01fc9 100644
--- a/dmci/api/worker.py
+++ b/dmci/api/worker.py
@@ -293,8 +293,7 @@ def _add_landing_page(self, data, catalog_url, uuid):
return data_mod
def _extract_metadata_id(self, xml_doc):
- """Extract the metadata_identifier from the xml object and set
- the class variables namespace and file_metadata_id.
+ """Set the instance attributes _namespace and _file_metadata_id.
Returns
-------
@@ -305,6 +304,35 @@ def _extract_metadata_id(self, xml_doc):
"""
self._file_metadata_id = None
self._namespace = None
+ namespace, file_uuid = self._get_metadata_id(xml_doc)
+ if file_uuid == "":
+ logger.error("No UUID found in XML file")
+ return False
+ if namespace == "":
+ logger.error("No namespace found in XML file")
+ return False
+
+ try:
+ self._file_metadata_id = uuid.UUID(file_uuid)
+ logger.debug("File UUID: %s", str(file_uuid))
+ except Exception as e:
+ logger.error("Could not parse UUID: '%s'", str(file_uuid))
+ logger.error(str(e))
+ return False
+ self._namespace = namespace
+ return True
+
+ @staticmethod
+ def _get_metadata_id(xml_doc):
+ """Extract the metadata_identifier from the xml object.
+
+ Returns
+ -------
+ namespace : str
+ Namespace if this is found, otherwise empty string
+ file_uuid : str
+ File UUID if this is found, otherwise empty string
+ """
file_uuid = ""
namespace = ""
for xml_entry in xml_doc:
@@ -314,7 +342,7 @@ def _extract_metadata_id(self, xml_doc):
words = xml_entry.text.split(":")
if len(words) != 2:
logger.error("metadata_identifier not formed as namespace:UUID")
- return False
+ return "", ""
namespace, file_uuid = words
logger.info(
@@ -323,23 +351,7 @@ def _extract_metadata_id(self, xml_doc):
logger.debug("XML file metadata_identifier namespace: %s", namespace)
logger.debug("XML file metadata_identifier UUID: %s", file_uuid)
break
-
- if file_uuid == "":
- logger.error("No UUID found in XML file")
- return False
- if namespace == "":
- logger.error("No namespace found in XML file")
- return False
-
- try:
- self._file_metadata_id = uuid.UUID(file_uuid)
- logger.debug("File UUID: %s", str(file_uuid))
- except Exception as e:
- logger.error("Could not parse UUID: '%s'", str(file_uuid))
- logger.error(str(e))
- return False
- self._namespace = namespace
- return True
+ return namespace, file_uuid
def _extract_title(self, xml_doc):
title = ""
diff --git a/dmci/distributors/pycsw_dist.py b/dmci/distributors/pycsw_dist.py
index 2b2c566..c98696f 100644
--- a/dmci/distributors/pycsw_dist.py
+++ b/dmci/distributors/pycsw_dist.py
@@ -17,6 +17,7 @@
limitations under the License.
"""
+import uuid
import logging
import requests
@@ -99,10 +100,26 @@ def _insert(self):
def _update(self):
"""Update current entry.
- Update: updates can be made as full record updates or record
- properties against a csw:Constraint, to update: Define
- overwriting property, search for places to overwrite.
+ Note: in pycsw updates are made on elements, not full documents. The
+ way dmci is designed, it is easier to update the full document by first
+ deleting the current entry, then inserting the new version.
"""
+ from dmci.api.worker import Worker
+ with open(self._xml_file) as fn:
+ data = fn.read()
+ xml_doc = etree.fromstring(data)
+ namespace, file_uuid = Worker._get_metadata_id(xml_doc)
+ if file_uuid == "":
+ return False, "No UUID found in XML file"
+ if namespace == "":
+ return False, "No namespace found in XML file"
+ try:
+ self._metadata_UUID = uuid.UUID(file_uuid)
+ logger.debug("File UUID: %s", str(file_uuid))
+ except Exception as e:
+ logger.error(str(e))
+ return False, f"Could not parse UUID: {str(file_uuid)}"
+
del_status, del_response_text = self._delete()
if not del_status:
return del_status, del_response_text
diff --git a/tests/files/reference/mmd_file.xml b/tests/files/reference/mmd_file.xml
new file mode 100644
index 0000000..32c333c
--- /dev/null
+++ b/tests/files/reference/mmd_file.xml
@@ -0,0 +1,152 @@
+
+ no.test:3f289fcc-022b-4b62-bbbb-b001304a6e09
+ Arome-Arctic 2.5Km lagged subset + 66 hours
+ This file contains output from ensemble. Contains several surface parameters, and some pressure level parameters. Was updated every 3 hours. The dataset production is ended.
+ Denne filen inneholder utdata fra ensemble. Inneholder flere overflateparametere, og noen trykknivåparametere. Ble oppdatert hver 3. time. Produksjonen av dette datasettet er avsluttet.
+ Active
+ Complete
+ SIOS
+ ADC
+ METNCS
+
+
+ 2023-09-15T12:00:00Z
+ Created
+
+
+ 2024-04-05T13:33:14Z
+ Minor modification
+ Add collection ADC
+
+
+ 2025-01-10T13:45:00Z
+ Minor modification
+ Set dataset production status to complete
+
+
+ 2024-04-05T13:35:05Z
+ Minor modification
+ Add collection SIOS
+
+
+ 2025-01-15T08:30:00Z
+ Minor modification
+ Add this comment to test dmci ingestion
+
+
+
+ 2023-02-27T15:00:00Z
+
+ climatologyMeteorologyAtmosphere
+
+ WEATHER FORECAST
+ https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/sciencekeywords
+
+
+
+ Meteorological geographical features
+ Atmospheric conditions
+ https://inspire.ec.europa.eu/theme
+
+
+
+ Weather and climate
+ https://register.geonorge.no/metadata-kodelister/nasjonal-temainndeling
+
+
+
+
+ 88.0
+ 62.0
+ 80.0
+ -18.0
+
+
+
+
+
+ -17.96 69.30
+ -13.44 69.04
+ -9.06 68.66
+ -4.85 68.17
+ -0.84 67.57
+ 2.94 66.88
+ 6.48 66.09
+ 9.79 65.23
+ 12.86 64.30
+ 15.71 63.30
+ 18.66 62.12
+ 21.80 63.60
+ 25.31 65.00
+ 29.23 66.30
+ 33.58 67.50
+ 38.40 68.56
+ 43.68 69.46
+ 49.38 70.18
+ 55.46 70.71
+ 61.81 71.01
+ 68.90 71.09
+ 68.90 71.09
+ 69.08 72.92
+ 69.28 74.56
+ 69.53 76.20
+ 69.84 77.84
+ 70.24 79.48
+ 70.79 81.11
+ 71.59 82.74
+ 72.83 84.37
+ 75.04 85.98
+ 80.05 87.56
+ 80.05 87.56
+ 31.29 87.08
+ 8.72 85.56
+ -1.73 83.70
+ -7.39 81.72
+ -10.88 79.69
+ -13.23 77.63
+ -14.92 75.55
+ -16.18 73.47
+ -17.17 71.38
+ -17.96 69.30
+
+
+
+
+
+ en
+ Operational
+
+ CC-BY-4.0
+ https://spdx.org/licenses/CC-BY-4.0
+
+
+ Investigator
+ Senter for utvikling av værvarslingstjenesten
+ suv-arctic@met.no
+ Norwegian Meteorological Institute
+
+
+
+ MET Norway
+ Norwegian Meteorological Institute
+
+ https://data.met.no
+
+
+ Project home page
+
+ https://www.met.no/en/projects/The-weather-model-AROME-Arctic
+
+ grid
+ Numerical Simulation
+
+ Senter for utvikling av værvarslingstjenesten
+ 2023-09-14T12:00:00Z
+ Arome-Arctic 2.5Km lagged subset + 66 hours
+
+
+ Dataset landing page
+
+ https://data-test.met.no/dataset/3f289fcc-022b-4b62-bbbb-b001304a6e09
+
+
diff --git a/tests/test_dist/test_pycsw_dist.py b/tests/test_dist/test_pycsw_dist.py
index d7deb81..a9465b9 100644
--- a/tests/test_dist/test_pycsw_dist.py
+++ b/tests/test_dist/test_pycsw_dist.py
@@ -102,7 +102,7 @@ def testDistPyCSW_Insert(monkeypatch, mockXml, mockXslt, tmpConf):
@pytest.mark.dist
-def testDistPyCSW_Update(monkeypatch, mockXml, mockXslt, tmpUUID, tmpConf):
+def testDistPyCSW_Update(monkeypatch, filesDir, mockXslt, tmpUUID, tmpConf):
"""Test update commands via run()."""
tstWorker = Worker("update", None, None)
@@ -110,6 +110,8 @@ def testDistPyCSW_Update(monkeypatch, mockXml, mockXslt, tmpUUID, tmpConf):
tstWorker._namespace = "no.test"
tstWorker._conf = tmpConf
+ mockXml = os.path.join(filesDir, "reference", "mmd_file.xml")
+
# Update returns True
with monkeypatch.context() as mp:
mp.setattr(PyCSWDist, "_translate", lambda *a: b"")
@@ -164,6 +166,25 @@ def new_delete(cls, *a, **k):
"http://localhost: service unavailable. Failed to insert."
)
+ # Worker._get_metadata_id returns empty file_uuid, i.e., file_uuid = "".
+ with monkeypatch.context() as mp:
+ mp.setattr(Worker, "_get_metadata_id", lambda *a, **k: ("no.test", ""))
+ tstPyCSW = PyCSWDist("update", xml_file=mockXml)
+ assert tstPyCSW.run() == (False, "No UUID found in XML file")
+
+ # Worker._get_metadata_id returns empty namespace, i.e., namespace = "".
+ with monkeypatch.context() as mp:
+ mp.setattr(Worker, "_get_metadata_id",
+ lambda *a, **k: ("", "3f289fcc-022b-4b62-bbbb-b001304a6e09"))
+ tstPyCSW = PyCSWDist("update", xml_file=mockXml)
+ assert tstPyCSW.run() == (False, "No namespace found in XML file")
+
+ # Worker._get_metadata_id returns invalid file_uuid, e.g., file_uuid = "123".
+ with monkeypatch.context() as mp:
+ mp.setattr(Worker, "_get_metadata_id", lambda *a, **k: ("no.test", "123"))
+ tstPyCSW = PyCSWDist("update", xml_file=mockXml)
+ assert tstPyCSW.run() == (False, "Could not parse UUID: 123")
+
# END Test testDistPyCSW_Update