diff --git a/dmci/api/worker.py b/dmci/api/worker.py index 6263b39..7d01fc9 100644 --- a/dmci/api/worker.py +++ b/dmci/api/worker.py @@ -293,8 +293,7 @@ def _add_landing_page(self, data, catalog_url, uuid): return data_mod def _extract_metadata_id(self, xml_doc): - """Extract the metadata_identifier from the xml object and set - the class variables namespace and file_metadata_id. + """Set the class variables namespace and file_metadata_id. Returns ------- @@ -305,6 +304,35 @@ def _extract_metadata_id(self, xml_doc): """ self._file_metadata_id = None self._namespace = None + namespace, file_uuid = self._get_metadata_id(xml_doc) + if file_uuid == "": + logger.error("No UUID found in XML file") + return False + if namespace == "": + logger.error("No namespace found in XML file") + return False + + try: + self._file_metadata_id = uuid.UUID(file_uuid) + logger.debug("File UUID: %s", str(file_uuid)) + except Exception as e: + logger.error("Could not parse UUID: '%s'", str(file_uuid)) + logger.error(str(e)) + return False + self._namespace = namespace + return True + + @staticmethod + def _get_metadata_id(xml_doc): + """Extract the metadata_identifier from the xml object. + + Returns + ------- + namespace : str + Namespace if this is found, otherwise empty string + file_uuid : str + File UUID if this is found, otherwise empty string + """ file_uuid = "" namespace = "" for xml_entry in xml_doc: @@ -314,7 +342,7 @@ def _extract_metadata_id(self, xml_doc): words = xml_entry.text.split(":") if len(words) != 2: logger.error("metadata_identifier not formed as namespace:UUID") - return False + return "", "" namespace, file_uuid = words logger.info( @@ -323,23 +351,7 @@ def _extract_metadata_id(self, xml_doc): logger.debug("XML file metadata_identifier namespace: %s", namespace) logger.debug("XML file metadata_identifier UUID: %s", file_uuid) break - - if file_uuid == "": - logger.error("No UUID found in XML file") - return False - if namespace == "": - logger.error("No namespace found in XML file") - return False - - try: - self._file_metadata_id = uuid.UUID(file_uuid) - logger.debug("File UUID: %s", str(file_uuid)) - except Exception as e: - logger.error("Could not parse UUID: '%s'", str(file_uuid)) - logger.error(str(e)) - return False - self._namespace = namespace - return True + return namespace, file_uuid def _extract_title(self, xml_doc): title = "" diff --git a/dmci/distributors/pycsw_dist.py b/dmci/distributors/pycsw_dist.py index 2b2c566..c98696f 100644 --- a/dmci/distributors/pycsw_dist.py +++ b/dmci/distributors/pycsw_dist.py @@ -17,6 +17,7 @@ limitations under the License. """ +import uuid import logging import requests @@ -99,10 +100,26 @@ def _insert(self): def _update(self): """Update current entry. - Update: updates can be made as full record updates or record - properties against a csw:Constraint, to update: Define - overwriting property, search for places to overwrite. + Note: in pycsw updates are made on elements, not full documents. The + way dmci is designed, it is easier to update the full document by first + deleting the current entry, then inserting the new version. """ + from dmci.api.worker import Worker + with open(self._xml_file) as fn: + data = fn.read() + xml_doc = etree.fromstring(data) + namespace, file_uuid = Worker._get_metadata_id(xml_doc) + if file_uuid == "": + return False, "No UUID found in XML file" + if namespace == "": + return False, "No namespace found in XML file" + try: + self._metadata_UUID = uuid.UUID(file_uuid) + logger.debug("File UUID: %s", str(file_uuid)) + except Exception as e: + logger.error(str(e)) + return False, f"Could not parse UUID: {str(file_uuid)}" + del_status, del_response_text = self._delete() if not del_status: return del_status, del_response_text diff --git a/tests/files/reference/mmd_file.xml b/tests/files/reference/mmd_file.xml new file mode 100644 index 0000000..32c333c --- /dev/null +++ b/tests/files/reference/mmd_file.xml @@ -0,0 +1,152 @@ + + no.test:3f289fcc-022b-4b62-bbbb-b001304a6e09 + Arome-Arctic 2.5Km lagged subset + 66 hours + This file contains output from ensemble. Contains several surface parameters, and some pressure level parameters. Was updated every 3 hours. The dataset production is ended. + Denne filen inneholder utdata fra ensemble. Inneholder flere overflateparametere, og noen trykknivåparametere. Ble ppdatert hver 3. time. Produksjonen av dette datasettet er avsluttet. + Active + Complete + SIOS + ADC + METNCS + + + 2023-09-15T12:00:00Z + Created + + + 2024-04-05T13:33:14Z + Minor modification + Add collection ADC + + + 2025-01-10T13:45:00Z + Minor modification + Set dataset production status to complete + + + 2024-04-05T13:35:05Z + Minor modification + Add collection SIOS + + + 2025-01-15T08:30:00Z + Minor modification + Add this comment to test dmci ingestion + + + + 2023-02-27T15:00:00Z + + climatologyMeteorologyAtmosphere + + WEATHER FORECAST + https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/sciencekeywords + + + + Meteorological geographical features + Atmospheric conditions + https://inspire.ec.europa.eu/theme + + + + Weather and climate + https://register.geonorge.no/metadata-kodelister/nasjonal-temainndeling + + + + + 88.0 + 62.0 + 80.0 + -18.0 + + + + + + -17.96 69.30 + -13.44 69.04 + -9.06 68.66 + -4.85 68.17 + -0.84 67.57 + 2.94 66.88 + 6.48 66.09 + 9.79 65.23 + 12.86 64.30 + 15.71 63.30 + 18.66 62.12 + 21.80 63.60 + 25.31 65.00 + 29.23 66.30 + 33.58 67.50 + 38.40 68.56 + 43.68 69.46 + 49.38 70.18 + 55.46 70.71 + 61.81 71.01 + 68.90 71.09 + 68.90 71.09 + 69.08 72.92 + 69.28 74.56 + 69.53 76.20 + 69.84 77.84 + 70.24 79.48 + 70.79 81.11 + 71.59 82.74 + 72.83 84.37 + 75.04 85.98 + 80.05 87.56 + 80.05 87.56 + 31.29 87.08 + 8.72 85.56 + -1.73 83.70 + -7.39 81.72 + -10.88 79.69 + -13.23 77.63 + -14.92 75.55 + -16.18 73.47 + -17.17 71.38 + -17.96 69.30 + + + + + + en + Operational + + CC-BY-4.0 + https://spdx.org/licenses/CC-BY-4.0 + + + Investigator + Senter for utvikling av værvarslingstjenesten + suv-arctic@met.no + Norwegian Meteorological Institute + + + + MET Norway + Norwegian Meteorological Institute + + https://data.met.no + + + Project home page + + https://www.met.no/en/projects/The-weather-model-AROME-Arctic + + grid + Numerical Simulation + + Senter for utvikling av værvarslingstjenesten + 2023-09-14T12:00:00Z + Arome-Arctic 2.5Km lagged subset + 66 hours + + + Dataset landing page + + https://data-test.met.no/dataset/3f289fcc-022b-4b62-bbbb-b001304a6e09 + + diff --git a/tests/test_dist/test_pycsw_dist.py b/tests/test_dist/test_pycsw_dist.py index d7deb81..a9465b9 100644 --- a/tests/test_dist/test_pycsw_dist.py +++ b/tests/test_dist/test_pycsw_dist.py @@ -102,7 +102,7 @@ def testDistPyCSW_Insert(monkeypatch, mockXml, mockXslt, tmpConf): @pytest.mark.dist -def testDistPyCSW_Update(monkeypatch, mockXml, mockXslt, tmpUUID, tmpConf): +def testDistPyCSW_Update(monkeypatch, filesDir, mockXslt, tmpUUID, tmpConf): """Test update commands via run().""" tstWorker = Worker("update", None, None) @@ -110,6 +110,8 @@ def testDistPyCSW_Update(monkeypatch, mockXml, mockXslt, tmpUUID, tmpConf): tstWorker._namespace = "no.test" tstWorker._conf = tmpConf + mockXml = os.path.join(filesDir, "reference", "mmd_file.xml") + # Update returns True with monkeypatch.context() as mp: mp.setattr(PyCSWDist, "_translate", lambda *a: b"") @@ -164,6 +166,25 @@ def new_delete(cls, *a, **k): "http://localhost: service unavailable. Failed to insert." ) + # Worker._get_metadata_id returns empty file_uuid, i.e., file_uuid = "". + with monkeypatch.context() as mp: + mp.setattr(Worker, "_get_metadata_id", lambda *a, **k: ("no.test", "")) + tstPyCSW = PyCSWDist("update", xml_file=mockXml) + assert tstPyCSW.run() == (False, "No UUID found in XML file") + + # Worker._get_metadata_id returns empty namespace, i.e., namespace = "". + with monkeypatch.context() as mp: + mp.setattr(Worker, "_get_metadata_id", + lambda *a, **k: ("", "3f289fcc-022b-4b62-bbbb-b001304a6e09")) + tstPyCSW = PyCSWDist("update", xml_file=mockXml) + assert tstPyCSW.run() == (False, "No namespace found in XML file") + + # Worker._get_metadata_id returns invalid file_uuid, e.g., file_uuid = "123". + with monkeypatch.context() as mp: + mp.setattr(Worker, "_get_metadata_id", lambda *a, **k: ("no.test", "123")) + tstPyCSW = PyCSWDist("update", xml_file=mockXml) + assert tstPyCSW.run() == (False, "Could not parse UUID: 123") + # END Test testDistPyCSW_Update