Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reject if the namespace contains an environment string that does not match the configured env #232

Merged
merged 14 commits into from
Nov 13, 2024
74 changes: 39 additions & 35 deletions dmci/api/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,7 @@

class Worker:

CALL_MAP = {
"file": FileDist,
"pycsw": PyCSWDist,
"solr": SolRDist
}
CALL_MAP = {"file": FileDist, "pycsw": PyCSWDist, "solr": SolRDist}

def __init__(self, cmd, xml_file, xsd_validator, **kwargs):

Expand Down Expand Up @@ -87,7 +83,7 @@ def validate(self, data):
# Takes in bytes-object data
# Gives msg when both validating and not validating
valid = False
msg = ''
msg = ""
if not isinstance(data, bytes):
return False, "Input must be bytes type", data

Expand All @@ -104,48 +100,60 @@ def validate(self, data):
if not valid:
return valid, msg, data

if (".dev" in self._namespace and self._conf.env_string != "dev") or (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just check

Suggested change
if (".dev" in self._namespace and self._conf.env_string != "dev") or (
if (self._conf.env_string not in self._namespace) or (

and so on? Or would that fail for prod?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it fails for prod.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about this?

Suggested change
if (".dev" in self._namespace and self._conf.env_string != "dev") or (
if self._conf.env_string != "" and self._conf.env_string not in self._namespace:

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see now that this suggestion would also be wrong. My point is that I would prefer to avoid hardcoding "dev" and "staging".

".staging" in self._namespace and self._conf.env_string != "staging"
):
msg = (
f"Namespace {self._namespace} does not match "
f"the env {self._conf.env_string}"
)
return False, msg, data

if self._conf.env_string:

# Append env string to namespace in metadata_identifier
logger.debug("Identifier namespace: %s" % self._namespace)
logger.debug("Environment customization: %s" %
self._conf.env_string)
logger.debug("Environment customization: %s" % self._conf.env_string)
ns_re_pattern = re.compile(r"\w.\w." + self._conf.env_string)

if re.search(ns_re_pattern, self._namespace) is None:
full_namespace = f"{self._namespace}.{self._conf.env_string}"
data = re.sub(
str.encode(
f"<mmd:metadata_identifier>{self._namespace}"),
str.encode(
f"<mmd:metadata_identifier>{full_namespace}"),
str.encode(f"<mmd:metadata_identifier>{self._namespace}"),
str.encode(f"<mmd:metadata_identifier>{full_namespace}"),
data,
)
self._namespace = full_namespace

# Append env string to the namespace in the parent block, if present
if bool(re.search(b'<mmd:related_dataset relation_type="parent">', data)):
if bool(
re.search(b'<mmd:related_dataset relation_type="parent">', data)
):
match_parent_block = re.search(
b'<mmd:related_dataset relation_type="parent">(.+?)</mmd:related_dataset>',
data
data,
)
found_parent_block_content = match_parent_block.group(1)
found_parent_block_content = found_parent_block_content.split(
b":")
found_parent_block_content = found_parent_block_content.split(b":")
if len(found_parent_block_content) != 2:
err = f"Malformed parent dataset identifier {found_parent_block_content}"
logger.error(err)
return False, err, data
old_parent_namespace = found_parent_block_content[0].decode()
logger.debug("Parent dataset namespace: %s" %
old_parent_namespace)
logger.debug("Parent dataset namespace: %s" % old_parent_namespace)
if re.search(ns_re_pattern, old_parent_namespace) is None:
new_parent_namespace = f"{old_parent_namespace}.{self._conf.env_string}"
new_parent_namespace = (
f"{old_parent_namespace}.{self._conf.env_string}"
)
data = re.sub(
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{old_parent_namespace}'),
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{new_parent_namespace}'),
str.encode(
f"<mmd:related_dataset "
f'relation_type="parent">{old_parent_namespace}'
),
str.encode(
f"<mmd:related_dataset "
f'relation_type="parent">{new_parent_namespace}'
),
data,
)

Expand Down Expand Up @@ -191,8 +199,7 @@ def distribute(self):
xml_file=self._dist_xml_file,
metadata_UUID=self._dist_metadata_id_uuid,
worker=self,
path_to_parent_list=self._kwargs.get(
"path_to_parent_list", None),
path_to_parent_list=self._kwargs.get("path_to_parent_list", None),
)
valid &= obj.is_valid()
if obj.is_valid():
Expand Down Expand Up @@ -278,8 +285,7 @@ def _add_landing_page(self, data, catalog_url, uuid):
f"\n <mmd:description/>\n "
f"<mmd:resource>{catalog_url}/dataset/{uuid}</mmd:resource>\n "
)
data_mod = re.sub(found_datasetlandingpage,
datasetlandingpage_mod, data)
data_mod = re.sub(found_datasetlandingpage, datasetlandingpage_mod, data)

return data_mod

Expand All @@ -304,17 +310,15 @@ def _extract_metadata_id(self, xml_doc):
# only accept if format is uri:UUID, both need to be present
words = xml_entry.text.split(":")
if len(words) != 2:
logger.error(
"metadata_identifier not formed as namespace:UUID")
logger.error("metadata_identifier not formed as namespace:UUID")
return False
namespace, file_uuid = words

logger.info("XML file metadata_identifier: %s:%s" %
(namespace, file_uuid))
logger.debug(
"XML file metadata_identifier namespace: %s", namespace)
logger.debug(
"XML file metadata_identifier UUID: %s", file_uuid)
logger.info(
"XML file metadata_identifier: %s:%s" % (namespace, file_uuid)
)
logger.debug("XML file metadata_identifier namespace: %s", namespace)
logger.debug("XML file metadata_identifier UUID: %s", file_uuid)
break

if file_uuid == "":
Expand Down
156 changes: 156 additions & 0 deletions tests/files/api/staging.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>test.no.staging:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C</mmd:title>
<mmd:title xml:lang="nor">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
<mmd:abstract xml:lang="nor">Direktesendte satellittdata mottatt ved Meteorologisk Institutt Oslo. Prosessert med standard prosesseringssoftware til geolokaliserte og kalibrerte verdier i satellitsveip i mottatt instrument oppløsning.</mmd:abstract>
<mmd:metadata_status>Active</mmd:metadata_status>
<mmd:dataset_production_status>Complete</mmd:dataset_production_status>
<mmd:collection>METNCS</mmd:collection>
<mmd:last_metadata_update>
<mmd:update>
<mmd:datetime>2021-04-29T00:46:05Z</mmd:datetime>
<mmd:type>Created</mmd:type>
</mmd:update>
</mmd:last_metadata_update>
<mmd:temporal_extent>
<mmd:start_date>2021-04-29T00:28:44.977627Z</mmd:start_date>
<mmd:end_date>2021-04-29T00:39:55.000000Z</mmd:end_date>
</mmd:temporal_extent>
<mmd:iso_topic_category>climatologyMeteorologyAtmosphere</mmd:iso_topic_category>
<mmd:iso_topic_category>environment</mmd:iso_topic_category>
<mmd:iso_topic_category>oceans</mmd:iso_topic_category>
<mmd:keywords vocabulary="GCMD">
<mmd:keyword>Earth Science &gt; Atmosphere &gt; Atmospheric radiation</mmd:keyword>
<mmd:resource>https://gcmdservices.gsfc.nasa.gov/static/kms/</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="GEMET">
<mmd:keyword>Meteorological geographical features</mmd:keyword>
<mmd:keyword>Atmospheric conditions</mmd:keyword>
<mmd:keyword>Oceanographic geographical features</mmd:keyword>
<mmd:resource>http://inspire.ec.europa.eu/theme</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="Norwegian thematic categories">
<mmd:keyword>Weather and climate</mmd:keyword>
<mmd:resource>https://register.geonorge.no/subregister/metadata-kodelister/kartverket/nasjonal-temainndeling</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:geographic_extent>
<mmd:rectangle srsName="EPSG:4326">
<mmd:north>80.49233</mmd:north>
<mmd:south>36.540688</mmd:south>
<mmd:east>79.40124</mmd:east>
<mmd:west>1.5549301</mmd:west>
</mmd:rectangle>
</mmd:geographic_extent>
<mmd:dataset_language>en</mmd:dataset_language>
<mmd:operational_status>Operational</mmd:operational_status>
<mmd:use_constraint>
<mmd:identifier>CC-BY-4.0</mmd:identifier>
<mmd:resource>http://spdx.org/licenses/CC-BY-4.0</mmd:resource>
</mmd:use_constraint>
<mmd:personnel>
<mmd:role>Technical contact</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>[email protected]</mmd:email>
<mmd:organisation>MET NORWAY</mmd:organisation>
</mmd:personnel>
<mmd:personnel>
<mmd:role>Metadata author</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>[email protected]</mmd:email>
<mmd:organisation>unknown</mmd:organisation>
</mmd:personnel>
<mmd:data_center>
<mmd:data_center_name>
<mmd:short_name>MET NORWAY</mmd:short_name>
<mmd:long_name>MET NORWAY</mmd:long_name>
</mmd:data_center_name>
<mmd:data_center_url>met.no</mmd:data_center_url>
</mmd:data_center>
<mmd:data_access>
<mmd:type>OPeNDAP</mmd:type>
<mmd:description>Open-source Project for a Network Data Access Protocol</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:data_access>
<mmd:type>OGC WMS</mmd:type>
<mmd:description>OGC Web Mapping Service, URI to GetCapabilities Document.</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&amp;version=1.3.0&amp;request=GetCapabilities</mmd:resource>
<mmd:wms_layers>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
</mmd:wms_layers>
</mmd:data_access>
<mmd:data_access>
<mmd:type>HTTP</mmd:type>
<mmd:description>Direct download of file</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/fileServer/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:related_dataset relation_type="parent">test.no.staging:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:related_dataset>
<mmd:storage_information>
<mmd:file_name>aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_name>
<mmd:file_location>/lustre/storeB/immutable/archive/projects/remotesensing/satellite-thredds/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_location>
<mmd:file_format>NetCDF-CF</mmd:file_format>
<mmd:file_size unit="MB">1862.00</mmd:file_size>
<mmd:checksum type="md5sum">4e1833610272ee63228f575d1c875fbe</mmd:checksum>
</mmd:storage_information>
<mmd:project>
<mmd:short_name>Govermental core service</mmd:short_name>
<mmd:long_name>Govermental core service</mmd:long_name>
</mmd:project>
<mmd:platform>
<mmd:short_name>Aqua</mmd:short_name>
<mmd:long_name>Aqua</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/satellites/view/aqua</mmd:resource>
<mmd:instrument>
<mmd:short_name>MODIS</mmd:short_name>
<mmd:long_name>MODIS</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/instruments/view/modis</mmd:resource>
</mmd:instrument>
</mmd:platform>
<mmd:activity_type>Space Borne Instrument</mmd:activity_type>
<mmd:dataset_citation>
<mmd:author>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:author>
<mmd:publication_date>2021-04-29</mmd:publication_date>
<mmd:title>Direct Broadcast data processed in satellite swath to L1C</mmd:title>
</mmd:dataset_citation>
</mmd:mmd>
Loading
Loading