diff --git a/README.md b/README.md index d6f918e..ec3928d 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,13 @@ options: 1. To generate a list of layers per service run the following: ```sh -ngr-spider layers -p 'OGC:WMS,OGC:WMTS' pdok-services.json -ngr-spider services -p 'OGC:WMS,OGC:WMTS' pdok-services.json +ngr-spider layers -p 'OGC:WMS,OGC:WMTS,OGC:API features' pdok-services.json +ngr-spider services -p 'OGC:WMS,OGC:WMTS,OGC:API features' pdok-services.json ``` +Note: you may need to install pyproj manually (`pip install pyproj`) on some systems. + ## Development Install dev dependencies and package from source: diff --git a/examples/generate-pdok-services-plugin-config.sh b/examples/generate-pdok-services-plugin-config.sh index 808868a..b5c1beb 100755 --- a/examples/generate-pdok-services-plugin-config.sh +++ b/examples/generate-pdok-services-plugin-config.sh @@ -10,7 +10,9 @@ spider_output=/output_dir/$(basename "$output_file") cat < /tmp/sorting-rules.json [ - { "index": 0, "names": ["opentopo+"], "types": ["OGC:WMTS"] }, + { "index": 0, "names": ["tiles$"], "types": ["OGC:API tiles"] }, + { "index": 5, "names": ["features$"], "types": ["OGC:API features"] }, + { "index": 9, "names": ["opentopo+"], "types": ["OGC:WMTS"] }, { "index": 10, "names": ["^actueel_orthohr$"], "types": ["OGC:WMTS"] }, { "index": 11, "names": ["^actueel_ortho25$"], "types": ["OGC:WMTS"] }, { "index": 12, "names": ["^actueel_ortho25ir$"], "types": ["OGC:WMTS"] }, @@ -40,7 +42,7 @@ if [[ $nr_of_services != "-" ]];then nr_svc_flag="-n ${nr_of_services}" fi -docker run -v "/${output_dir}:/output_dir" -v /tmp:/tmp pdok/ngr-services-spider layers $nr_svc_flag --snake-case -s /tmp/sorting-rules.json -m flat -p OGC:WMS,OGC:WFS,OGC:WCS,OGC:WMTS "$spider_output" --jq-filter '.layers[] |= with_entries( +docker run -v "/${output_dir}:/output_dir" -v /tmp:/tmp "pdok/ngr-services-spider:0.6.3" layers $nr_svc_flag --snake-case -s /tmp/sorting-rules.json -m flat -p "OGC:WMS,OGC:WFS,OGC:WCS,OGC:WMTS,OGC:API tiles,OGC:API features" "$spider_output" --jq-filter '.layers[] |= with_entries( if .key == "service_protocol" then .value = (.value | split(":")[1] | ascii_downcase) | .key = "service_type" elif .key == "service_metadata_id" then @@ -49,6 +51,8 @@ docker run -v "/${output_dir}:/output_dir" -v /tmp:/tmp pdok/ngr-services-spider .key = "dataset_md_id" elif .key == "styles" then .value = (.value | map(del(.legend_url))) + elif .key == "service_url" and (.value | test("/tiles")) then + .value = (.value | split("/tiles")[0]) else (.) end diff --git a/ngr_spider/models.py b/ngr_spider/models.py index b489a01..54e8197 100644 --- a/ngr_spider/models.py +++ b/ngr_spider/models.py @@ -95,7 +95,6 @@ class OatLayer(Layer): styles: list[VectorTileStyle] tiles: list[OatTiles] - @dataclasses.dataclass class WmtsLayer(Layer): styles: list[Style] @@ -327,7 +326,6 @@ class OatService(Service): @dataclasses.dataclass(kw_only=True) class OafService(Service): featuretypes: list[Layer] - output_formats: str protocol: str = OAF_PROTOCOL diff --git a/ngr_spider/ogc_api_features.py b/ngr_spider/ogc_api_features.py index 16a4e70..860040a 100644 --- a/ngr_spider/ogc_api_features.py +++ b/ngr_spider/ogc_api_features.py @@ -6,6 +6,19 @@ LOGGER = logging.getLogger(__name__) +class Collection: + id: str + title: str + description: str + crs: str + + def __init__(self, data: dict): + self.id = data["id"] + self.title = data["title"] + self.description = data["description"] + self.crs = data.get("extent", {}).get("spatial", {}).get("crs", "") + + class Info: description: str title: str @@ -17,7 +30,6 @@ def __init__(self, data: dict): self.version = data["version"] -# TODO Implement service to retrieve correct info class ServiceDesc: def __init__(self, href: str): url = requests.get(href) @@ -27,17 +39,11 @@ def get_info(self): return Info(self.json["info"]) def get_tags(self): - return self.json.get('tags', []) or [] + return self.json.get("tags", []) or [] def get_servers(self): return self.json["servers"] - def get_dataset_metadata_id(self): - return "" - - def get_output_format(self): - return "" - def _get_url_from_servers(self, servers: list[str]): for server in servers: if len(server["url"]) > 0: @@ -49,9 +55,15 @@ def __init__(self, href: str): url = requests.get(href) self.json = url.json() + def get_collections(self): + collection_list = [] + for collection in self.json["collections"]: + collection_list.append(Collection(collection)) + return collection_list + + class OGCApiFeatures: service_url: str - service_type: str service_desc: ServiceDesc data: Data @@ -63,21 +75,21 @@ def __init__(self, url): self.service_url = url self._load_landing_page(url) - # TODO Get correct info for featuretypes info when available - def get_featuretypes(self): - service_layer_name: str = "service_layer_name" - service_layer_title: str = "service_layer_title" - service_layer_abstract: str = "service_layer_abstract" - service_layer_metadata_id: str = "service_layer_metadata_id" - - return [ - Layer( - service_layer_name, - service_layer_title, - service_layer_abstract, - service_layer_metadata_id, + def get_featuretypes(self, dataset_metadata_id: str): + collection_list = [] + collections = self.data.get_collections() + for collection in collections: + collection_name: str = collection.id + collection_title: str = collection.title + collection_abstract: str = collection.description + featuretype = Layer( + collection_name, + collection_title, + collection_abstract, + dataset_metadata_id, ) - ] + collection_list.append(featuretype) + return collection_list def _load_landing_page(self, service_url: str): response = requests.get(service_url) @@ -87,7 +99,7 @@ def _load_landing_page(self, service_url: str): for link in links: if link["rel"] == "service-desc": self.service_desc = ServiceDesc(link["href"]) - elif link["rel"] == "data": + elif link["rel"].endswith("data"): self.data = Data(link["href"]) self.title = response_body_data["title"] or "" self.description = response_body_data["description"] or "" diff --git a/ngr_spider/ogc_api_tiles.py b/ngr_spider/ogc_api_tiles.py index 19f02f3..cbad59f 100644 --- a/ngr_spider/ogc_api_tiles.py +++ b/ngr_spider/ogc_api_tiles.py @@ -72,7 +72,6 @@ def __init__(self, href: str): # TODO use async methods class OGCApiTiles: service_url: str - service_type: str service_desc: ServiceDesc data: Data @@ -113,11 +112,11 @@ def __load_landing_page(self, service_url: str): for link in links: if link["rel"] == "service-desc": self.service_desc = ServiceDesc(link["href"]) - elif link["rel"] == "data" or link["rel"].endswith('styles'): + elif link["rel"].endswith('styles'): self.data = Data(link["href"]) - elif link["rel"] == "tiles" or link["rel"].endswith('tilesets-vector'): + elif link["rel"].endswith('tilesets-vector'): self.tiles = Tiles(link["href"]) - elif link["rel"] == "tileMatrixSets" or link["rel"].endswith('tiling-schemes'): + elif link["rel"].endswith('tiling-schemes'): self.tile_matrix_sets = TileMatrixSets(link["href"]) title = response_body_data["title"] self.title = title if title else "" diff --git a/ngr_spider/util.py b/ngr_spider/util.py index 3a3ba28..f451a57 100644 --- a/ngr_spider/util.py +++ b/ngr_spider/util.py @@ -272,13 +272,13 @@ def get_atom_service( return AtomService(service_record.service_url, r.text) -# TODO check correctness when test data is available, retrieve data from correct source/location def get_oaf_service( service_record: CswServiceRecord, ) -> Union[OafService, ServiceError]: try: url = service_record.service_url - md_id = service_record.metadata_id + md_id = service_record.metadata_id or "" + ds_md_id = service_record.dataset_metadata_id or "" LOGGER.info(f"{md_id} - {url}") if "://secure" in url: # this is a secure layer not for the general public: ignore @@ -286,17 +286,19 @@ def get_oaf_service( oaf = OGCApiFeatures(url) title = oaf.title or oaf.service_desc.get_info().title or "" description = oaf.description or oaf.service_desc.get_info().description or "" - keywords = oaf.service_desc.get_tags() or [] + + featuretypes=oaf.get_featuretypes(ds_md_id) + for featuretype in featuretypes: + featuretype.dataset_metadata_id = service_record.dataset_metadata_id or "" return OafService( title=title, abstract=description, metadata_id=md_id, url=url, - output_formats=oaf.service_desc.get_output_format(), - keywords=keywords, - dataset_metadata_id=oaf.service_desc.get_dataset_metadata_id(), - featuretypes=oaf.get_featuretypes(), + featuretypes=oaf.get_featuretypes(ds_md_id), + keywords=oaf.service_desc.get_tags(), + dataset_metadata_id=ds_md_id, ) except requests.exceptions.HTTPError as e: LOGGER.error(f"md-identifier: {md_id} - {e}") @@ -511,7 +513,6 @@ def flatten_layer(layer): protocol = service["protocol"] - # TODO? do we need specific functions for flattening OGC:API tiles/features? if protocol == "INSPIRE Atom": raise NotImplementedError( # TODO: move check to argument parse function "Flat output for INSPIRE Atom services has not been implemented (yet)."