From 2fc0b3076ef8c7a676a006557d4f89f1bc842467 Mon Sep 17 00:00:00 2001 From: Renaud <38732257+renaudjester@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:18:05 +0100 Subject: [PATCH] chore: multiple products for a dataset in the mapping (#247) allow a dataset to be in several products (they are now comma separated in the mapping) --- .../catalogue_parser/catalogue_parser.py | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/copernicusmarine/catalogue_parser/catalogue_parser.py b/copernicusmarine/catalogue_parser/catalogue_parser.py index 891c9e75..16633fee 100644 --- a/copernicusmarine/catalogue_parser/catalogue_parser.py +++ b/copernicusmarine/catalogue_parser/catalogue_parser.py @@ -58,26 +58,34 @@ def get_dataset_metadata( dataset_product_mapping_url = ( f"{root_url}/dataset_product_id_mapping.json" ) - product_id = connection.get_json_file(dataset_product_mapping_url).get( - dataset_id - ) - if not product_id: + product_ids = connection.get_json_file( + dataset_product_mapping_url + ).get(dataset_id) + if not product_ids: raise DatasetNotFound(dataset_id) - url = f"{stac_url}/{product_id}/product.stac.json" - product_json = connection.get_json_file(url) - product_collection = pystac.Collection.from_dict(product_json) - product_datasets_metadata_links = product_collection.get_item_links() - datasets_metadata_links = [ - dataset_metadata_link - for dataset_metadata_link in product_datasets_metadata_links - if dataset_id in dataset_metadata_link.href - ] - if not datasets_metadata_links: - return None - dataset_jsons: list[dict] = [ - connection.get_json_file(f"{stac_url}/{product_id}/{link.href}") - for link in datasets_metadata_links - ] + dataset_jsons: list[dict] = [] + for product_id in product_ids.split(","): + url = f"{stac_url}/{product_id}/product.stac.json" + product_json = connection.get_json_file(url) + product_collection = pystac.Collection.from_dict(product_json) + product_datasets_metadata_links = ( + product_collection.get_item_links() + ) + datasets_metadata_links = [ + dataset_metadata_link + for dataset_metadata_link in product_datasets_metadata_links + if dataset_id in dataset_metadata_link.href + ] + if not datasets_metadata_links: + continue + dataset_jsons.extend( + [ + connection.get_json_file( + f"{stac_url}/{product_id}/{link.href}" + ) + for link in datasets_metadata_links + ] + ) dataset_items = [ dataset_item