From c86bf3a7ba0d3346002824a6a7895d0d6e55e814 Mon Sep 17 00:00:00 2001 From: Anton Bakker Date: Tue, 7 Mar 2023 13:38:02 +0100 Subject: [PATCH] make duplicate filter deterministic by sorting on title before filtering --- ngr_spider/csw_client.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ngr_spider/csw_client.py b/ngr_spider/csw_client.py index e3c5f79..144be94 100644 --- a/ngr_spider/csw_client.py +++ b/ngr_spider/csw_client.py @@ -15,9 +15,10 @@ def __init__(self, csw_url): def _filter_service_records( self, records: list[CswServiceRecord] ) -> list[CswServiceRecord]: - filtered_records = filter( - lambda x: x.service_url != "", records - ) # filter out results without serviceurl + records.sort(key=lambda x: x.title, reverse=True) + filtered_records = filter(lambda x: x.service_url != "", records) + + # filter out results without serviceurl # delete duplicate service entries, some service endpoint have multiple service records # so last record in get_record_results will be retained in case of duplicate # since it will be inserted in new_dict last @@ -66,8 +67,10 @@ def _get_csw_records_by_protocol( ) -> list[CswServiceRecord]: protocol_key = "protocol" - if protocol == OAT_PROTOCOL: # required since NGR does not support OGC API TILES as a seperate protocol - protocol_key= "anyText" + if ( + protocol == OAT_PROTOCOL + ): # required since NGR does not support OGC API TILES as a separate protocol + protocol_key = "anyText" query = f"type='service' AND organisationName='{svc_owner}' AND {protocol_key}='{protocol}'" records = self._get_csw_records(query, max_results, no_filter)