From e6773cb68162e62d90eae5331103afdf72421cf2 Mon Sep 17 00:00:00 2001 From: Abram Booth Date: Wed, 30 Aug 2023 16:27:04 -0400 Subject: [PATCH] fix: infer funderIdentifierType from identifier --- osf/metadata/osf_gathering.py | 1 - osf/metadata/rdfutils.py | 1 + .../serializers/datacite/datacite_tree_walker.py | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py index 60d8a6210602..11e60b5c0156 100644 --- a/osf/metadata/osf_gathering.py +++ b/osf/metadata/osf_gathering.py @@ -839,7 +839,6 @@ def gather_funding(focus): yield (_funder_ref, RDF.type, DCTERMS.Agent) yield (_funder_ref, DCTERMS.identifier, _funder_uri) yield (_funder_ref, FOAF.name, _funder_name) - yield (_funder_ref, OSF.funderIdentifierType, _funding.get('funder_identifier_type')) _award_uri = _funding.get('award_uri') _award_title = _funding.get('award_title') _award_number = _funding.get('award_number') diff --git a/osf/metadata/rdfutils.py b/osf/metadata/rdfutils.py index 259efdf08249..6dbe7039c5d4 100644 --- a/osf/metadata/rdfutils.py +++ b/osf/metadata/rdfutils.py @@ -12,6 +12,7 @@ OSFIO = rdflib.Namespace(website_settings.DOMAIN) # external pid namespaces: DOI = rdflib.Namespace('https://doi.org/') +DxDOI = rdflib.Namespace('http://dx.doi.org/') # "earlier but no longer preferred" DOI namespace ORCID = rdflib.Namespace('https://orcid.org/') ROR = rdflib.Namespace('https://ror.org/') # external terminology namespaces: diff --git a/osf/metadata/serializers/datacite/datacite_tree_walker.py b/osf/metadata/serializers/datacite/datacite_tree_walker.py index a00911fc1d0d..08990d466a9e 100644 --- a/osf/metadata/serializers/datacite/datacite_tree_walker.py +++ b/osf/metadata/serializers/datacite/datacite_tree_walker.py @@ -12,6 +12,7 @@ RDF, DCTERMS, DOI, + DxDOI, FOAF, ORCID, OSF, @@ -154,6 +155,8 @@ def _visit_creators(self, parent_el, focus_iri): def _identifier_type_and_value(self, identifier: str): if identifier.startswith(DOI): return ('DOI', without_namespace(identifier, DOI)) + elif identifier.startswith(DxDOI): + return ('DOI', without_namespace(identifier, DxDOI)) elif identifier.startswith(ROR): return ('ROR', identifier) # ROR keeps the full IRI elif identifier.startswith(ORCID): @@ -162,6 +165,13 @@ def _identifier_type_and_value(self, identifier: str): return ('URL', identifier) logger.warning('skipping non-IRI-shaped identifier "%s"', identifier) + def _funder_identifier_type(self, identifier: str): + if identifier.startswith(DxDOI) or identifier.startswith(DOI): + return 'Crossref Funder ID' + if identifier.startswith(ROR): + return 'ROR' + return 'Other' + def _get_name_type(self, agent_iri): if (agent_iri, RDF.type, FOAF.Person) in self.basket: return 'Personal' @@ -261,12 +271,13 @@ def _visit_funding_references(self, parent_el): for _funder in self.basket[OSF.funder]: fundref_el = self.visit(fundrefs_el, 'fundingReference') self.visit(fundref_el, 'funderName', text=next(self.basket[_funder:FOAF.name], '')) + funder_identifier = next(self.basket[_funder:DCTERMS.identifier], '') self.visit( fundref_el, 'funderIdentifier', - text=next(self.basket[_funder:DCTERMS.identifier], ''), + text=funder_identifier, attrib={ - 'funderIdentifierType': next(self.basket[_funder:OSF.funderIdentifierType], ''), + 'funderIdentifierType': self._funder_identifier_type(funder_identifier), }, ) for _funding_award in self.basket[OSF.hasFunding]: