diff --git a/.github/workflows/pull_reqest.yml b/.github/workflows/pull_reqest.yml index c47817d..df72641 100644 --- a/.github/workflows/pull_reqest.yml +++ b/.github/workflows/pull_reqest.yml @@ -12,7 +12,7 @@ jobs: - name: Build the stack run: docker compose -f "docker-compose.yml" up -d - name: Run Tests - run: docker compose -f "docker-compose.yml" run web ./bin/ci.sh + run: docker compose -f "docker-compose.yml" exec web ./bin/ci.sh - name: Stop containers if: always() run: docker compose -f "docker-compose.yml" down diff --git a/tests/unit/test_catalogs.py b/tests/unit/test_catalogs.py index 1cc8e2a..3765ce9 100644 --- a/tests/unit/test_catalogs.py +++ b/tests/unit/test_catalogs.py @@ -1,5 +1,6 @@ """Unit tests for the OAI-PMH DataCite Catalog implementation""" +import pytest from viringo import catalogs def test_set_to_search_query(): @@ -62,9 +63,7 @@ def test_identifier_to_string(): 'type': 'DOI', 'identifier': "10.5072/1234" } - identifier_string = catalogs.identifier_to_string(identifier_type) - assert identifier_string == "doi:10.5072/1234" def test_numeric_identifier_to_string(): @@ -74,7 +73,28 @@ def test_numeric_identifier_to_string(): 'type': 'ISBN', 'identifier': 9783851254679 } - identifier_string = catalogs.identifier_to_string(identifier_type) - assert identifier_string == "isbn:9783851254679" + +def test_none_identifier_to_string(): + """Tests for converting an ISBN numeric identifier to single string""" + + identifier_type = { + 'type': "ABC", + 'identifier': None + } + with pytest.raises(catalogs.InvalidIdentifierException) as exc: + identifier_string = catalogs.identifier_to_string(identifier_type) + assert exc.type == catalogs.InvalidIdentifierException + +def test_none_type_identifier_to_string(): + """Tests for converting an ISBN numeric identifier to single string""" + + identifier_type = { + 'type': None, + 'identifier': 9783851254679 + } + with pytest.raises(catalogs.InvalidIdentifierException) as exc: + identifier_string = catalogs.identifier_to_string(identifier_type) + assert exc.type == catalogs.InvalidIdentifierException + diff --git a/tests/unit/test_service_datacite.py b/tests/unit/test_service_datacite.py index 74b4ed0..49700bf 100644 --- a/tests/unit/test_service_datacite.py +++ b/tests/unit/test_service_datacite.py @@ -13,3 +13,41 @@ def test_strip_uri_prefix(): identifier = datacite.strip_uri_prefix("") assert identifier == "" + +def test_identifiers(): + SAMPLE_ATTRIBUTES = { + 'identifiers': [ + { + 'identifierType': 'ISNI', + 'identifier': 9783851254679 + }, + ] + } + + assert datacite.identifiers_from_attributes( SAMPLE_ATTRIBUTES ) == [ + { + 'type': 'ISNI', + 'identifier': '9783851254679' + } + ] + +def test_skip_invlaid_identifiers(): + INVALID_ATTRIBUTES = { + 'identifiers': [ + { + 'identifierType': 'ISBN', + 'identifier': "XXXXX" + }, + { + 'identifierType': None, + 'identifier': 9783851254679 + }, + ] + } + + assert datacite.identifiers_from_attributes( INVALID_ATTRIBUTES ) == [ + { + 'type': 'ISBN', + 'identifier': 'XXXXX' + } + ] diff --git a/viringo/catalogs.py b/viringo/catalogs.py index 514f032..5afb68f 100644 --- a/viringo/catalogs.py +++ b/viringo/catalogs.py @@ -14,6 +14,10 @@ from .services import datacite +class InvalidIdentifierException(Exception): + pass + + class DataCiteOAIServer(): """Build OAI-PMH data responses for DataCite metadata catalog""" @@ -253,12 +257,15 @@ def build_metadata_map(self, result): rights.append(right['uri']) identifiers = [ - identifier_to_string(identifier) for identifier in result.identifiers + identifier_to_string(identifier) + for identifier in result.identifiers + if 'type' in identifier ] relations = [ identifier_to_string(relation) for relation in result.relations + if 'type' in relation ] contributors = [ @@ -328,5 +335,9 @@ def set_to_provider_client(unparsed_set): def identifier_to_string(identifier): """Take an identifier and return in a formatted in single string""" _id = identifier.get('identifier') - _type = identifier.get('type', '') + _type = identifier.get('type','') + if not _type: + raise InvalidIdentifierException("Missing 'type' information") + if not _id: + raise InvalidIdentifierException("Missing 'id' information") return _type.lower() + ":" + str(_id) diff --git a/viringo/services/datacite.py b/viringo/services/datacite.py index 98d4b4e..aa88678 100644 --- a/viringo/services/datacite.py +++ b/viringo/services/datacite.py @@ -114,12 +114,10 @@ def build_metadata(data): type_ = date.get('dateType') value = date.get('date') if bool(type_) and bool(value) : - result.dates.append( - { - 'type': type_, - 'date': value - } - ) + result.dates.append({ + 'type': type_, + 'date': value + }) result.contributors = attributes.get('contributors',[]) result.funding_references = attributes.get('fundingReferences',[]) @@ -133,37 +131,14 @@ def build_metadata(data): if attributes['types'].get('resourceType') is not None else [] result.formats = attributes.get('formats', []) - - result.identifiers = [] - - # handle missing identifiers attribute - for identifier in attributes.get('identifiers',[]): - if identifier['identifier']: - # Special handling for the fact the API could return bad identifier - # as a list rather than a string - if isinstance(identifier['identifier'], list): - identifier['identifier'] = ','.join( - identifier['identifier']) - - result.identifiers.append({ - 'type': identifier['identifierType'], - 'identifier': strip_uri_prefix(identifier['identifier']) - }) - + result.identifiers = identifiers_from_attributes(attributes) result.language = attributes.get('language', '') - - result.relations = [] - for related in attributes.get('relatedIdentifiers',[]): - if 'relatedIdentifier' in related: - result.relations.append({ - 'type': related['relatedIdentifierType'], - 'identifier': related['relatedIdentifier'] - }) + result.relations = relations_from_attributes(attributes) result.rights = [ { - 'statement': right.get('rights', None), - 'uri': right.get('rightsUri', None) + 'statement': right.get('rights'), + 'uri': right.get('rightsUri') } for right in attributes.get('rightsList', []) ] @@ -181,6 +156,33 @@ def build_metadata(data): return result +def relations_from_attributes(attributes): + return identifiers_from_attributes(attributes, + list_key='relatedIdentifiers', + type_key='relatedIdentifierType', + value_key='relatedIdentifier') + +def identifiers_from_attributes(attributes, + list_key='identifiers', + type_key='identifierType', + value_key='identifier'): + identifiers = [] + for identifier in attributes.get(list_key,[]): + type_ = identifier.get(type_key) + value = identifier.get(value_key) + if bool(type_) and bool(value) : + if isinstance(value, list): + value = ','.join( value ) + if value_key == 'identifier': + value = strip_uri_prefix(value) + identifiers.append({ + 'type': type_, + 'identifier': value + }) + return identifiers + + + def strip_uri_prefix(identifier): """Strip common prefixes because OAI doesn't work with those kind of ID's"""