From 1aa9f6914c0460566d722f3f343e1e1ece8e6f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20Michael=20O=2E=20Hegg=C3=B8?= Date: Thu, 6 Jul 2017 20:02:22 +0200 Subject: [PATCH] [#32] Allow repeated $4 and $0 in 7XX --- README.rst | 18 +++++---- examples/noubomn-c000011.ttl | 1 + examples/noubomn-c000011.xml | 6 +-- mc2skos/record.py | 75 +++++++++++++++++++----------------- 4 files changed, 53 insertions(+), 47 deletions(-) diff --git a/README.rst b/README.rst index f424f6f..cc6b6e1 100644 --- a/README.rst +++ b/README.rst @@ -273,11 +273,13 @@ Notes: The relationship type is ``skos:broader`` if ``$w=g``, ``skos:narrower`` if ``$w=h``, and ``skos:related`` otherwise. If ``$w=r`` and ``$4`` contains an URI, that URI is used as the relationship type. - -* Mappings/relationships are generated for 7XX headings if the fields contain a ``$0`` - subfield containing either the control number or the URI of the related record. - If ``$0`` contains a control number, an URI pattern for the vocabulary - (found in indicator 2 or ``$2``) must be defined in mc2skos.record.CONFIG. - If ``$4`` contains an URI, that URI is used as the relationship type. - Otherwise, if ``$4`` contains one of the ISO 25964 relations, the corresponding - SKOS relation is used. Otherwise, the default value ``skos:closeMatch`` is used. + Note that ``$4`` must precede ``$0`` (since both subfields can be repeated). + +* Mappings/relationships are generated for 7XX headings if the fields contain a ``$0`` + subfield containing either the control number or the URI of the related record. + If ``$0`` contains a control number, an URI pattern for the vocabulary + (found in indicator 2 or ``$2``) must be defined in mc2skos.record.CONFIG. + If ``$4`` contains an URI, that URI is used as the relationship type. + Otherwise, if ``$4`` contains one of the ISO 25964 relations, the corresponding + SKOS relation is used. Otherwise, the default value ``skos:closeMatch`` is used. + Note that ``$4`` must precede ``$0`` (since both subfields can be repeated). diff --git a/examples/noubomn-c000011.ttl b/examples/noubomn-c000011.ttl index c21269d..cebbe29 100644 --- a/examples/noubomn-c000011.ttl +++ b/examples/noubomn-c000011.ttl @@ -13,6 +13,7 @@ owl:deprecated true ; skos:closeMatch ; skos:exactMatch , + , ; skos:inScheme ; skos:prefLabel "Mugg"@nb . diff --git a/examples/noubomn-c000011.xml b/examples/noubomn-c000011.xml index 1a1d3eb..7fa7987 100644 --- a/examples/noubomn-c000011.xml +++ b/examples/noubomn-c000011.xml @@ -19,14 +19,14 @@ Muggsopp + =EQ (No-TrBIB)HUME08221 humord - =EQ - muggsopp - http://www.wikidata.org/entity/Q159341 http://www.w3.org/2004/02/skos/core#exactMatch + http://www.wikidata.org/entity/Q159341 + http://dbpedia.org/page/Mold Molds (Fungi) diff --git a/mc2skos/record.py b/mc2skos/record.py index 0eee494..86d2bab 100644 --- a/mc2skos/record.py +++ b/mc2skos/record.py @@ -758,40 +758,43 @@ def parse(self, options): # 7XX: Heading Linking Entries for heading in self.get_terms('7'): - sf_4 = heading['node'].text('mx:subfield[@code="4"]') - sf_0 = heading['node'].text('mx:subfield[@code="0"]') + relation = None + for sf in heading['node'].all('mx:subfield'): + if sf.get('code') == '4': + if is_uri(sf.text()): + relation = URIRef(sf.text()) + else: + relation = { + '=EQ': SKOS.exactMatch, + '~EQ': SKOS.closeMatch, + 'BM': SKOS.broadMatch, + 'NM': SKOS.narrowMatch, + 'RM': SKOS.relatedMatch, + }.get(sf.text()) # None if no match + + elif sf.get('code') == '0': + # Note: Default value might change in the future + relation = relation if relation else SKOS.closeMatch + + if is_uri(sf.text()): + self.relations.append({ + 'uri': sf.text(), + 'relation': relation, + }) + else: + scheme_code = { + '0': 'a', # Library of Congress Subject Headings + '1': 'b', # LC subject headings for children's literature + '2': 'c', # Medical Subject Headings + '3': 'd', # National Agricultural Library subject authority file + '4': 'n', # Source not specified + '5': 'k', # Canadian Subject Headings + '6': 'v', # Répertoire de vedettes-matière + '7': heading['node'].text('mx:subfield[@code="2"]'), # Source specified in subfield $2 + }.get(heading['node'].get('ind2')) - if sf_4 is not None and is_uri(sf_4): - relation = URIRef(sf_4) - else: - relation = { - '=EQ': SKOS.exactMatch, - '~EQ': SKOS.closeMatch, - 'BM': SKOS.broadMatch, - 'NM': SKOS.narrowMatch, - 'RM': SKOS.relatedMatch, - }.get(sf_4) - - relation = relation or SKOS.closeMatch # default - if is_uri(sf_0): - self.relations.append({ - 'uri': sf_0, - 'relation': relation, - }) - else: - scheme_code = { - '0': 'a', # Library of Congress Subject Headings - '1': 'b', # LC subject headings for children's literature - '2': 'c', # Medical Subject Headings - '3': 'd', # National Agricultural Library subject authority file - '4': 'n', # Source not specified - '5': 'k', # Canadian Subject Headings - '6': 'v', # Répertoire de vedettes-matière - '7': heading['node'].text('mx:subfield[@code="2"]'), # Source specified in subfield $2 - }.get(heading['node'].get('ind2')) - - self.append_relation( - ConceptScheme(scheme_code, AuthorityRecord), - relation, - control_number=sf_0 - ) + self.append_relation( + ConceptScheme(scheme_code, AuthorityRecord), + relation, + control_number=sf.text() + )