Skip to content

Commit

Permalink
fix: skip custom subjects that add nothing
Browse files Browse the repository at this point in the history
If a custom subject has exactly the same name as its bepress synonym, do not
include both in metadata records: the pair shows up as wrong-looking
duplicates in the search interface and adds no value to the metadata.
  • Loading branch information
aaxelb committed Aug 24, 2023
1 parent adaaf54 commit a83d506
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 21 deletions.
33 changes: 18 additions & 15 deletions osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,21 +523,24 @@ def gather_subjects(focus):


# NOTE(review): this is a diff hunk whose +/- markers and indentation were lost.
# The hunk header reports 15 deletions and 18 additions, which matches the two
# runs of body lines below: first the removed implementation, then the added one.
# Generator yielding the tuples that describe `dbsubject`; `child_ref` and
# `related_ref`, when given, are linked to this subject via SKOS.broader and
# SKOS.related respectively.
def _subject_triples(dbsubject, *, child_ref=None, related_ref=None):
# ---- removed lines (implementation before this commit) ----
# Old behavior: always emit the subject itself, plus a SKOS.altLabel carrying
# the bepress text whenever it differed from the subject's own text.
_subject_ref = rdflib.URIRef(dbsubject.absolute_api_v2_subject_url)
yield (DCTERMS.subject, _subject_ref)
yield (_subject_ref, RDF.type, SKOS.Concept)
yield (_subject_ref, SKOS.prefLabel, dbsubject.text)
yield from _subject_scheme_triples(dbsubject, _subject_ref)
if dbsubject.text != dbsubject.bepress_text:
yield (_subject_ref, SKOS.altLabel, dbsubject.bepress_text)
if child_ref is not None:
yield (child_ref, SKOS.broader, _subject_ref)
if related_ref is not None:
yield (related_ref, SKOS.related, _subject_ref)
if dbsubject.bepress_subject and (dbsubject != dbsubject.bepress_subject):
yield from _subject_triples(dbsubject.bepress_subject, related_ref=_subject_ref)
if dbsubject.parent and (dbsubject != dbsubject.parent):
yield from _subject_triples(dbsubject.parent, child_ref=_subject_ref)
# ---- added lines (implementation after this commit) ----
# A subject with no `bepress_subject` is treated as being a bepress subject itself.
_is_bepress = (not dbsubject.bepress_subject)
# True when the subject's own text differs from `bepress_text`.
_is_distinct_from_bepress = (dbsubject.text != dbsubject.bepress_text)
# Emit this subject only if it is a bepress subject or carries a different label.
# NOTE(review): everything down to the `else:` uses `_subject_ref`, so it
# presumably sits inside this `if` -- confirm against the real file.
if _is_bepress or _is_distinct_from_bepress:
_subject_ref = rdflib.URIRef(dbsubject.absolute_api_v2_subject_url)
yield (DCTERMS.subject, _subject_ref)
yield (_subject_ref, RDF.type, SKOS.Concept)
yield (_subject_ref, SKOS.prefLabel, dbsubject.text)
yield from _subject_scheme_triples(dbsubject, _subject_ref)
# A distinctly-named subject also emits its bepress counterpart, with this
# subject passed as `related_ref`; no SKOS.altLabel is emitted any more.
if _is_distinct_from_bepress:
yield from _subject_triples(dbsubject.bepress_subject, related_ref=_subject_ref)
if child_ref is not None:
yield (child_ref, SKOS.broader, _subject_ref)
if related_ref is not None:
yield (related_ref, SKOS.related, _subject_ref)
# Recurse into the parent (guarding against a subject that is its own parent),
# passing this subject as the parent's `child_ref`.
if dbsubject.parent and (dbsubject != dbsubject.parent):
yield from _subject_triples(dbsubject.parent, child_ref=_subject_ref)
# Otherwise the bepress subject stands in for this one: the caller's
# `child_ref` / `related_ref` are forwarded to it unchanged.
else:  # if the custom subject adds nothing of value, just include the bepress subject
yield from _subject_triples(dbsubject.bepress_subject, child_ref=child_ref, related_ref=related_ref)


def _subject_scheme_triples(dbsubject, subject_ref):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
owl:sameAs <https://doi.org/10.70102/FK2osf.io/w2ibb> .

<http://localhost:8000/v2/subjects/subjwobb/> a skos:Concept ;
skos:altLabel "wobbble" ;
skos:broader <http://localhost:8000/v2/subjects/subjwibb/> ;
skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
skos:prefLabel "wobble" ;
Expand All @@ -77,7 +76,6 @@
dcterms:title "preprovi" .

<http://localhost:8000/v2/subjects/subjwibb/> a skos:Concept ;
skos:altLabel "wibbble" ;
skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
skos:prefLabel "wibble" ;
skos:related <http://localhost:8000/v2/subjects/subjwibbb/> .
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
osf:hasFunding <https://moneypockets.example/millions> .

<http://localhost:8000/v2/subjects/subjwobb/> a skos:Concept ;
skos:altLabel "wobbble" ;
skos:broader <http://localhost:8000/v2/subjects/subjwibb/> ;
skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
skos:prefLabel "wobble" ;
Expand Down Expand Up @@ -91,7 +90,6 @@
dcterms:title "preprovi" .

<http://localhost:8000/v2/subjects/subjwibb/> a skos:Concept ;
skos:altLabel "wibbble" ;
skos:inScheme <http://localhost:8000/v2/providers/preprints/preprovi/subjects/> ;
skos:prefLabel "wibble" ;
skos:related <http://localhost:8000/v2/subjects/subjwibbb/> .
Expand Down
2 changes: 0 additions & 2 deletions osf_tests/metadata/test_osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,6 @@ def test_gather_subjects(self):
(_child_iri, SKOS.prefLabel, Literal('Child')),
(_customparent_iri, SKOS.prefLabel, Literal('Custom-parent')),
(_customchild_iri, SKOS.prefLabel, Literal('Custom-child')),
(_customparent_iri, SKOS.altLabel, Literal('Parent')),
(_customchild_iri, SKOS.altLabel, Literal('Child')),
(_child_iri, SKOS.broader, _parent_iri),
(_customchild_iri, SKOS.broader, _customparent_iri),
(_customchild_iri, SKOS.related, _child_iri),
Expand Down

0 comments on commit a83d506

Please sign in to comment.