From 34041ef8e6d3a6c3994b2e85a4e24bf1fd207535 Mon Sep 17 00:00:00 2001 From: torleifg Date: Tue, 14 Jan 2025 13:28:07 +0100 Subject: [PATCH] Mapping of subject --- .../fdk/concept_catalog/rdf/SkosApNoImport.kt | 69 +++++---- .../rdf/SkosApNoImportTests.kt | 131 +++++++++++------- src/test/resources/import_concept.ttl | 17 ++- 3 files changed, 131 insertions(+), 86 deletions(-) diff --git a/src/main/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImport.kt b/src/main/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImport.kt index ce4e4fb..a0b5a99 100644 --- a/src/main/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImport.kt +++ b/src/main/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImport.kt @@ -1,6 +1,7 @@ package no.fdk.concept_catalog.rdf import no.fdk.concept_catalog.model.* +import no.fdk.concept_catalog.service.isValidURI import org.apache.jena.rdf.model.* import org.apache.jena.vocabulary.DCTerms import org.apache.jena.vocabulary.OWL @@ -25,7 +26,9 @@ fun Model.extractBegreper(catalogId: String): List { definisjonForAllmennheten = it.extractDefinisjonForAllmennheten(), definisjonForSpesialister = it.extractDefinisjonForSpesialister(), merknad = it.extractMerknad(), - eksempel = it.extractEksempel() + eksempel = it.extractEksempel(), + fagområde = it.extractFagområde(), + fagområdeKoder = it.extractFagområdeKoder() ) } @@ -35,7 +38,7 @@ fun Model.extractBegreper(catalogId: String): List { fun Resource.extractVersjonr(): SemVer? { return this.getProperty(OWL.versionInfo) ?.let { it.`object`.asLiteralOrNull()?.string } - ?.takeIf { it.isNotBlank() and SEM_VAR_REGEX.matches(it) } + ?.takeIf { it.isNotBlank() && SEM_VAR_REGEX.matches(it) } ?.let { SEM_VAR_REGEX.matchEntire(it)?.destructured?.let { (major, minor, patch) -> SemVer(major.toInt(), minor.toInt(), patch.toInt()) @@ -45,30 +48,20 @@ fun Resource.extractVersjonr(): SemVer? { fun Resource.extractStatusUri(): String? { return this.getProperty(EUVOC.status) - ?.let { it.`object`.asResourceOrNull()?.uri } + ?.let { it.`object`.asResourceUriOrNull()?.uri } } fun Resource.extractAnbefaltTerm(): Term? { - return extractLocalizesStrings(SKOS.prefLabel) + return extractLocalizedStrings(SKOS.prefLabel) ?.let { Term(it) } } fun Resource.extractTillattTerm(): Map>? { - return extractTerm(SKOS.altLabel) + return extractLocalizedStringsAsGrouping(SKOS.altLabel) } fun Resource.extractFrarådetTerm(): Map>? { - return extractTerm(SKOS.hiddenLabel) -} - -private fun Resource.extractTerm(property: Property): Map>? { - return this.listProperties(property) - .toList() - .mapNotNull { it.`object`.asLiteralOrNull() } - .filter { it.language.isNotBlank() and it.string.isNotBlank() } - .groupBy { it.language } - .mapValues { (_, literals) -> literals.map { it.string } } - .takeIf { it.isNotEmpty() } + return extractLocalizedStringsAsGrouping(SKOS.hiddenLabel) } fun Resource.extractDefinisjon(): Definisjon? { @@ -86,7 +79,7 @@ fun Resource.extractDefinisjonForAllmennheten(): Definisjon? { .filter { it.getProperty(DCTerms.audience) ?.`object` - ?.asResourceOrNull() + ?.asResourceUriOrNull() ?.hasURI(AUDIENCE_TYPE.public.uri) == true } .firstNotNullOfOrNull { it.extractDefinition() } @@ -99,24 +92,36 @@ fun Resource.extractDefinisjonForSpesialister(): Definisjon? { .filter { it.getProperty(DCTerms.audience) ?.`object` - ?.asResourceOrNull() + ?.asResourceUriOrNull() ?.hasURI(AUDIENCE_TYPE.specialist.uri) == true } .firstNotNullOfOrNull { it.extractDefinition() } } fun Resource.extractMerknad(): Map? { - return extractLocalizesStrings(SKOS.scopeNote) + return extractLocalizedStrings(SKOS.scopeNote) } fun Resource.extractEksempel(): Map? { - return extractLocalizesStrings(SKOS.example) + return extractLocalizedStrings(SKOS.example) +} + +fun Resource.extractFagområde(): Map>? { + return extractLocalizedStringsAsGrouping(DCTerms.subject) +} + +fun Resource.extractFagområdeKoder(): List? { + return this.listProperties(DCTerms.subject) + .toList() + .mapNotNull { it.`object`.asResourceUriOrNull() } + .map { it.toString() } + .takeIf { it.isNotEmpty() } } private fun Resource.extractDefinition(): Definisjon? { val relationshipWithSource: ForholdTilKildeEnum? = this.getProperty(SKOSNO.relationshipWithSource) ?.let { statement -> - statement.`object`.asResourceOrNull()?.let { + statement.`object`.asResourceUriOrNull()?.let { when { it.hasURI(RELATIONSHIP.selfComposed.uri) -> ForholdTilKildeEnum.EGENDEFINERT it.hasURI(RELATIONSHIP.directFromSource.uri) -> ForholdTilKildeEnum.BASERTPAAKILDE @@ -132,7 +137,7 @@ private fun Resource.extractDefinition(): Definisjon? { statement.`object`.let { obj -> when { obj.isLiteral -> URITekst(tekst = obj.asLiteralOrNull()?.string) - obj.isResource -> URITekst(uri = obj.asResourceOrNull()?.uri) + obj.isURIResource -> URITekst(uri = obj.asResourceUriOrNull()?.uri) else -> null } } @@ -143,16 +148,26 @@ private fun Resource.extractDefinition(): Definisjon? { Kildebeskrivelse(forholdTilKilde = relationshipWithSource, kilde = source) } - val value: Map? = this.extractLocalizesStrings(RDF.value) + val value: Map? = this.extractLocalizedStrings(RDF.value) return value?.let { Definisjon(tekst = it, kildebeskrivelse = sourceDescription) } } -private fun Resource.extractLocalizesStrings(property: Property): Map? { +private fun Resource.extractLocalizedStringsAsGrouping(property: Property): Map>? { + return this.listProperties(property) + .toList() + .mapNotNull { it.`object`.asLiteralOrNull() } + .filter { it.language.isNotBlank() && it.string.isNotBlank() } + .groupBy { it.language } + .mapValues { (_, literals) -> literals.map { it.string } } + .takeIf { it.isNotEmpty() } +} + +private fun Resource.extractLocalizedStrings(property: Property): Map? { return this.listProperties(property) .toList() .mapNotNull { it.`object`.asLiteralOrNull() } - .filter { it.language.isNotBlank() and it.string.isNotBlank() } + .filter { it.language.isNotBlank() && it.string.isNotBlank() } .associate { it.language to it.string } .takeIf { it.isNotEmpty() } } @@ -164,3 +179,7 @@ private fun RDFNode.asLiteralOrNull(): Literal? { private fun RDFNode.asResourceOrNull(): Resource? { return if (this.isResource) this.asResource() else null } + +private fun RDFNode.asResourceUriOrNull(): Resource? { + return if (this.isURIResource && this.asResource().uri.isValidURI()) this.asResource() else null +} diff --git a/src/test/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImportTests.kt b/src/test/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImportTests.kt index 6945ee0..871cf5d 100644 --- a/src/test/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImportTests.kt +++ b/src/test/kotlin/no/fdk/concept_catalog/rdf/SkosApNoImportTests.kt @@ -6,6 +6,7 @@ import no.fdk.concept_catalog.model.URITekst import org.apache.jena.rdf.model.Model import org.apache.jena.rdf.model.ModelFactory import org.apache.jena.riot.Lang +import org.apache.jena.vocabulary.DCTerms import org.apache.jena.vocabulary.OWL import org.apache.jena.vocabulary.SKOS import org.junit.jupiter.api.Assertions.* @@ -30,15 +31,17 @@ class SkosApNoImportTests { assertNotNull(concept.statusURI) assertNotNull(concept.anbefaltTerm) - concept.tillattTerm?.let { assertFalse(it.isEmpty()) } - concept.frarådetTerm?.let { assertFalse(it.isEmpty()) } + assertFalse(concept.tillattTerm!!.isEmpty()) + assertFalse(concept.frarådetTerm!!.isEmpty()) assertNotNull(concept.definisjon) assertNotNull(concept.definisjonForAllmennheten) assertNotNull(concept.definisjonForSpesialister) - concept.merknad?.let { assertFalse(it.isEmpty()) } - concept.eksempel?.let { assertFalse(it.isEmpty()) } + assertFalse(concept.merknad!!.isEmpty()) + assertFalse(concept.eksempel!!.isEmpty()) + assertFalse(concept.fagområde!!.isEmpty()) + assertFalse(concept.fagområdeKoder!!.isEmpty()) } } @@ -76,12 +79,12 @@ class SkosApNoImportTests { assertEquals(1, terms.size) - terms.first()?.navn?.let { localizedTerms -> - assertEquals(2, localizedTerms.size) - assertTrue(localizedTerms.containsKey("nb")) - assertEquals("anbefaltTerm", localizedTerms["nb"]) - assertTrue(localizedTerms.containsKey("en")) - assertEquals("recommendedTerm", localizedTerms["en"]) + terms.first()!!.navn.let { + assertEquals(2, it.size) + assertTrue(it.containsKey("nb")) + assertEquals("anbefaltTerm", it["nb"]) + assertTrue(it.containsKey("en")) + assertEquals("recommendedTerm", it["en"]) } } @@ -95,9 +98,9 @@ class SkosApNoImportTests { assertEquals(1, terms.size) - terms.first()?.let { localizedTerms -> - assertTrue(localizedTerms.containsKey("nn")) - assertEquals(localizedTerms.getValue("nn").toSet(), setOf("tillattTerm", "tillattTerm2")) + terms.first().let { + assertTrue(it!!.containsKey("nn")) + assertEquals(it.getValue("nn").toSet(), setOf("tillattTerm", "tillattTerm2")) } } @@ -111,9 +114,9 @@ class SkosApNoImportTests { assertEquals(1, terms.size) - terms.first()?.let { localizedTerms -> - assertTrue(localizedTerms.containsKey("nb")) - assertEquals(localizedTerms.getValue("nb").toSet(), setOf("fraraadetTerm", "fraraadetTerm2", "Lorem ipsum")) + terms.first().let { + assertTrue(it!!.containsKey("nb")) + assertEquals(it.getValue("nb").toSet(), setOf("fraraadetTerm", "fraraadetTerm2", "Lorem ipsum")) } } @@ -127,20 +130,20 @@ class SkosApNoImportTests { assertEquals(1, definitions.size) - definitions.first()?.let { - it.tekst?.let { text -> - assertEquals(2, text.size) + definitions.first().let { + it!!.tekst.let { text -> + assertEquals(2, text!!.size) assertTrue(text.containsKey("nb")) assertEquals("definisjon", text["nb"]) assertTrue(text.containsKey("nb")) assertEquals("definition", text["en"]) } - it.kildebeskrivelse?.let { sourceDescription -> - assertEquals(ForholdTilKildeEnum.EGENDEFINERT, sourceDescription.forholdTilKilde) + it.kildebeskrivelse.let { sourceDescription -> + assertEquals(ForholdTilKildeEnum.EGENDEFINERT, sourceDescription!!.forholdTilKilde) - sourceDescription.kilde?.let { source -> - assertEquals(2, source.size) + sourceDescription.kilde.let { source -> + assertEquals(2, source!!.size) assertEquals(URITekst(tekst = "kap14"), source.first()) assertEquals( URITekst(uri = "https://lovdata.no/dokument/NL/lov/1997-02-28-19/kap14#kap14"), @@ -161,16 +164,14 @@ class SkosApNoImportTests { assertEquals(1, definitions.size) - definitions.first()?.let { - it.tekst?.let { text -> - assertEquals(1, text.size) + definitions.first().let { + it!!.tekst.let { text -> + assertEquals(1, text!!.size) assertTrue(text.containsKey("nb")) assertEquals("definisjon for allmennheten", text["nb"]) } - it.kildebeskrivelse?.let { sourceDescription -> - assertEquals(ForholdTilKildeEnum.SITATFRAKILDE, sourceDescription.forholdTilKilde) - } + assertEquals(ForholdTilKildeEnum.SITATFRAKILDE, it.kildebeskrivelse!!.forholdTilKilde) } } @@ -184,16 +185,14 @@ class SkosApNoImportTests { assertEquals(1, definitions.size) - definitions.first()?.let { - it.tekst?.let { text -> - assertEquals(1, text.size) + definitions.first().let { + it!!.tekst.let { text -> + assertEquals(1, text!!.size) assertTrue(text.containsKey("nb")) assertEquals("definisjon for spesialister", text["nb"]) } - it.kildebeskrivelse?.let { sourceDescription -> - assertEquals(ForholdTilKildeEnum.BASERTPAAKILDE, sourceDescription.forholdTilKilde) - } + assertEquals(ForholdTilKildeEnum.BASERTPAAKILDE, it.kildebeskrivelse!!.forholdTilKilde) } } @@ -207,12 +206,12 @@ class SkosApNoImportTests { assertEquals(1, notes.size) - notes.first()?.let { localizedNote -> - assertEquals(2, localizedNote.size) - assertTrue(localizedNote.containsKey("nb")) - assertEquals("merknad", localizedNote["nb"]) - assertTrue(localizedNote.containsKey("nn")) - assertEquals("merknad", localizedNote["nn"]) + notes.first().let { + assertEquals(2, it!!.size) + assertTrue(it.containsKey("nb")) + assertEquals("merknad", it["nb"]) + assertTrue(it.containsKey("nn")) + assertEquals("merknad", it["nn"]) } } @@ -220,27 +219,55 @@ class SkosApNoImportTests { fun `should extract eksempel`() { val model = readModel("import_concept.ttl") - val notes = model.listResourcesWithProperty(SKOS.example) + val examples = model.listResourcesWithProperty(SKOS.example) .toList() .map { it.extractEksempel() } - assertEquals(1, notes.size) + assertEquals(1, examples.size) + + examples.first().let { + assertEquals(2, it!!.size) + assertTrue(it.containsKey("nb")) + assertEquals("eksempel", it["nb"]) + assertTrue(it.containsKey("nn")) + assertEquals("eksempel", it["nn"]) + } + } + + @Test + fun `should extract fagområde`() { + val model = readModel("import_concept.ttl") + + val subjects = model.listResourcesWithProperty(DCTerms.subject) + .toList() + .map { it.extractFagområde() } + + assertEquals(1, subjects.size) - notes.first()?.let { localizedNote -> - assertEquals(2, localizedNote.size) - assertTrue(localizedNote.containsKey("nb")) - assertEquals("eksempel", localizedNote["nb"]) - assertTrue(localizedNote.containsKey("nn")) - assertEquals("eksempel", localizedNote["nn"]) + subjects.first().let { + assertTrue(it!!.containsKey("nb")) + assertEquals(it.getValue("nb").toSet(), setOf("fagområde")) } } + @Test + fun `should extract fagområdeKoder`() { + val model = readModel("import_concept.ttl") + + val subjects = model.listResourcesWithProperty(DCTerms.subject) + .toList() + .map { it.extractFagområdeKoder() } + + assertEquals(1, subjects.size) + + assertTrue(subjects.first()!!.contains("https://example.com/fagområdekode")) + } + private fun readModel(file: String): Model { - val turtle = javaClass.classLoader.getResourceAsStream(file) - ?.let { String(it.readAllBytes(), StandardCharsets.UTF_8) } + val turtle = String(javaClass.classLoader.getResourceAsStream(file)!!.readAllBytes(), StandardCharsets.UTF_8) val model = ModelFactory.createDefaultModel() - model.read(StringReader(turtle!!), "http://example.com", Lang.TURTLE.name) + model.read(StringReader(turtle), "http://example.com", Lang.TURTLE.name) return model } diff --git a/src/test/resources/import_concept.ttl b/src/test/resources/import_concept.ttl index 0408333..f0b06f9 100644 --- a/src/test/resources/import_concept.ttl +++ b/src/test/resources/import_concept.ttl @@ -20,8 +20,6 @@ @prefix relationship-with-source-type: . @prefix audience-type: . - - rdf:type skos:Concept ; rdfs:seeAlso ; @@ -29,11 +27,14 @@ dct:identifier "https://concept-catalog.fellesdatakatalog.digdir.no/collections/123456789/concepts/id0-old" ; dct:publisher ; euvoc:status ; - skos:altLabel "tillattTerm"@nn, "tillattTerm2"@nn ; - skos:hiddenLabel "fraraadetTerm"@nb,"fraraadetTerm2"@nb, "Lorem ipsum"@nb ; owl:versionInfo "1.0.0" ; - skos:prefLabel "anbefaltTerm"@nb, "recommendedTerm"@en ; + skos:altLabel "tillattTerm"@nn, "tillattTerm2"@nn ; + skos:hiddenLabel "fraraadetTerm"@nb,"fraraadetTerm2"@nb, "Lorem ipsum"@nb ; + skos:scopeNote "merknad"@nn, "merknad"@nb ; + skos:example "eksempel"@nn, "eksempel"@nb ; + dct:subject "fagområde"@nb, ; + dct:modified "2020-01-02"^^xsd:date ; skosno:hasGenericConceptRelation [ rdf:type skosno:GenericConceptRelation ; dct:description "Inndelingskriterium"@nb ; skosno:hasSpecificConcept @@ -65,7 +66,5 @@ rdf:value "definisjon for allmennheten"@nb ; dct:audience audience-type:public ; skosno:relationshipWithSource relationship-with-source-type:derived-from-source ; - ] ; - skos:scopeNote "merknad"@nn, "merknad"@nb ; - skos:example "eksempel"@nn, "eksempel"@nb ; - dct:modified "2020-01-02"^^xsd:date . + ] . +