Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/geodata #475

Draft
wants to merge 8 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

source/cc-licenses.ttl: source/construct-cc-license-data.rq
cat source/cc-licenses-header.txt > $@
(echo https://creativecommons.org/publicdomain/mark/1.0/ && echo https://creativecommons.org/publicdomain/mark/1.0/deed.sv && curl -sL https://creativecommons.org/about/cclicenses/ | sed -nE 's!.*<a href="([^"]+)"><img .*!\1\n\1deed.sv!p') | xargs python3 scripts/construct.py $^ >> $@
Expand All @@ -8,3 +9,10 @@ source/sab.ttl: scripts/extract_sab_data_from_docx.py cache/esab-2015_1.docx
# ../librisxl/whelk-core/src/main/java/se/kb/libris/export/dewey/dewey_sab.txt
# TODO 2: In XL, add precomposed usages (extract from usage in records)? See:
# ../librisxl/marc_export/src/main/resources/se/kb/libris/export/sabrub.txt # precomposed

# Locally formatted copy of the geo core data fetched from Wikidata.
# The output is written to a temporary file and renamed only on success, so a
# failed run never leaves a truncated target that make would treat as current.
cache/geocore.ttl: cache/remote-geocore.ttl
	cat $^ | scripts/fmt.sh > $@.tmp && mv $@.tmp $@
	#scripts/construct.py source/geo/modify-geocore.rq $^ > $@

# Raw result of running the geocore CONSTRUCT query against the Wikidata
# SPARQL endpoint. Same temp-file-then-rename approach as above, since a
# network failure would otherwise leave an empty or partial cache file.
cache/remote-geocore.ttl: source/geo/construct-geocore.rq
	scripts/rq.sh https://query.wikidata.org/sparql $^ > $@.tmp && mv $@.tmp $@
4 changes: 4 additions & 0 deletions lxltools/datacompiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,12 @@ def _compile_dataset(self, name, result):

meta = node.pop('meta', None)
if meta:
if meta.get('@id', "").startswith('_:'):
del meta['@id']

if 'created' in meta:
created_ms = timeutil.w3c_dtz_to_ms(meta.pop('created'))

if 'modified' in meta:
modified_ms = timeutil.w3c_dtz_to_ms(meta.pop('modified'))

Expand Down
2 changes: 1 addition & 1 deletion lxltools/timeutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ def to_w3c_dtz(ms: float):


def to_http_date(s: float):
    """Format a POSIX timestamp as an HTTP date string expressed in GMT.

    :param s: seconds since the Unix epoch.
    :return: a string such as ``'Thu, 01 Jan 1970 00:00:00 GMT'``.
    """
    # Timezone-aware conversion; datetime.utcfromtimestamp() is deprecated
    # since Python 3.12 and returned a naive datetime.
    return datetime.fromtimestamp(s, tz=timezone.utc).strftime('%a, %d %b %Y %H:%M:%S GMT')
2 changes: 2 additions & 0 deletions scripts/fmt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Filter: reads RDF on stdin, pipes it through trld using the kbv.jsonld
# context found relative to this script, and writes the result to stdout with
# the first "rdf:type" on each line abbreviated to "a".
# NOTE(review): the -ittl/-ottl flags presumably select Turtle input/output;
# confirm against the trld documentation.
# NOTE(review): the sed expression is purely textual, so it would also rewrite
# "rdf:type" if it appeared in object position or inside a literal - confirm
# that this cannot occur in the formatted data.

# Propagate a trld failure instead of reporting only sed's exit status.
set -o pipefail

# Quote the script directory so paths containing spaces keep working.
trld -ittl -e -f -c "$(dirname "$0")/../build/sys/context/kbv.jsonld" -B -ottl | sed 's/rdf:type/a/'
5 changes: 5 additions & 0 deletions scripts/rq.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Send a SPARQL query file to an endpoint and print the Turtle response.
#
# Usage: rq.sh ENDPOINT QUERYFILE
#   ENDPOINT   URL of the SPARQL endpoint.
#   QUERYFILE  path of the file holding the query; its content is sent
#              URL-encoded as the "query" form field.
set -euo pipefail

if [[ $# -lt 2 ]]; then
    echo "Usage: $0 ENDPOINT QUERYFILE" >&2
    exit 2
fi

endpoint=$1
queryfile=$2

# --fail: exit non-zero on an HTTP error rather than printing the error body,
#         so a caller redirecting stdout does not store it as data.
# --show-error: stay quiet on success but still report failures on stderr.
curl --silent --show-error --fail "$endpoint" -H 'Accept: text/turtle' --data-urlencode "query@$queryfile"
125 changes: 63 additions & 62 deletions source/construct-libraries.rq
Original file line number Diff line number Diff line change
Expand Up @@ -8,85 +8,86 @@ base <https://libris.kb.se/library/>
construct {

?library a ?libtype ;
:meta _:meta ;
:meta ?meta ;
owl:sameAs ?bdb_id, ?sameas ;
:sigel ?sigel ;
:name ?name ;
:url ?url ;
:qualifier ?dept ;
:category ?registranturl .
#:place ?place .

_:meta :created ?created ; :modified ?modified .

# TODO: Model a proper place/adress relation.
# ?place a :Place ;
# :label ?region ;
# :code ?municipality_code ;
# :isPartOf ?country ;
# :latitude ?lat ;
# :longitude ?long .
#
# ?country a :Country ;
# :code ?country_code .
:category ?registranturl ;
:locatedIn ?municipality ;
:geo ?geo ;
:country ?country .

?meta :created ?created ; :modified ?modified .

?geo :latitude ?lat ; :longitude ?long .

} where {

?bdb_id
bibdb:sigel ?sigel ;
sdo:name ?name .
# :organisation, sdo:address

optional {
?bdb_id bibdb:libris_reg ?reg . filter ( ?reg = true )
bind (iri('https://id.kb.se/term/bibdb/Registrant') as ?registranturl)
}
bind(encode_for_uri(replace(str(?sigel), "\\s+", "")) as ?sigelslug)

optional {
?bdb_id bibdb:dept ?dept
}
optional { ?bdb_id a ?type }
bind(if(?type = sdo:Library, :Library, :Bibliography) as ?libtype)

optional {
?bdb_id bibdb:date_created ?raw_created .
bind(concat(?raw_created, '.000Z') as ?created)
}
optional {
?bdb_id bibdb:date_modified ?raw_modified .
bind(concat(?raw_modified, '.000Z') as ?modified)
}
bind(iri(concat(str(</library/>), ?sigelslug)) as ?library)

bind(encode_for_uri(replace(str(?sigel), "\\s+", "")) as ?sigelslug)
{

optional { ?bdb_id a ?type }
optional { ?bdb_id :organisation ?org }
optional { ?bdb_id bibdb:dept ?dept }
optional {
?bdb_id bibdb:libris_reg ?reg . filter ( ?reg = true )
bind (<https://id.kb.se/term/bibdb/Registrant> as ?registranturl)
}

optional {
?bdb_id bibdb:date_created ?raw_created .
bind(concat(?raw_created, '.000Z') as ?created)
}
optional {
?bdb_id bibdb:date_modified ?raw_modified .
bind(concat(?raw_modified, '.000Z') as ?modified)
}
bind(bnode(coalesce(?created, ?modified)) as ?meta)

optional {
?bdb_id sdo:url ?url .
FILTER(?url != "" && ?url != "http://")
}

optional {
?bdb_id bibdb:country_code ?country_code .
FILTER(?country_code != "")
}
# TODO: tr, fi, ...
bind(if(?country_code = 'se', <https://id.kb.se/country/sw>, ?NO_country) as ?country)

} union {

?bdb_id sdo:address [
sdo:streetAddress ?streetAddress ;
sdo:addressLocality ?city ;
sdo:postalCode ?zipCode
] .
FILTER(!STRSTARTS(?streetAddress, "FE ") && !STRSTARTS(?streetAddress, "FE "))

} union {

?bdb_id bibdb:municipality_code ?municipality_code .
FILTER(?municipality_code not in ('', '-'))
bind(IRI(CONCAT(STR(</dataset/geo/sw/municipality/>), ?municipality_code)) as ?municipality)

} union {

?bdb_id sdo:latitude ?lat ; sdo:longitude ?long .
FILTER(?lat > 0 && ?long > 0)
bind(concat('POINT(', STR(?long), ' ', STR(?lat), ')') as ?geo)

optional {
?bdb_id sdo:url ?url .
FILTER(?url != "" && ?url != "http://")
}
bind(if(?type = sdo:Library, :Library, :Bibliography) as ?libtype)

# optional {
# ?bdb_id bibdb:country_code ?country_code .
# FILTER(?country_code != "")
# }
#
# optional {
# ?bdb_id bibdb:municipality_code ?municipality_code .
# FILTER(?municipality_code != "")
# }
#
# optional {
# ?bdb_id bibdb:region ?region
# }
#
# optional {
# ?bdb_id sdo:latitude ?lat ; sdo:longitude ?long .
# FILTER(?lat > 0 && ?long > 0)
# }

# TODO: coalesce should not be necessary here, due to the else clause. RDFLib bug?
bind(iri(concat(str(coalesce(?uribase, </library/>)), ?sigelslug)) as ?library)

# bind(if(bound(?region) || bound(?lat), bnode(), ?NO_place) as ?place)
# bind(if(bound(?place) && bound(?country_code), bnode(), ?NO_country) as ?country)
}
8 changes: 8 additions & 0 deletions source/datasets/geo.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Dataset description for the core geographic data: names the source file the
# dataset is read from and the URI space its resources live in.
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix : <https://id.kb.se/vocab/> .
@base <https://libris.kb.se/dataset/> .

<geo/core>
    a :Dataset ;
    :created "2024-03-07T12:09:24Z"^^xsd:dateTime ;
    :uriSpace "https://libris.kb.se/dataset/geo/" ;
    :sourceData [ :uri "cache/geocore.ttl" ] .
86 changes: 86 additions & 0 deletions source/geo/construct-geocore.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Prologue for the geocore CONSTRUCT query.
# Every Wikidata namespace used in the query is declared explicitly, including
# ps: and pq:, so the query parses on any SPARQL processor and not only on
# endpoints that predefine these prefixes.
BASE <https://libris.kb.se/dataset/geo/sw/>
PREFIX : <https://id.kb.se/vocab/>
PREFIX idkbse: <https://id.kb.se/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX psv: <http://www.wikidata.org/prop/statement/value/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX wikibase: <http://wikiba.se/ontology#>

# Builds :Place descriptions, together with their type concepts and coordinate
# nodes, from Wikidata items whose country (wdt:P17) is wd:Q34.
CONSTRUCT {

?type a :Concept ; :label ?typelabel .

?place a :Place ;
:sameAs ?sameAs ;
:label ?label ;
:category ?type ;
:code ?swedishMunicipalityCode ;
:geo ?geo ;
:locatedIn ?locatedIn ;
:principalPlace ?capital ;
:country <https://id.kb.se/country/sw> ;
:startDate ?foundingDate ;
:image ?image .

?geo a :EarthGlobeCoordinates ;
:value ?geocode ;
:geoLatitude ?lat ;
:geoLongitude ?long ;
:geoPrecision ?geoPrecision .

} WHERE {
?wdplace a|wdt:P31 ?type ;
wdt:P17 wd:Q34 . # :country Sweden

# Keep only types that reach one of the two root classes below through a
# chain of zero or more wdt:P279 links.
FILTER EXISTS {
{
?type wdt:P279* wd:Q914262 # administrative territorial entity of Sweden
} UNION {
?type wdt:P279* wd:Q12813115 # urban area of Sweden
}
}

?wdplace rdfs:label ?label .

# Skip items that have any wdt:P576 value (bound here as ?dissolutionDate).
FILTER NOT EXISTS { ?wdplace wdt:P576 ?dissolutionDate }
OPTIONAL { ?wdplace wdt:P571 ?foundingDate }

# Only Swedish and English labels are kept, for types and places alike.
?type rdfs:label ?typelabel . FILTER(lang(?typelabel) IN ('sv', 'en'))

FILTER(lang(?label) IN ('sv', 'en'))

OPTIONAL {
?wdplace wdt:P625 ?geocode . # ^^:wktLiteral
OPTIONAL {
?wdplace p:P625 ?qualifiedgeo .
?qualifiedgeo psv:P625 [
wikibase:geoLatitude ?lat ;
wikibase:geoLongitude ?long ;
wikibase:geoPrecision ?geoPrecision ] .
}
# Blank node labelled by the coordinate statement, falling back to the plain
# coordinate value when the inner OPTIONAL did not match.
BIND(BNODE(STR(COALESCE(?qualifiedgeo, ?geocode))) AS ?geo)
}

# Required pattern (not OPTIONAL): items without a wdt:P131 value yield no result.
?wdplace wdt:P131 ?wdLocatedIn .
# Municipality code: only statements without a pq:P582 value (bound as ?endDate)
# are used. When one matches, a local <municipality/CODE> IRI is minted as ?dsId
# and the Wikidata item itself is kept as ?sameAs.
OPTIONAL {
#?wdplace wdt:P525 ?swedishMunicipalityCode .
?wdplace p:P525 ?qualifiedP525 .
?qualifiedP525 ps:P525 ?swedishMunicipalityCode .
FILTER NOT EXISTS { ?qualifiedP525 pq:P582 ?endDate }
BIND(IRI(CONCAT(STR(<municipality/>), ENCODE_FOR_URI(?swedishMunicipalityCode))) AS ?dsId)
BIND(?wdplace AS ?sameAs)
}

OPTIONAL {
?wdplace wdt:P36 ?capital .
# :capital :label "centralort"@sv
# (Cf. wdt:P1376 == huvudstad.)
}

# Use the id.kb.se country IRI when the item is located directly in wd:Q34;
# any other container keeps its Wikidata IRI unchanged.
BIND(IF(?wdLocatedIn = wd:Q34, idkbse:country\/sw, ?wdLocatedIn) AS ?locatedIn)
# Subject IRI: the minted municipality IRI when available, else the Wikidata item IRI.
BIND(IF(BOUND(?dsId), ?dsId, ?wdplace) AS ?place)

OPTIONAL { ?wdplace wdt:P18 ?image }
}
27 changes: 27 additions & 0 deletions source/vocab/details.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
@prefix rdaz: <http://rdaregistry.info/Elements/z/> .
@prefix rdau: <http://rdaregistry.info/Elements/u/> .

@prefix wikibase: <http://wikiba.se/ontology#> .

@prefix : <https://id.kb.se/vocab/> .
@prefix kbrel: <https://id.kb.se/relator/> .

Expand Down Expand Up @@ -1539,6 +1541,31 @@
rdfs:label "Geographic coverage"@en, "Geografisk täckning"@sv;
owl:equivalentClass bf2:GeographicCoverage .

# Geographic coordinates of a place or library; declared equivalent to sdo:geo.
:geo a owl:ObjectProperty;
rdfs:label "Geographic coordinates"@en, "Geografiska koordinater"@sv;
sdo:domainIncludes :Place, :Library;
rdfs:range :GlobeCoordinates;
owl:equivalentProperty sdo:geo .

# Coordinate value class, aligned with both the schema.org and the Wikibase
# coordinate types.
:GlobeCoordinates a owl:Class ;
rdfs:subClassOf sdo:GeoCoordinates, wikibase:GlobecoordinateValue .

# Coordinate components, mapped to their schema.org and/or Wikibase counterparts.
:geoLatitude owl:equivalentProperty sdo:latitude, wikibase:geoLatitude .
:geoLongitude owl:equivalentProperty sdo:longitude, wikibase:geoLongitude .
:geoPrecision owl:equivalentProperty wikibase:geoPrecision .

# Globe coordinates whose wikibase:geoGlobe is fixed to wd:Q2
# (presumably the Wikidata item for Earth, going by the class label).
:EarthGlobeCoordinates a owl:Class ;
rdfs:label "Earth globe coordinates"@en, "jordglobskoordinater"@sv;
rdfs:subClassOf :GlobeCoordinates, [ a owl:Restriction;
owl:onProperty wikibase:geoGlobe ;
owl:hasValue <http://www.wikidata.org/entity/Q2> ] .

# Relates a place to its principal place (Swedish label "Centralort").
:principalPlace a owl:ObjectProperty;
rdfs:label "Principal place"@en, "Centralort"@sv;
rdfs:domain :Place;
rdfs:range :Place .
#owl:equivalentProperty wdt:P36 .

:geographicCoverage a owl:ObjectProperty;
rdfs:label "Geographic coverage"@en, "Geografisk täckning"@sv;
rdfs:domain :Creation;
Expand Down
2 changes: 1 addition & 1 deletion source/vocab/display.jsonld
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@
"@id": "Place-cards",
"@type": "fresnel:Lens",
"classLensDomain": "Place",
"showProperties": [ "prefLabel", "locatedIn", "country", "description" ]
"showProperties": [ {"alternateProperties": ["prefLabel", "label"]}, "locatedIn", "country", "description" ]
},
"DescriptionConventions": {
"@id": "DescriptionConventions-cards",
Expand Down