Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Loterre-resolvers] create databases #8

Merged
merged 9 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*/**/.venv
*/**/.env
.idea
*/**/node_modules/
*/**/bun.lockb
6 changes: 5 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
"editor.defaultFormatter": "ms-python.autopep8"
},
"cSpell.words": [
"bunx",
"loterre",
"Loterre",
"NETSCITY",
"skos",
"venv",
"webdav"
]
}
}
1 change: 1 addition & 0 deletions loterre-resolvers/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/databases
8 changes: 6 additions & 2 deletions loterre-resolvers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,9 @@ dvc config core.autostage true
1. se mettre dans ce répertoire: `cd loterre-resolvers`
2. activer l'environnement virtuel: `source .venv/bin/activate`
3. lancer le script: `./bin/get-files.sh`
4. ajouter les fichiers à DVC: `dvc add ./data/*.skos`
5. pousser les fichiers sur le remote: `dvc push`
4. lancer la création des bases à mettre à jour: `dvc repro`
5. pousser les fichiers: `dvc push`

> [!NOTE]
> Pour lancer la génération d'un vocabulaire en particulier (ex: `216`),
> utiliser `dvc repro tgz@216`.
62 changes: 62 additions & 0 deletions loterre-resolvers/combine.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# ezs pipeline: resolve an incoming value against the inverted index of a
# Loterre vocabulary, building the local databases first when the vocabulary
# has not been loaded yet.

# Location where the local databases are stored
# NOTE(review): 'weekNumber' has no default here, unlike TMPDIR and loterreID —
# confirm it is always provided by the caller.
[env]
path = location
value = fix(`${env('TMPDIR', '/tmp')}/databases/${env('weekNumber')}/${env('loterreID', 'noid')}`)

# STEP 0: normalise the value to look up (the same way the index was built):
# strip punctuation and whitespace, remove diacritics, lowercase
[assign]
path = value
value = fix(_.deburr(String(self.value).replace(/[`~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/\s]/g, '').toLowerCase()))

path = loterreID
value = env('loterreID')

# primer holds the URL to download (see download.ini)
path = primer
value = env('primer')

# STEP 1: check whether the vocabulary is already present locally
[load]
location = env('location')
domain = fix('loterre-vocabulaires').append('-loaded')
path = loterreID
target = isAlreadyDownload

# STEP 1.1: if it is not present locally
[swing]
test = get('isAlreadyDownload').isEmpty()

# STEP 1.2: record that a local copy is about to be made
[swing/save]
location = env('location')
domain = fix('loterre-vocabulaires').append('-loaded')
path = loterreID

# STEP 1.3: download the remote XML version, convert it to JSON and save it locally
# NOTE(review): this and the following steps point at ./v1/*.cfg files that are
# not visible here — confirm they exist alongside this script.
[swing/singleton]
file = ./v1/download.cfg

# STEP 1.4: build an inverted index from the freshly generated JSON version
[swing/singleton]
file = ./v1/compile.cfg

# Keep only the id, value and state fields
[swing/exchange]
value = self().pick(['id', 'value', 'state'])

# STEP 2: look for a match in the inverted index
[load]
location = env('location')
domain = env('loterreID').prepend('loterre-').append('indexes')
path = value
target = result

# Keep the id and use the index hit as the value ('n/a' when nothing matched)
[replace]
path = id
value = get('id')
path = value
value = get('result.value', 'n/a')

# STEP 3: if the inverted index yields a URI, fetch the associated information
[expand]
path = value
file = ./v1/transcribe.cfg
46 changes: 46 additions & 0 deletions loterre-resolvers/compile.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# ezs pipeline: build the reverse index (normalised label -> concept URI) of a
# vocabulary from the concepts stored in the 'concepts' domain.
[use]
plugin = basics
plugin = analytics
plugin = storage

# build the reverse index
# Reads from the same location/domain that download.ini saves concepts to.
# NOTE(review): [cast] presumably streams every stored concept — confirm
# against the storage plugin documentation.
[cast]
location = env('location')
domain = env('loterreID').prepend('loterre-').append('concepts')

# Reshape each concept as { id: { about, score }, value: indexKeys }
[replace]
path = id.about
value = get('rdf$about')
# take the index keys of the concept as-is
# NOTE(review): no deduplication is performed here; it appears to happen where
# indexKeys is built (uniq() in download.ini) — confirm.
path = value
value = get('indexKeys')

path = id.score
value = get('score')

# create an entry for all keys
[exploding]

# invert id & value because of [exploding] which works only by value (not by id)
[replace]
path = id
value = get('value')
path = value
value = get('id.about')
path = score
value = get('id.score')

# Trace statement labelled INDEX
[debug]
text = INDEX

# Store each entry in the 'indexes' domain; `path = id` presumably makes the
# normalised key the storage key — confirm. The score is passed alongside.
[save]
location = env('location')
domain = env('loterreID').prepend('loterre-').append('indexes')
path = id
score = get('score')

# NOTE(review): [pop] presumably keeps only the last object of the stream —
# confirm against the ezs documentation.
[pop]

# Report the step that was just completed
[replace]
path = state
value = compile
38 changes: 38 additions & 0 deletions loterre-resolvers/create-databases.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env npx ezs

# ezs pipeline: create the local databases (concepts + reverse index) of one
# Loterre vocabulary under ./databases/<loterreID>.
#
# Input: [{ "loterreID": "216" }]
#
# Usage: loterreID=2XK bunx ezs create-databases.ini <<< '[{"loterreID": "2XK"}]'

[use]
# JSONParse
plugin = basics
# save
plugin = storage

# Directory holding the databases of this vocabulary.
# NOTE(review): the path is derived from the loterreID environment variable
# only ('noid' when unset), not from the input record — confirm this is the
# intended behaviour when the variable is missing.
[env]
path = location
value = fix(`${env('PWD', '.')}/databases/${env('loterreID', 'noid')}`)

[JSONParse]

# The loterreID environment variable wins; otherwise fall back to the
# loterreID field of the input record. The record must be read with
# _.get(self, ...): a bare _.get("loterreID") has no path argument and always
# yields undefined, which silently disabled the fallback.
[replace]
path = loterreID
value = env('loterreID', _.get(self, 'loterreID'))

[debug]
text = Avant save

# Record that this vocabulary has been loaded
[save]
location = env('location')
domain = fix('loterre-vocabulaires').append('-loaded')
path = loterreID

# Parse the SKOS file and store its concepts
[delegate]
file = ./download.ini

# Build the reverse index from the stored concepts
[delegate]
file = ./compile.ini

[dump]
indent = true
38 changes: 38 additions & 0 deletions loterre-resolvers/download.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# ezs pipeline: read the SKOS file of one vocabulary, split it into concepts
# and store each concept together with its index keys and score.
[use]
plugin = basics
plugin = analytics
plugin = storage


# Load the local SKOS file ./data/<loterreID>.skos ('noid' when the loterreID
# environment variable is unset), parse it and save it.
# NOTE(review): nothing visible here performs a remote download; the file is
# presumably fetched beforehand — confirm.
[exchange]
value = fix([`./data/${env('loterreID', 'noid')}.skos`])

[FILELoad]
[FILEMerge]

# Split the document on the skos:Concept elements found under rdf:RDF
[XMLParse]
separator = /rdf:RDF/skos:Concept

# build indexKeys (remove duplicates to avoid ambiguities)
# Takes the $t text of every prefLabel, altLabel and hiddenLabel, strips
# punctuation and whitespace, removes diacritics (_.deburr), lowercases, then
# deduplicates. Lookups must normalise their input with the same rules.
[assign]
path = indexKeys
value = fix(_.get(self, 'skos$prefLabel'), _.get(self, 'skos$altLabel'), _.get(self, 'skos$hiddenLabel')). \
flatten().\
filter(o => _.has(o, '$t')).\
map(o => _.deburr(String(o.$t).replace(/[`~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/\s]/g, '').toLowerCase())).\
uniq()

# score is 1 when owl$deprecated.$t is the string 'true' (case-insensitive),
# otherwise 2; the default 3 used when the field is absent also maps to 2.
path = score
value = get('owl$deprecated.$t', 3).thru(x => Number(String(x).toLowerCase() === 'true' ? 1 : 2))

# Store each concept in the 'concepts' domain; `path = rdf$about` presumably
# makes the concept URI the storage key — confirm.
[save]
location = env('location')
domain = env('loterreID').prepend('loterre-').append('concepts')
path = rdf$about

# NOTE(review): [pop] presumably keeps only the last object of the stream —
# confirm against the ezs documentation.
[pop]

# Report the step that was just completed
[replace]
path = state
value = download
Loading