diff --git a/.gitignore b/.gitignore index 321c05d..db99706 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ */**/.venv */**/.env .idea +*/**/node_modules/ +*/**/bun.lockb diff --git a/.vscode/settings.json b/.vscode/settings.json index 9a92850..633c14e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,8 +4,12 @@ "editor.defaultFormatter": "ms-python.autopep8" }, "cSpell.words": [ + "bunx", + "loterre", "Loterre", + "NETSCITY", + "skos", "venv", "webdav" ] -} \ No newline at end of file +} diff --git a/loterre-resolvers/.gitignore b/loterre-resolvers/.gitignore new file mode 100644 index 0000000..b8f17d9 --- /dev/null +++ b/loterre-resolvers/.gitignore @@ -0,0 +1 @@ +/databases diff --git a/loterre-resolvers/README.md b/loterre-resolvers/README.md index 69f870d..8d41508 100644 --- a/loterre-resolvers/README.md +++ b/loterre-resolvers/README.md @@ -49,5 +49,9 @@ dvc config core.autostage true 1. se mettre dans ce répertoire: `cd loterre-resolvers` 2. activer l'environnement virtuel: `source .venv/bin/activate` 3. lancer le script: `./bin/get-files.sh` -4. ajouter les fichiers à DVC: `dvc add ./data/*.skos` -5. pousser les fichiers sur le remote: `dvc push` +4. lancer la création des bases à mettre à jour: `dvc repro` +5. pousser les fichiers: `dvc push` + +> [!NOTE] +> Pour lancer la génération d'un vocabulaire en particulier (ex: `216`), +> utiliser `dvc repro tgz@216`. 
diff --git a/loterre-resolvers/combine.ini b/loterre-resolvers/combine.ini new file mode 100644 index 0000000..50d0d6f --- /dev/null +++ b/loterre-resolvers/combine.ini @@ -0,0 +1,62 @@ +# Configuration du chemin de stockage des bases de données locales +[env] +path = location +value = fix(`${env('TMPDIR', '/tmp')}/databases/${env('weekNumber')}/${env('loterreID', 'noid')}`) + +# STEP 0 : On normalise la valeur à rechercher (de la même manière que l'index a été créé) +[assign] +path = value +value = fix(_.deburr(String(self.value).replace(/[`~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/\s]/g, '').toLowerCase())) + +path = loterreID +value = env('loterreID') + +# primer contient l'URL à télécharger (voir download.ini) +path = primer +value = env('primer') + +# STEP 1 : On vérifie si le vocabulaire est présent localement +[load] +location = env('location') +domain = fix('loterre-vocabulaires').append('-loaded') +path = loterreID +target = isAlreadyDownload + +# STEP 1.1 : s'il n'est pas présent localement +[swing] +test = get('isAlreadyDownload').isEmpty() + +# STEP 1.2 : on mémorise le fait que l'on va en faire une copie locale +[swing/save] +location = env('location') +domain = fix('loterre-vocabulaires').append('-loaded') +path = loterreID + +# STEP 1.3 : On télécharge la version distante en XML, on la convertit en JSON et on sauvegarde localement +[swing/singleton] +file = ./v1/download.cfg + +# STEP 1.4 : Avec la version JSON fraîchement générée on construit un index inversé +[swing/singleton] +file = ./v1/compile.cfg + +[swing/exchange] +value = self().pick(['id', 'value', 'state']) + +# STEP 2 : On cherche une correspondance dans l'index inversé +[load] +location = env('location') +domain = env('loterreID').prepend('loterre-').append('indexes') +path = value +target = result + +[replace] +path = id +value = get('id') +path = value +value = get('result.value', 'n/a') + +# STEP 3 : si l'index inversé nous donne une URI, on va chercher les informations associées 
+[expand] +path = value +file = ./v1/transcribe.cfg diff --git a/loterre-resolvers/compile.ini b/loterre-resolvers/compile.ini new file mode 100644 index 0000000..4c63c61 --- /dev/null +++ b/loterre-resolvers/compile.ini @@ -0,0 +1,46 @@ +[use] +plugin = basics +plugin = analytics +plugin = storage + +# build the reverse index +[cast] +location = env('location') +domain = env('loterreID').prepend('loterre-').append('concepts') + +[replace] +path = id.about +value = get('rdf$about') +# select all keys and remove duplicates to avoid ambiguities +path = value +value = get('indexKeys') + +path = id.score +value = get('score') + +# create an entry for all keys +[exploding] + +# invert id & value because of [exploding] which works only by value (not by id) +[replace] +path = id +value = get('value') +path = value +value = get('id.about') +path = score +value = get('id.score') + +[debug] +text = INDEX + +[save] +location = env('location') +domain = env('loterreID').prepend('loterre-').append('indexes') +path = id +score = get('score') + +[pop] + +[replace] +path = state +value = compile diff --git a/loterre-resolvers/create-databases.ini b/loterre-resolvers/create-databases.ini new file mode 100644 index 0000000..27227c3 --- /dev/null +++ b/loterre-resolvers/create-databases.ini @@ -0,0 +1,38 @@ +#!/usr/bin/env npx ezs + +# Input: [{ "loterreID": "216" }] + +# Usage: loterreID=2XK bunx ezs create-databases.ini <<< '[{"loterreId": "2XK"}]' + +[use] +# JSONParse +plugin = basics +# save +plugin = storage + +[env] +path = location +value = fix(`${env('PWD', '.')}/databases/${env('loterreID', 'noid')}`) + +[JSONParse] + +[replace] +path = loterreID +value = env('loterreID', _.get("loterreID")) + +[debug] +text = Avant save + +[save] +location = env('location') +domain = fix('loterre-vocabulaires').append('-loaded') +path = loterreID + +[delegate] +file = ./download.ini + +[delegate] +file = ./compile.ini + +[dump] +indent = true diff --git a/loterre-resolvers/download.ini 
b/loterre-resolvers/download.ini new file mode 100644 index 0000000..0cfb70d --- /dev/null +++ b/loterre-resolvers/download.ini @@ -0,0 +1,38 @@ +[use] +plugin = basics +plugin = analytics +plugin = storage + + +# Download the skos, parse it and save it +[exchange] +value = fix([`./data/${env('loterreID', 'noid')}.skos`]) + +[FILELoad] +[FILEMerge] + +[XMLParse] +separator = /rdf:RDF/skos:Concept + +# build indexKeys (remove duplicates to avoid ambiguities) +[assign] +path = indexKeys +value = fix(_.get(self, 'skos$prefLabel'), _.get(self, 'skos$altLabel'), _.get(self, 'skos$hiddenLabel')). \ + flatten().\ + filter(o => _.has(o, '$t')).\ + map(o => _.deburr(String(o.$t).replace(/[`~!@#$%^&*()_|+\-=?;:'",.<>\{\}\[\]\\\/\s]/g, '').toLowerCase())).\ + uniq() + +path = score +value = get('owl$deprecated.$t', 3).thru(x => Number(String(x).toLowerCase() === 'true' ? 1 : 2)) + +[save] +location = env('location') +domain = env('loterreID').prepend('loterre-').append('concepts') +path = rdf$about + +[pop] + +[replace] +path = state +value = download diff --git a/loterre-resolvers/dvc.lock b/loterre-resolvers/dvc.lock new file mode 100644 index 0000000..d98dafb --- /dev/null +++ b/loterre-resolvers/dvc.lock @@ -0,0 +1,236 @@ +schema: '2.0' +stages: + tgz@216: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=216 bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/216.tgz databases/216 + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/216.skos + hash: md5 + md5: 3adb88d03412ed4e4569b71b14354d0c + size: 1936035 + outs: + - path: databases/216.tgz + hash: md5 + md5: 28ef3059d93b91020f524680d54d6b2f + size: 3343752 + tgz@QX8: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=QX8 bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/QX8.tgz databases/QX8 + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/QX8.skos + hash: md5 + md5: 
0e0210f8098cfa0d19918469480f60a0 + size: 2027326 + outs: + - path: databases/QX8.tgz + hash: md5 + md5: d9a42f34fb27d39ad02c1811aedcd3e1 + size: 3095298 + tgz@3JP: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=3JP bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/3JP.tgz databases/3JP + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/3JP.skos + hash: md5 + md5: 8a20304e8bf72fd72c82879d922dc173 + size: 2308578 + outs: + - path: databases/3JP.tgz + hash: md5 + md5: 8dede3f3882c5238fe9fd19743f6de23 + size: 5786310 + tgz@73G: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=73G bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/73G.tgz databases/73G + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/73G.skos + hash: md5 + md5: f77a6ade78f20824c4ac32b5a5a290be + size: 2496320 + outs: + - path: databases/73G.tgz + hash: md5 + md5: aa9291d2e8edabea9c4d87e1cad22bde + size: 4861450 + tgz@9SD: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=9SD bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/9SD.tgz databases/9SD + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/9SD.skos + hash: md5 + md5: 7a7ecf5ef02e07bfb0ca26af228a0ea9 + size: 3078291 + outs: + - path: databases/9SD.tgz + hash: md5 + md5: 2e8c5de44935cbb1ed3905d5334554f9 + size: 2447909 + tgz@P66: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=P66 bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/P66.tgz databases/P66 + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/P66.skos + hash: md5 + md5: 0c9cfd915beba169a839ebfa90c20c76 + size: 3917649 + outs: + - path: databases/P66.tgz + hash: md5 + md5: ec4329197f7cde2ce323cad10a06d9fb + size: 3599562 + tgz@MDL: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=MDL bunx 
ezs create-databases.ini <<< '[{}]' + - tar -czf databases/MDL.tgz databases/MDL + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/MDL.skos + hash: md5 + md5: 03fa07edb29af5a50801302722c8fb16 + size: 5657134 + outs: + - path: databases/MDL.tgz + hash: md5 + md5: 3db94dbb55139947b21a466f5f1dbf8b + size: 7664643 + tgz@P21: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=P21 bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/P21.tgz databases/P21 + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/P21.skos + hash: md5 + md5: 3d44f296cb995aedae522a50a33b2762 + size: 8495524 + outs: + - path: databases/P21.tgz + hash: md5 + md5: ed22963ea4f4470fb4d293bc5f312a49 + size: 12846318 + tgz@BVM: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=BVM bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/BVM.tgz databases/BVM + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/BVM.skos + hash: md5 + md5: 08e1013cdf5b8d13f9da6c318a9e90c4 + size: 9635826 + outs: + - path: databases/BVM.tgz + hash: md5 + md5: 3f8a4c3ceba8f3c45dc91be3d9ba948f + size: 5085076 + tgz@2XK: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=2XK bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/2XK.tgz databases/2XK + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/2XK.skos + hash: md5 + md5: b9c11f0d64f73cbd1109b8f80a0850e7 + size: 28170863 + outs: + - path: databases/2XK.tgz + hash: md5 + md5: ebf4d581d27c9db024e1debf9ec849a1 + size: 21391769 + tgz@N9J: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=N9J bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/N9J.tgz databases/N9J + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/N9J.skos + hash: 
md5 + md5: 4cbae496c05d3d2a534f52c059153d90 + size: 58246718 + outs: + - path: databases/N9J.tgz + hash: md5 + md5: 2b78b493c8a108597296503e857e3c93 + size: 56017597 + tgz@D63: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=D63 bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/D63.tgz databases/D63 + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/D63.skos + hash: md5 + md5: 990c26cb46fc302441f84ca46b6d0a28 + size: 92156390 + outs: + - path: databases/D63.tgz + hash: md5 + md5: bac27a547b6bcacb5b7c0eeb60a25825 + size: 40158038 + tgz@JVR: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=JVR bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/JVR.tgz databases/JVR + deps: + - path: create-databases.ini + hash: md5 + md5: d24e36d0f1e6c881b412528ecbd78b4d + size: 604 + - path: data/JVR.skos + hash: md5 + md5: 4ef3370991e26d92054eeb66c5f75b42 + size: 117642710 + outs: + - path: databases/JVR.tgz + hash: md5 + md5: 3df5c6acfb417d2549721c61af666edf + size: 113042022 diff --git a/loterre-resolvers/dvc.yaml b/loterre-resolvers/dvc.yaml new file mode 100644 index 0000000..f71d731 --- /dev/null +++ b/loterre-resolvers/dvc.yaml @@ -0,0 +1,25 @@ +stages: + tgz: + foreach: + - 216 # Éducation + - QX8 # Paléoclimatologie + - 3JP # Sociologie + - 73G # Philosophie + - 9SD # Pays et subdivisions + - P66 # Mémoire + - MDL # Astronomie + - P21 # Littérature + - BVM # NETSCITY + - 2XK # Laboratoires + - N9J # SAGE + - D63 # Communes (France) + - JVR # MESH + do: + cmd: + - EZS_PIPELINE_DELAY=1200 loterreID=${item} bunx ezs create-databases.ini <<< '[{}]' + - tar -czf databases/${item}.tgz databases/${item} + deps: + - data/${item}.skos + - create-databases.ini + outs: + - databases/${item}.tgz diff --git a/loterre-resolvers/package.json b/loterre-resolvers/package.json new file mode 100644 index 0000000..eedb66f --- /dev/null +++ b/loterre-resolvers/package.json @@ -0,0 +1 @@ +{ "dependencies": { 
"@ezs/analytics": "^2.3.2", "@ezs/basics": "^2.7.2", "@ezs/core": "^3.10.4", "@ezs/storage": "^3.2.3" } } \ No newline at end of file