Skip to content

Commit

Permalink
Add more details about the lemmatizer at the new corpus phase.
Browse files Browse the repository at this point in the history
  • Loading branch information
PonteIneptique committed Mar 19, 2024
1 parent 63661c3 commit 272b825
Show file tree
Hide file tree
Showing 4 changed files with 704 additions and 242 deletions.
12 changes: 12 additions & 0 deletions app/lemmatizers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import Optional
from dataclasses import dataclass


@dataclass
class LemmatizerService:
    """Describe a lemmatization service and how to cite it.

    Fields are declared in the order the generated ``__init__`` accepts them
    positionally; keep that order stable so existing positional callers keep
    working.
    """

    title: str  # Display name of the service, e.g. "Old French"
    uri: str  # Current address of the service
    provider: str  # Who provides it, e.g. Deucalion at Ecole nationale des Chartes
    bibtex: str  # Citation for the service, in BibTeX form
    apa: str  # APA equivalent of the BibTeX citation
    ui: Optional[str] = None  # Address of the provider's own user interface, if any
33 changes: 30 additions & 3 deletions app/templates/main/corpus_new.html
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,28 @@ <h1>{{ _('Create a new corpus') }}</h1>
<div class="col-md-9">
<div class="input-group">
<select class="form-control" id="language-model">
{%- for lang, address in lemmatizers %}
<option value="{{ address }}">{{ lang }}</option>
<option selected value="disabled" class="text-muted disabled">{{_('Select a service')}}</option>
{%- for service in lemmatizers %}
<option value="{{ service.uri }}">{{ service.title }}</option>
{% endfor -%}
</select>
{%- for service in lemmatizers %}
<div class="alert alert-info lemmatizer-details" data-uri="{{service.uri}}" style="display: none;">
{% trans name=service.title, provider=service.provider %}
<em>{{ name }}</em> is a lemmatization service provided by <i>{{ provider }}</i>.<br />
{% endtrans %}
{% if service.ui %}
<em>{{_("If you only want to lemmatize (and not correct data), you can use their user interface here:")}}</em>
<a href="{{service.ui}}">{{service.ui}}</a><br/>
{% endif %}
{{ _("Please cite tools accordingly. You can use the following APA or Bibtex: ")}}
<ul>
<li><em>APA:</em> <code>{{service.apa }}</code></li>
<li><em>BIB:</em> <code style="padding-left: 1em; display: block;">{{service.bibtex}}</code></li>
</ul>
</div>
{% endfor %}

<div class="input-group-append">
<button id="submit-model" class="btn btn-outline-secondary" type="button">{{ _('Lemmatize') }}</button>
</div>
Expand Down Expand Up @@ -200,7 +218,14 @@ <h1>{{ _('Create a new corpus') }}</h1>
<script type="text/javascript">

$(document).ready(function() {

// Show the details panel of the selected lemmatization service and hide all
// the other panels.
$("#language-model").on("change", function () {
    const uri = $("#language-model").val();
    // Compare against each panel's data-uri value directly rather than
    // interpolating the URI into a CSS attribute selector: a quote, backslash
    // or bracket in the URI would make that selector invalid or mismatching.
    document.querySelectorAll(".lemmatizer-details").forEach(function (el) {
        // "disabled" is the placeholder option; no panel is shown for it.
        const visible = uri !== "disabled" && el.dataset.uri === uri;
        el.style.display = visible ? "block" : "none";
    });
});
$("#submit-model").on("click", function(event) {
event.preventDefault();
// Get the parameters
Expand All @@ -213,6 +238,8 @@ <h1>{{ _('Create a new corpus') }}</h1>
tokens_fail = document.getElementById("tokens-fail"),
approximate_count = document.getElementById("tokens-approximate");

if (uri === "disabled") { return; }

// Create the form
var formData = new FormData();
formData.append("data", text_data.value);
Expand Down
22 changes: 20 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from app.lemmatizers import LemmatizerService
from typing import List

basedir = os.path.abspath(os.path.dirname(__file__))

Expand Down Expand Up @@ -38,7 +39,7 @@ class Config:
PAGINATION_DEFAULT_TOKENS = 100

# Lemmatizer (until Deucalion client)
LEMMATIZERS = []
LEMMATIZERS: List[LemmatizerService] = []

# Change automatically the Postgresql instance language if not english
FORCE_PSQL_EN_LOCALE = True
Expand Down Expand Up @@ -101,6 +102,23 @@ class BaseTestConfig(Config):
EMAIL_SUBJECT_PREFIX = '[{}]'.format(Config.APP_NAME)
EMAIL_SENDER = '{app_name} Admin <{email}>'.format(app_name=Config.APP_NAME, email=MAIL_USERNAME)

# Placeholder lemmatizer entry for the test configuration.
LEMMATIZERS = [
    LemmatizerService(
        "Dummy lemmatizer",
        "http://dummy-uri",
        provider="ProviderInstitution",
        ui="someui.com",
        apa="Clérice et al. 2019",
        # Raw string: in a regular literal "\'" is an escape sequence that
        # collapses to "'", which would strip the backslash from the BibTeX
        # accent commands ({\'e}) in the author list.
        bibtex=r"""@article{camps2021corpus,
title = {Corpus and Models for Lemmatisation and POS-tagging of Old French},
author = {Camps, Jean-Baptiste and Cl{\'e}rice, Thibault and Duval, Fr{\'e}d{\'e}ric and Kanaoka, Naomi and Pinche, Ariane and others},
year = 2021,
journal = {arXiv preprint arXiv:2109.11442},
keywords = {Old French}
}"""
    )
]


class SQLiteTestConfig(BaseTestConfig):
SQLALCHEMY_DATABASE_URI = os.environ.get('TEST_DATABASE_URL') or \
Expand Down
Loading

0 comments on commit 272b825

Please sign in to comment.