Skip to content

Commit

Permalink
ajout fonction filtres tokens à corriger
Browse files Browse the repository at this point in the history
  • Loading branch information
Juliettejns committed Jun 13, 2024
1 parent 49016d0 commit 4308ecc
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 16 deletions.
33 changes: 33 additions & 0 deletions app/main/views/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,39 @@ def tokens_correct_unallowed(corpus_id, allowed_type):
)


@main.route('/corpus/<int:corpus_id>/tokens/unallowed/<allowed_type>/correct/filter', methods=['POST'])
@login_required
@requires_corpus_access("corpus_id")
def tokens_correct_unallowed_filter(corpus_id, allowed_type):
    """ Page listing the unallowed tokens of a corpus, narrowed by the filter form

    The submitted form may carry any of the checkbox fields ``punct``,
    ``numeral``, ``ignore`` and ``metadata``. The values of the fields that
    were sent are joined with single spaces and handed to
    ``Corpus.get_unallowed`` through its ``ignore`` argument; an empty string
    is passed when no field was sent.

    :param corpus_id: ID of the corpus
    :param allowed_type: Category of annotation checked against the allowed
        values (forwarded unchanged to ``Corpus.get_unallowed``)
    :return: Rendered ``main/tokens_correct_unallowed.html`` template
    """
    corpus = Corpus.query.filter_by(**{"id": corpus_id}).first()

    # A field missing from the form yields None from request.form.get;
    # only the fields actually submitted are kept.
    submitted = (
        request.form.get(field_name)
        for field_name in ("punct", "numeral", "ignore", "metadata")
    )
    ignored = " ".join(value for value in submitted if value is not None)

    tokens = corpus \
        .get_unallowed(allowed_type, ignore=ignored) \
        .paginate(
            page=int_or(request.args.get("page"), 1),
            per_page=int_or(request.args.get("limit"), current_app.config["PAGINATION_DEFAULT_TOKENS"])
        )
    return render_template_with_nav_info(
        'main/tokens_correct_unallowed.html',
        corpus=corpus,
        tokens=tokens,
        allowed_type=allowed_type,
        changed=corpus.changed(tokens.items))


@main.route('/corpus/<int:corpus_id>/tokens/changes/similar/<int:record_id>')
@login_required
@requires_corpus_access("corpus_id")
Expand Down
62 changes: 46 additions & 16 deletions app/models/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def get_allowed_values(self, allowed_type="lemma", label=None, order_by="label",
).order_by(order_by)
return db.session.query(cls).filter(cls.control_list == self.control_lists_id).order_by(order_by)

def get_unallowed(self, allowed_type="lemma"):
def get_unallowed(self, allowed_type="lemma", ignore=False):
""" Search for WordToken that would not comply with Allowed Values (in AllowedLemma,
AllowedPOS, AllowedMorph) nor with a corpus custom dictionary
Expand All @@ -317,22 +317,52 @@ def get_unallowed(self, allowed_type="lemma"):
else:
raise ValueError("Get Allowed value had %s and it's not from the lemma, POS, morph set" % allowed_type)

allowed = db.session.query(cls).filter(
cls.control_list == self.control_lists_id,
cls.label == prop
)
custom_dict = db.session.query(CorpusCustomDictionary).filter(
CorpusCustomDictionary.corpus == self.id,
CorpusCustomDictionary.category == allowed_type,
CorpusCustomDictionary.label == prop
)
return db.session.query(WordToken).filter(
db.and_(
WordToken.corpus == self.id,
not_(allowed.exists()),
not_(custom_dict.exists())
if ignore:
regex_liste = []
if "metadata" in ignore:
regex_liste.append(r'^(?!\[[^\]]+:[^\]]*\]$).*')
if "ignore" in ignore:
regex_liste.append(r'^(?!^\[IGNORE\]$)')
if "punct" in ignore:
regex_liste.append(r"(?!^[^\w\s]$).")
if "numeral" in ignore:
regex_liste.append(r'(?!^\d+$).+')
regex = "".join(regex_liste)
allowed = db.session.query(cls).filter(
cls.control_list == self.control_lists_id,
cls.label == prop
)
custom_dict = db.session.query(CorpusCustomDictionary).filter(
CorpusCustomDictionary.corpus == self.id,
CorpusCustomDictionary.category == allowed_type,
CorpusCustomDictionary.label == prop
)
return db.session.query(WordToken).filter(
db.and_(
WordToken.corpus == self.id,
not_(allowed.exists()),
not_(custom_dict.exists()),
WordToken.form.op('~')(regex)
)
).order_by(WordToken.order_id)

else:
allowed = db.session.query(cls).filter(
cls.control_list == self.control_lists_id,
cls.label == prop
)
).order_by(WordToken.order_id)
custom_dict = db.session.query(CorpusCustomDictionary).filter(
CorpusCustomDictionary.corpus == self.id,
CorpusCustomDictionary.category == allowed_type,
CorpusCustomDictionary.label == prop
)
return db.session.query(WordToken).filter(
db.and_(
WordToken.corpus == self.id,
not_(allowed.exists()),
not_(custom_dict.exists())
)
).order_by(WordToken.order_id)

@property
def tokens_count(self):
Expand Down
19 changes: 19 additions & 0 deletions app/templates/macros/nav_macros.html
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,25 @@
</a>
{% endif %}
</div>
<div>
    <div class="btn-group btn-group-sm" role="group">
        <a class="btn btn-outline-dark title">{{ _('Correction: Ignore Invalide Lemmas: ') }}</a>
        {# Resolve the annotation type from the current path exactly once, so that
           one <form> at most is opened and it is always closed. #}
        {% set filter_type = 'lemma' if 'lemma' in request.path else ('morph' if 'morph' in request.path else ('POS' if 'POS' in request.path else none)) %}
        {% if filter_type %}
        <form class="btn-group btn-outline-dark title" method="post" action="{{ url_for('main.tokens_correct_unallowed_filter', corpus_id=corpus.id, allowed_type=filter_type) }}">
            {# <label> rather than <a>: an anchor may not contain interactive content
               such as a checkbox, and a label makes its text toggle the box. #}
            <label class="btn btn-outline-dark title mb-0"><input type="checkbox" value="punct" name="punct" title="Allow punctuation as lemma"> Punctuation</label>
            <label class="btn btn-outline-dark title mb-0"><input type="checkbox" value="numeral" name="numeral" title="Allow numerals as lemma"> Numeral</label>
            <label class="btn btn-outline-dark title mb-0"><input type="checkbox" value="ignore" name="ignore" title="Allow lemma [IGNORE]"> Ignore</label>
            <label class="btn btn-outline-dark title mb-0"><input type="checkbox" value="metadata" name="metadata" title="Allow metadata such as [METADATA:something] and [REF:1.2.3]"> Metadata</label>
            <button type="submit" class="btn btn-outline-dark title">Submit</button>
        </form>
        {% endif %}
    </div>
</div>
</div></div>
</div>
<hr />
Expand Down

0 comments on commit 4308ecc

Please sign in to comment.