Skip to content

Commit

Permalink
Merge pull request #1102 from AI4Bharat/wer_fix_org
Browse files Browse the repository at this point in the history
added org level wer scores
  • Loading branch information
ishvindersethi22 authored Jul 12, 2024
2 parents 4764095 + b82a915 commit 4c228fd
Showing 1 changed file with 92 additions and 1 deletion.
93 changes: 92 additions & 1 deletion backend/organizations/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
ANNOTATOR_ANNOTATION,
REVIEWER_ANNOTATION,
SUPER_CHECKER_ANNOTATION,
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
VALIDATED,
VALIDATED_WITH_CHANGES,
)
from .models import Organization
from users.models import User
Expand All @@ -23,6 +28,7 @@
get_audio_transcription_duration,
get_audio_segments_count,
ocr_word_count,
calculate_word_error_rate_between_two_audio_transcription_annotation,
)
from workspaces.tasks import (
un_pack_annotation_tasks,
Expand Down Expand Up @@ -68,6 +74,47 @@ def get_all_annotation_reports(
completed_by=userid,
updated_at__range=[start_date, end_date],
)
number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = (
0,
0,
)
ar_wer_score, as_wer_score = 0, 0
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
ar_done, as_done = False, False
ann_ann, rev_ann, sup_ann = "", "", ""
for a in all_annotations:
if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
]:
rev_ann = a
elif (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == ANNOTATOR_ANNOTATION:
ann_ann = a
if ann_ann and rev_ann and not ar_done:
try:
ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
rev_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_ar_wer += 1
ar_done = True
except Exception as e:
pass
if ann_ann and sup_ann and not as_done:
try:
as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_as_wer += 1
as_done = True
except Exception as e:
pass

submitted_tasks_count = submitted_tasks.count()

Expand Down Expand Up @@ -122,6 +169,20 @@ def get_all_annotation_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
/ number_of_tasks_contributed_for_ar_wer
if number_of_tasks_contributed_for_ar_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
if number_of_tasks_contributed_for_ar_wer
else 0,
"Average Word Error Rate Annotator Vs Superchecker": as_wer_score
/ number_of_tasks_contributed_for_as_wer
if number_of_tasks_contributed_for_as_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
if number_of_tasks_contributed_for_as_wer
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -189,7 +250,29 @@ def get_all_review_reports(
annotation_type=REVIEWER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

number_of_tasks_contributed_for_rs_wer = 0
rs_wer_score = 0
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
rs_done = False # for duplicate annotations
sup_ann, rev_ann = "", ""
for a in all_annotations:
if (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == REVIEWER_ANNOTATION:
rev_ann = a
if rev_ann and sup_ann and not rs_done:
try:
rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, rev_ann.result, project_type
)
number_of_tasks_contributed_for_rs_wer += 1
rs_done = True
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -243,6 +326,13 @@ def get_all_review_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
/ number_of_tasks_contributed_for_rs_wer
if number_of_tasks_contributed_for_rs_wer
else 0,
"Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
if number_of_tasks_contributed_for_rs_wer
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -512,6 +602,7 @@ def send_user_reports_mail_org(
final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)

df = pd.DataFrame.from_dict(final_reports)
df = df.fillna("NA")

content = df.to_csv(index=False)
content_type = "text/csv"
Expand Down

0 comments on commit 4c228fd

Please sign in to comment.