From 64df2db771b4d154d6017db7dfd5959acc841898 Mon Sep 17 00:00:00 2001
From: Kunal Tiwary
Date: Mon, 8 Jul 2024 10:12:50 +0000
Subject: [PATCH 1/2] added wer scores

---
 backend/projects/utils.py   |  4 +-
 backend/workspaces/tasks.py | 88 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/backend/projects/utils.py b/backend/projects/utils.py
index 9408d44ce..678515b47 100644
--- a/backend/projects/utils.py
+++ b/backend/projects/utils.py
@@ -222,7 +222,7 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation(
     annotation_result2_text = ""
 
     for result in annotation_result1:
-        if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]:
+        if "type" in result and result["type"] == "textarea":
             try:
                 for s in result["value"]["text"]:
                     annotation_result1_text += s
@@ -230,7 +230,7 @@
             pass
 
     for result in annotation_result2:
-        if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]:
+        if "type" in result and result["type"] == "textarea":
             try:
                 for s in result["value"]["text"]:
                     annotation_result2_text += s
diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py
index e63273dea..9ffd8afb1 100644
--- a/backend/workspaces/tasks.py
+++ b/backend/workspaces/tasks.py
@@ -13,6 +13,11 @@
     ANNOTATOR_ANNOTATION,
     REVIEWER_ANNOTATION,
     SUPER_CHECKER_ANNOTATION,
+    ACCEPTED,
+    ACCEPTED_WITH_MINOR_CHANGES,
+    ACCEPTED_WITH_MAJOR_CHANGES,
+    VALIDATED,
+    VALIDATED_WITH_CHANGES,
 )
 from .models import Workspace
 from users.models import User
@@ -66,6 +71,45 @@ def get_all_annotation_reports(
         completed_by=userid,
         updated_at__range=[start_date, end_date],
     )
+    number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = (
+        0,
+        0,
+    )
+    ar_wer_score, as_wer_score = 0, 0
+    for ann in submitted_tasks:
+        all_annotations = Annotation.objects.filter(task_id=ann.task_id)
+        ar_done, as_done = False, False  # for duplicate annotations
+        for a in all_annotations:
+            rev_ann, sup_ann = "", ""
+            if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
+                ACCEPTED,
+                ACCEPTED_WITH_MINOR_CHANGES,
+                ACCEPTED_WITH_MAJOR_CHANGES,
+            ]:
+                rev_ann = a
+            elif (
+                a.annotation_type == SUPER_CHECKER_ANNOTATION
+                and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
+            ):
+                sup_ann = a
+            if rev_ann and not ar_done:
+                try:
+                    ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        rev_ann.result, ann.result
+                    )
+                    number_of_tasks_contributed_for_ar_wer += 1
+                    ar_done = True
+                except Exception as e:
+                    pass
+            if sup_ann and not as_done:
+                try:
+                    as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        sup_ann.result, ann.result
+                    )
+                    number_of_tasks_contributed_for_as_wer += 1
+                    as_done = True
+                except Exception as e:
+                    pass
 
     submitted_tasks_count = submitted_tasks.count()
 
@@ -120,6 +164,20 @@
         "Word Count": total_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
+        / number_of_tasks_contributed_for_ar_wer
+        if number_of_tasks_contributed_for_ar_wer
+        else 0,
+        "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
+        if number_of_tasks_contributed_for_ar_wer
+        else 0,
+        "Average Word Error Rate Annotator Vs Superchecker": as_wer_score
+        / number_of_tasks_contributed_for_as_wer
+        if number_of_tasks_contributed_for_as_wer
+        else 0,
+        "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
+        if number_of_tasks_contributed_for_as_wer
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -187,7 +245,27 @@ def get_all_review_reports(
         annotation_type=REVIEWER_ANNOTATION,
         updated_at__range=[start_date, end_date],
     )
-
+    number_of_tasks_contributed_for_rs_wer = 0
+    rs_wer_score = 0
+    for ann in submitted_tasks:
+        all_annotations = Annotation.objects.filter(task_id=ann.task_id)
+        rs_done = False  # for duplicate annotations
+        for a in all_annotations:
+            sup_ann = ""
+            if (
+                a.annotation_type == SUPER_CHECKER_ANNOTATION
+                and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
+            ):
+                sup_ann = a
+            if sup_ann and not rs_done:
+                try:
+                    rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        sup_ann.result, ann.result
+                    )
+                    number_of_tasks_contributed_for_rs_wer += 1
+                    rs_done = True
+                except Exception as e:
+                    pass
     submitted_tasks_count = submitted_tasks.count()
 
     project_type_lower = project_type.lower()
@@ -240,6 +318,13 @@
         "Word Count": total_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
+        / number_of_tasks_contributed_for_rs_wer
+        if number_of_tasks_contributed_for_rs_wer
+        else 0,
+        "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
+        if number_of_tasks_contributed_for_rs_wer
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -509,6 +594,7 @@ def send_user_reports_mail_ws(
     final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)
 
     df = pd.DataFrame.from_dict(final_reports)
+    df = df.fillna("NA")
     content = df.to_csv(index=False)
     content_type = "text/csv"
 

From 9589f3ed8ffaf0384616877d35c047f2ecb19b36 Mon Sep 17 00:00:00 2001
From: Kunal Tiwary
Date: Mon, 8 Jul 2024 11:43:20 +0000
Subject: [PATCH 2/2] minor fix

---
 backend/projects/utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/backend/projects/utils.py b/backend/projects/utils.py
index 678515b47..71c8fa982 100644
--- a/backend/projects/utils.py
+++ b/backend/projects/utils.py
@@ -215,8 +215,13 @@ def audio_word_count(annotation_result):
 
 def calculate_word_error_rate_between_two_audio_transcription_annotation(
     annotation_result1, annotation_result2
 ):
-    annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"]))
-    annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"]))
+    if "end" in annotation_result1[0]["value"]:
+        annotation_result1 = sorted(
+            annotation_result1, key=lambda i: (i["value"]["end"])
+        )
+        annotation_result2 = sorted(
+            annotation_result2, key=lambda i: (i["value"]["end"])
+        )
     annotation_result1_text = ""
     annotation_result2_text = ""