From 64df2db771b4d154d6017db7dfd5959acc841898 Mon Sep 17 00:00:00 2001
From: Kunal Tiwary
Date: Mon, 8 Jul 2024 10:12:50 +0000
Subject: [PATCH 1/2] added wer scores

---
 backend/projects/utils.py   |  4 +-
 backend/workspaces/tasks.py | 88 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/backend/projects/utils.py b/backend/projects/utils.py
index 9408d44ce..678515b47 100644
--- a/backend/projects/utils.py
+++ b/backend/projects/utils.py
@@ -222,7 +222,7 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation(
     annotation_result2_text = ""
 
     for result in annotation_result1:
-        if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]:
+        if "type" in result and result["type"] == "textarea":
             try:
                 for s in result["value"]["text"]:
                     annotation_result1_text += s
@@ -230,7 +230,7 @@
             pass
 
     for result in annotation_result2:
-        if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]:
+        if "type" in result and result["type"] == "textarea":
             try:
                 for s in result["value"]["text"]:
                     annotation_result2_text += s
diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py
index e63273dea..9ffd8afb1 100644
--- a/backend/workspaces/tasks.py
+++ b/backend/workspaces/tasks.py
@@ -13,6 +13,11 @@
     ANNOTATOR_ANNOTATION,
     REVIEWER_ANNOTATION,
     SUPER_CHECKER_ANNOTATION,
+    ACCEPTED,
+    ACCEPTED_WITH_MINOR_CHANGES,
+    ACCEPTED_WITH_MAJOR_CHANGES,
+    VALIDATED,
+    VALIDATED_WITH_CHANGES,
 )
 from .models import Workspace
 from users.models import User
@@ -66,6 +71,45 @@ def get_all_annotation_reports(
         completed_by=userid,
         updated_at__range=[start_date, end_date],
     )
+    number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = (
+        0,
+        0,
+    )
+    ar_wer_score, as_wer_score = 0, 0
+    for ann in submitted_tasks:
+        all_annotations = Annotation.objects.filter(task_id=ann.task_id)
+        ar_done, as_done = False, False  # for duplicate annotations
+        for a in all_annotations:
+            rev_ann, sup_ann = "", ""
+            if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
+                ACCEPTED,
+                ACCEPTED_WITH_MINOR_CHANGES,
+                ACCEPTED_WITH_MAJOR_CHANGES,
+            ]:
+                rev_ann = a
+            elif (
+                a.annotation_type == SUPER_CHECKER_ANNOTATION
+                and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
+            ):
+                sup_ann = a
+            if rev_ann and not ar_done:
+                try:
+                    ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        rev_ann.result, ann.result
+                    )
+                    number_of_tasks_contributed_for_ar_wer += 1
+                    ar_done = True
+                except Exception as e:
+                    pass
+            if sup_ann and not as_done:
+                try:
+                    as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        sup_ann.result, ann.result
+                    )
+                    number_of_tasks_contributed_for_as_wer += 1
+                    as_done = True
+                except Exception as e:
+                    pass
 
     submitted_tasks_count = submitted_tasks.count()
 
@@ -120,6 +164,20 @@
         "Word Count": total_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
+        / number_of_tasks_contributed_for_ar_wer
+        if number_of_tasks_contributed_for_ar_wer
+        else 0,
+        "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
+        if number_of_tasks_contributed_for_ar_wer
+        else 0,
+        "Average Word Error Rate Annotator Vs Superchecker": as_wer_score
+        / number_of_tasks_contributed_for_as_wer
+        if number_of_tasks_contributed_for_as_wer
+        else 0,
+        "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
+        if number_of_tasks_contributed_for_as_wer
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -187,7 +245,27 @@ def get_all_review_reports(
         annotation_type=REVIEWER_ANNOTATION,
         updated_at__range=[start_date, end_date],
     )
-
+    number_of_tasks_contributed_for_rs_wer = 0
+    rs_wer_score = 0
+    for ann in submitted_tasks:
+        all_annotations = Annotation.objects.filter(task_id=ann.task_id)
+        rs_done = False  # for duplicate annotations
+        for a in all_annotations:
+            sup_ann = ""
+            if (
+                a.annotation_type == SUPER_CHECKER_ANNOTATION
+                and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
+            ):
+                sup_ann = a
+            if sup_ann and not rs_done:
+                try:
+                    rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        sup_ann.result, ann.result
+                    )
+                    number_of_tasks_contributed_for_rs_wer += 1
+                    rs_done = True
+                except Exception as e:
+                    pass
     submitted_tasks_count = submitted_tasks.count()
 
     project_type_lower = project_type.lower()
@@ -240,6 +318,13 @@
         "Word Count": total_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
+        / number_of_tasks_contributed_for_rs_wer
+        if number_of_tasks_contributed_for_rs_wer
+        else 0,
+        "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
+        if number_of_tasks_contributed_for_rs_wer
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -509,6 +594,7 @@ def send_user_reports_mail_ws(
     final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)
 
     df = pd.DataFrame.from_dict(final_reports)
+    df = df.fillna("NA")
     content = df.to_csv(index=False)
     content_type = "text/csv"
 

From 9589f3ed8ffaf0384616877d35c047f2ecb19b36 Mon Sep 17 00:00:00 2001
From: Kunal Tiwary
Date: Mon, 8 Jul 2024 11:43:20 +0000
Subject: [PATCH 2/2] minor fix

---
 backend/projects/utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/backend/projects/utils.py b/backend/projects/utils.py
index 678515b47..71c8fa982 100644
--- a/backend/projects/utils.py
+++ b/backend/projects/utils.py
@@ -215,8 +215,13 @@ def audio_word_count(annotation_result):
 
 def calculate_word_error_rate_between_two_audio_transcription_annotation(
     annotation_result1, annotation_result2
 ):
-    annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"]))
-    annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"]))
+    if "end" in annotation_result1[0]["value"]:
+        annotation_result1 = sorted(
+            annotation_result1, key=lambda i: (i["value"]["end"])
+        )
+        annotation_result2 = sorted(
+            annotation_result2, key=lambda i: (i["value"]["end"])
+        )
     annotation_result1_text = ""
     annotation_result2_text = ""