From ae07eb7af5d488519d3d7c9a1109eb4269279950 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Tue, 12 Nov 2024 12:07:03 +0530 Subject: [PATCH 1/6] added final changes --- backend/projects/tasks.py | 41 +++------ backend/projects/views.py | 177 ++++++++++++++++++++++---------------- backend/tasks/views.py | 104 +++++++++++----------- 3 files changed, 167 insertions(+), 155 deletions(-) diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index a5e58a88..2e94838d 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -199,29 +199,9 @@ def create_tasks_from_dataitems(items, project): # Remove data id because it's not needed in task.data if "id" in item: del item["id"] - task = Task(data=item, project_id=project, input_data=data) - """ - if is_translation_project or dataset_type1 == "TranslationPair": - if is_conversation_project: - field_name = ( - "source_conversation_json" - if is_editing_project - else "conversation_json" - ) - task.data["word_count"] = conversation_wordcount(task.data[field_name]) - task.data["sentence_count"] = conversation_sentence_count( - task.data[field_name] - ) - else: - task.data["word_count"] = no_of_words(task.data["input_text"]) - if is_audio_project: - indx = 0 - for speaker in task.data["speakers_json"]: - field_name = "speaker_" + str(indx) + "_details" - task.data[field_name] = stringify_json(task.data["speakers_json"][indx]) - indx += 1 - """ - tasks.append(task) + for _ in range(project.required_annotators_per_task): + task = Task(data=item, project_id=project, input_data=data) + tasks.append(task) # Bulk create the tasks Task.objects.bulk_create(tasks) @@ -424,19 +404,20 @@ def export_project_in_place( # List for storing the annotated tasks that have been accepted as correct annotation annotated_tasks = [] export_excluded_task_ids = [] - required_annotators_per_task = project.required_annotators_per_task + # required_annotators_per_task = project.required_annotators_per_task for task in tasks: 
task_dict = model_to_dict(task) # Rename keys to match label studio converter # task_dict['id'] = task_dict['task_id'] # del task_dict['task_id'] ann_list = [] - if required_annotators_per_task >= 2: - all_ann = Annotation.objects.filter(task=task) - for a in all_ann: - ann_list.append(a) - task_dict["annotations"] = ann_list - elif task.correct_annotation is not None: + # if required_annotators_per_task >= 2: + # all_ann = Annotation.objects.filter(task=task) + # for a in all_ann: + # ann_list.append(a) + # task_dict["annotations"] = ann_list + # elif task.correct_annotation is not None: + if task.correct_annotation is not None: annotated_tasks.append(task) annotation_dict = model_to_dict(task.correct_annotation) # annotation_dict['result'] = annotation_dict['result_json'] diff --git a/backend/projects/views.py b/backend/projects/views.py index be92604a..16631b2d 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -16,7 +16,7 @@ from rest_framework.response import Response from users.models import LANG_CHOICES from users.serializers import UserEmailSerializer -from dataset.serializers import TaskResultSerializer +from dataset.serializers import TaskResultSerializer, DatasetInstanceSerializer from utils.search import process_search_query from django_celery_results.models import TaskResult from drf_yasg import openapi @@ -1547,36 +1547,37 @@ def next(self, request, pk): task_ids = [an.task_id for an in ann_filter1] queryset = Task.objects.filter(id__in=task_ids).order_by("id") - required_annotators_per_task = project.required_annotators_per_task - next_anno = "" - if required_annotators_per_task > 1: - try: - curr_anno_id = int(request.data.get("current_annotation_id")) - except Exception as e: - ret_dict = {"message": "Please send the current_annotation_id"} - ret_status = status.HTTP_400_BAD_REQUEST - return Response(ret_dict, status=ret_status) - for task in queryset: - curr_task_anno = ann_filter1.filter(task=task).order_by("id") - ann_ids 
= [an.id for an in curr_task_anno] - if curr_anno_id != ann_ids[-1]: - for i, c in enumerate(ann_ids): - if c == curr_anno_id: - next_anno = ann_ids[i + 1] - if next_anno: - queryset = queryset.filter(id=current_task_id) - elif current_task_id != None: + # required_annotators_per_task = project.required_annotators_per_task + # next_anno = "" + # if required_annotators_per_task > 1: + # try: + # curr_anno_id = int(request.data.get("current_annotation_id")) + # except Exception as e: + # ret_dict = {"message": "Please send the current_annotation_id"} + # ret_status = status.HTTP_400_BAD_REQUEST + # return Response(ret_dict, status=ret_status) + # for task in queryset: + # curr_task_anno = ann_filter1.filter(task=task).order_by("id") + # ann_ids = [an.id for an in curr_task_anno] + # if curr_anno_id != ann_ids[-1]: + # for i, c in enumerate(ann_ids): + # if c == curr_anno_id: + # next_anno = ann_ids[i + 1] + # if next_anno: + # queryset = queryset.filter(id=current_task_id) + # elif current_task_id != None: + if current_task_id != None: queryset = queryset.filter(id__gt=current_task_id) for task in queryset: - if next_anno: - task_dict = TaskSerializer(task, many=False).data - task_dict["correct_annotation"] = next_anno - return Response(task_dict) - elif required_annotators_per_task > 1: - next_anno = ann_filter1.filter(task=task).order_by("id") - task_dict = TaskSerializer(task, many=False).data - task_dict["correct_annotation"] = next_anno[0].id - return Response(task_dict) + # if next_anno: + # task_dict = TaskSerializer(task, many=False).data + # task_dict["correct_annotation"] = next_anno + # return Response(task_dict) + # elif required_annotators_per_task > 1: + # next_anno = ann_filter1.filter(task=task).order_by("id") + # task_dict = TaskSerializer(task, many=False).data + # task_dict["correct_annotation"] = next_anno[0].id + # return Response(task_dict) task_dict = TaskSerializer(task, many=False).data return Response(task_dict) ret_dict = {"message": "No 
more tasks available!"} @@ -1948,34 +1949,39 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): project.max_tasks_per_user - tasks_assigned_to_user, tasks_to_be_assigned, ) + ( + data_items_of_unassigned_tasks, + data_items_of_assigned_tasks, + data_items_vs_tasks_map, + ) = (set(), set(), {}) + for t in tasks: + if not t.annotation_users.all(): + data_items_vs_tasks_map[t.input_data.id] = t + data_items_of_unassigned_tasks.add(t.input_data.id) + for anno in proj_annotations: + data_items_of_assigned_tasks.add(anno.task.input_data.id) + all_unassigned_data_items = ( + data_items_of_unassigned_tasks - data_items_of_assigned_tasks + ) + tasks = [data_items_vs_tasks_map[audt] for audt in all_unassigned_data_items] if max_task_that_can_be_assigned: tasks = tasks[:max_task_that_can_be_assigned] else: tasks = tasks[:tasks_to_be_assigned] - # tasks = tasks.order_by("id") + if not tasks: + project.release_lock(ANNOTATION_LOCK) + return Response( + {"message": "No tasks left for assignment in this project"}, + status=status.HTTP_404_NOT_FOUND, + ) for task in tasks: task.annotation_users.add(cur_user) task.save() result = [] - if project.project_type in [ - "AcousticNormalisedTranscriptionEditing", - "AudioTranscriptionEditing", - "OCRTranscriptionEditing", - ]: - try: - result = convert_prediction_json_to_annotation_result( - task.input_data.id, project.project_type - ) - except Exception as e: - print( - f"The prediction json of the data item-{task.input_data.id} is corrupt." 
- ) - task.delete() - continue annotator_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION ).count() - if annotator_anno_count < project.required_annotators_per_task: + if annotator_anno_count == 0: cur_user_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION, @@ -2236,6 +2242,33 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): task_ids = task_ids[:task_pull_count] seen = set() required_annotators_per_task = project.required_annotators_per_task + corrupted_tasks = set() + if required_annotators_per_task > 1: + seen_tasks = set(task_ids) + for i in range(len(task_ids)): + ti = task_ids[i] + t = Task.objects.get(id=ti) + similar_tasks = ( + Task.objects.filter(input_data=t.input_data, project_id=project.id) + .filter(task_status=ANNOTATED) + .filter(review_user__isnull=True) + .exclude(id=t.id) + ) + corrupt_tasks = ( + Task.objects.filter(input_data=t.input_data, project_id=project.id) + .filter(task_status=INCOMPLETE) + .filter(review_user__isnull=True) + .exclude(id=t.id) + ) + if corrupt_tasks: + corrupted_tasks.add(task_ids[i]) + continue + for j in range(len(similar_tasks)): + st = similar_tasks[j] + if st.id not in seen_tasks: + task_ids.append(st.id) + task_ids = [t for t in task_ids if t not in corrupted_tasks] + task_ids = task_ids[:task_pull_count] for task_id in task_ids: if task_id in seen: continue @@ -2254,26 +2287,25 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): reviewer_anno_count = Annotation_model.objects.filter( task_id=task_id, annotation_type=REVIEWER_ANNOTATION ).count() - for i in range(required_annotators_per_task): - if reviewer_anno_count == 0: - base_annotation_obj = Annotation_model( - result=rec_ann[i].result, - task=task, - completed_by=cur_user, - annotation_status="unreviewed", - parent_annotation=rec_ann[i], - annotation_type=REVIEWER_ANNOTATION, + if reviewer_anno_count == 0: + base_annotation_obj = 
Annotation_model( + result=rec_ann[0].result, + task=task, + completed_by=cur_user, + annotation_status="unreviewed", + parent_annotation=rec_ann[0], + annotation_type=REVIEWER_ANNOTATION, + ) + try: + base_annotation_obj.save() + except IntegrityError as e: + print( + f"Task, completed_by and parent_annotation fields are same while assigning new review task " + f"for project id-{project.id}, user-{cur_user.email}" ) - try: - base_annotation_obj.save() - except IntegrityError as e: - print( - f"Task, completed_by and parent_annotation fields are same while assigning new review task " - f"for project id-{project.id}, user-{cur_user.email}" - ) - else: - task.review_user = reviewer_anno[i].completed_by - task.save() + else: + task.review_user = reviewer_anno[i].completed_by + task.save() project.release_lock(REVIEW_LOCK) return Response( {"message": "Tasks assigned successfully"}, status=status.HTTP_200_OK @@ -3669,7 +3701,7 @@ def download(self, request, pk=None, *args, **kwargs): ret_status = status.HTTP_200_OK return Response(ret_dict, status=ret_status) tasks_list = [] - required_annotators_per_task = project.required_annotators_per_task + # required_annotators_per_task = project.required_annotators_per_task for task in tasks: ann_list = [] task_dict = model_to_dict(task) @@ -3693,17 +3725,18 @@ def download(self, request, pk=None, *args, **kwargs): )[0] annotator_email = "" - if correct_annotation is not None and required_annotators_per_task < 2: + # if correct_annotation is not None and required_annotators_per_task < 2: + if correct_annotation is not None: try: annotator_email = correct_annotation.completed_by.email except: pass task_dict["annotations"] = [correct_annotation] - elif required_annotators_per_task >= 2: - all_ann = Annotation.objects.filter(task=task) - for a in all_ann: - ann_list.append(a) - task_dict["annotations"] = ann_list + # elif required_annotators_per_task >= 2: + # all_ann = Annotation.objects.filter(task=task) + # for a in all_ann: + # 
ann_list.append(a) + # task_dict["annotations"] = ann_list else: task_dict["annotations"] = [] diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 398bbd39..2f5e7046 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -216,7 +216,7 @@ def list(self, request, *args, **kwargs): if exist_req_user: user_id = int(req_user) - required_annotators_per_task = proj_objs[0].required_annotators_per_task + # required_annotators_per_task = proj_objs[0].required_annotators_per_task if "annotation_status" in dict(request.query_params): ann_status = request.query_params["annotation_status"] ann_status = ast.literal_eval(ann_status) @@ -389,32 +389,32 @@ def list(self, request, *args, **kwargs): task_objs.sort(key=lambda x: x["id"]) ordered_tasks = [] final_dict = {} - seen = set() + # seen = set() for task_obj in task_objs: - if task_obj["id"] in seen: - continue - seen.add(task_obj["id"]) + # if task_obj["id"] in seen: + # continue + # seen.add(task_obj["id"]) tas = Task.objects.filter(id=task_obj["id"]) tas = tas.values()[0] tas["review_status"] = task_obj["annotation_status"] tas["user_mail"] = task_obj["user_mail"] - if required_annotators_per_task > 1: - review_ann = [ - a - for a in Annotation.objects.filter( - task_id=tas["id"] - ).order_by("id") - if a.annotation_type == REVIEWER_ANNOTATION - ] - if len(review_ann) > 1: - for r in review_ann: - tas_copy = deepcopy(tas) - tas_copy["correct_annotation_id"] = r.id - tas_copy[ - "annotator_mail" - ] = r.parent_annotation.completed_by.email - ordered_tasks.append(tas_copy) - continue + # if required_annotators_per_task > 1: + # review_ann = [ + # a + # for a in Annotation.objects.filter( + # task_id=tas["id"] + # ).order_by("id") + # if a.annotation_type == REVIEWER_ANNOTATION + # ] + # if len(review_ann) > 1: + # for r in review_ann: + # tas_copy = deepcopy(tas) + # tas_copy["correct_annotation_id"] = r.id + # tas_copy[ + # "annotator_mail" + # ] = r.parent_annotation.completed_by.email + # 
ordered_tasks.append(tas_copy) + # continue ordered_tasks.append(tas) if page_number is not None: @@ -507,11 +507,11 @@ def list(self, request, *args, **kwargs): task_objs.sort(key=lambda x: x["id"]) ordered_tasks = [] final_dict = {} - seen = set() + # seen = set() for task_obj in task_objs: - if task_obj["id"] in seen: - continue - seen.add(task_obj["id"]) + # if task_obj["id"] in seen: + # continue + # seen.add(task_obj["id"]) tas = Task.objects.filter(id=task_obj["id"]) tas = tas.values()[0] tas["review_status"] = task_obj["annotation_status"] @@ -559,23 +559,23 @@ def list(self, request, *args, **kwargs): else: tas["data"]["output_text"] = "-" del tas["data"]["machine_translation"] - if required_annotators_per_task > 1: - review_ann = [ - a - for a in Annotation.objects.filter( - task_id=tas["id"] - ).order_by("id") - if a.annotation_type == REVIEWER_ANNOTATION - ] - if len(review_ann) > 1: - for r in review_ann: - tas_copy = deepcopy(tas) - tas_copy["correct_annotation_id"] = r.id - tas_copy[ - "annotator_mail" - ] = r.parent_annotation.completed_by.email - ordered_tasks.append(tas_copy) - continue + # if required_annotators_per_task > 1: + # review_ann = [ + # a + # for a in Annotation.objects.filter( + # task_id=tas["id"] + # ).order_by("id") + # if a.annotation_type == REVIEWER_ANNOTATION + # ] + # if len(review_ann) > 1: + # for r in review_ann: + # tas_copy = deepcopy(tas) + # tas_copy["correct_annotation_id"] = r.id + # tas_copy[ + # "annotator_mail" + # ] = r.parent_annotation.completed_by.email + # ordered_tasks.append(tas_copy) + # continue ordered_tasks.append(tas) if page_number is not None: page_object = Paginator(ordered_tasks, records) @@ -1689,17 +1689,15 @@ def partial_update(self, request, pk=None): no_of_annotations = task.annotations.filter( annotation_type=ANNOTATOR_ANNOTATION, annotation_status="labeled" ).count() - if task.project_id.required_annotators_per_task == no_of_annotations: - # if True: - task.task_status = ANNOTATED - if not ( 
- task.project_id.project_stage == REVIEW_STAGE - or task.project_id.project_stage == SUPERCHECK_STAGE - ): - if no_of_annotations == 1: - task.correct_annotation = annotation + task.task_status = ANNOTATED + if not ( + task.project_id.project_stage == REVIEW_STAGE + or task.project_id.project_stage == SUPERCHECK_STAGE + ): + if no_of_annotations == 1: + task.correct_annotation = annotation - task.save() + task.save() # Review annotation update elif annotation_obj.annotation_type == REVIEWER_ANNOTATION: From 7d141fb73b157e819c8d838a3e811948af08283e Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 20 Nov 2024 09:30:58 +0530 Subject: [PATCH 2/6] adding final changes for filter, review count and wer scores --- backend/functions/tasks.py | 69 ++++++++++++++++- backend/projects/utils.py | 152 +++++++++++++++++++++++++++++++++---- backend/projects/views.py | 38 +++++++--- 3 files changed, 232 insertions(+), 27 deletions(-) diff --git a/backend/functions/tasks.py b/backend/functions/tasks.py index 9f30ef6e..c3dd79ae 100644 --- a/backend/functions/tasks.py +++ b/backend/functions/tasks.py @@ -196,6 +196,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u result_ann_meta_stats, result_rev_meta_stats, result_sup_meta_stats, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) = get_stats_definitions() for ann_obj in annotations: if ann_obj.annotation_type == ANNOTATOR_ANNOTATION: @@ -208,6 +211,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u result_ann_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) except: continue @@ -221,6 +227,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u result_rev_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) except: continue @@ -234,6 +243,9 @@ def get_stats(proj_objs, 
anno_stats, meta_stats, complete_stats, project_type, u result_sup_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) except: continue @@ -247,6 +259,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u anno_stats, meta_stats, complete_stats, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, proj.id, user, ) @@ -407,6 +422,9 @@ def get_stats_definitions(): result_ann_meta_stats, result_rev_meta_stats, result_sup_meta_stats, + [], + [], + [], ) @@ -420,6 +438,9 @@ def get_modified_stats_result( anno_stats, meta_stats, complete_stats, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, proj_id, user, ): @@ -467,6 +488,15 @@ def get_modified_stats_result( .exclude(review_user=user.id) .count() ) + result["Average Annotator VS Reviewer Word Error Rate"] = "{:.2f}".format( + get_average_of_a_list(average_ann_vs_rev_WER) + ) + result["Average Reviewer VS Superchecker Word Error Rate"] = "{:.2f}".format( + get_average_of_a_list(average_rev_vs_sup_WER) + ) + result["Average Annotator VS Superchecker Word Error Rate"] = "{:.2f}".format( + get_average_of_a_list(average_rev_vs_sup_WER) + ) return result @@ -476,7 +506,7 @@ def get_average_of_a_list(arr): total_sum = 0 total_length = 0 for num in arr: - if isinstance(num, int): + if isinstance(num, int) or isinstance(num, float): total_sum += num total_length += 1 return total_sum / total_length if total_length > 0 else 0 @@ -547,6 +577,9 @@ def get_stats_helper( result_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ): task_obj = ann_obj.task task_data = task_obj.data @@ -560,6 +593,40 @@ def get_stats_helper( ann_obj, project_type, ) + if task_obj.task_status == REVIEWED: + if ann_obj.annotation_type == REVIEWER_ANNOTATION: + try: + average_ann_vs_rev_WER.append( + calculate_wer_between_two_annotations( + 
get_most_recent_annotation(ann_obj).result, + get_most_recent_annotation(ann_obj.parent_annotation).result, + ) + ) + except Exception as error: + pass + elif task_obj.task_status == SUPER_CHECKED: + if ann_obj.annotation_type == SUPER_CHECKER_ANNOTATION: + try: + average_ann_vs_rev_WER.append( + calculate_wer_between_two_annotations( + get_most_recent_annotation(ann_obj.parent_annotation).result, + get_most_recent_annotation( + ann_obj.parent_annotation.parent_annotation + ).result, + ) + ) + except Exception as error: + pass + try: + average_rev_vs_sup_WER.append( + calculate_wer_between_two_annotations( + get_most_recent_annotation(ann_obj).result, + get_most_recent_annotation(ann_obj.parent_annotation).result, + ) + ) + except Exception as error: + pass + return 0 diff --git a/backend/projects/utils.py b/backend/projects/utils.py index ad43562d..f38ba2f5 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -7,7 +7,7 @@ from projects.models import Project from rest_framework.response import Response from rest_framework import status -from tasks.models import Annotation as Annotation_model +from tasks.models import Annotation as Annotation_model, LABELED, Task from users.models import User from dataset.models import Instruction, Interaction @@ -189,27 +189,20 @@ def get_audio_segments_count(annotation_result): def calculate_word_error_rate_between_two_audio_transcription_annotation( annotation_result1, annotation_result2 ): - annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"])) - annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"])) - annotation_result1_text = "" annotation_result2_text = "" for result in annotation_result1: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass + try: + annotation_result1_text += result["prompt"] + except: + pass for result in 
annotation_result2: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: - try: - for s in result["value"]["text"]: - annotation_result2_text += s - except: - pass + try: + annotation_result2_text += result["prompt"] + except: + pass if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0: return 0 return wer(annotation_result1_text, annotation_result2_text) @@ -384,3 +377,130 @@ def get_annotations_for_project( return None, Response( {"message": "Project id not provided"}, status=status.HTTP_400_BAD_REQUEST ) + + +def filter_tasks_for_review_filter_criteria(task_ids): + tasks_to_be_removed = set() + for task_id in task_ids: + task = Task.objects.filter(id=task_id) + ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED)[0] + for r in ann.result: + model_responses_json = r["model_responses_json"] + for mr in model_responses_json: + questions_response = mr["questions_response"] + for qr in questions_response: + if ( + "review_filter_criteria" in qr["question"] + and "review_filter_values" in qr["question"] + and "response" in qr + ): + response = qr["response"] + if not isinstance(response, list) or not isinstance( + qr["question"]["review_filter_values"], list + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() == "equals" + ): + if not check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "not_equals" + ): + if check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "greater_than" + ): + if not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == 
"greater_than_equals" + ): + if not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than_equals" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) + task_ids = [t for t in task_ids if t not in tasks_to_be_removed] + return task_ids + + +def check_matching_values_equal(list1, list2): + processed_list1 = set() + + for item in list1: + if isinstance(item, str): + processed_list1.add(item.lower()) + elif isinstance(item, int): + processed_list1.add(float(item)) + + for item in list2: + if isinstance(item, str): + if item.lower() in processed_list1: + return True + elif isinstance(item, int): + if float(item) in processed_list1: + return True + return False + + +def check_matching_values_greater(list1, list2, criteria): + integers_list1, integers_list2 = [], [] + for item1 in list1: + if isinstance(item1, int): + integers_list1.append(item1) + elif isinstance(item1, str): + if item1.isdigit(): + integers_list1.append(int(item1)) + for item2 in list2: + if isinstance(item2, int): + integers_list1.append(item2) + elif isinstance(item2, str): + if item2.isdigit(): + integers_list1.append(int(item2)) + + if criteria == "greater_than": + for num1 in integers_list1: + for num2 in integers_list2: + if num1 > num2: + return True + return False + else: + for num1 in integers_list1: + for num2 in integers_list2: + if num1 >= num2: + return True + return False diff --git a/backend/projects/views.py b/backend/projects/views.py index 16631b2d..18876c28 100644 --- 
a/backend/projects/views.py +++ b/backend/projects/views.py @@ -8,7 +8,7 @@ from django.core.files import File from django.db import IntegrityError -from django.db.models import Count, Q, F, Case, When +from django.db.models import Count, Q, F, Case, When, OuterRef, Exists from django.forms.models import model_to_dict from rest_framework import status, viewsets from rest_framework.decorators import action @@ -46,6 +46,7 @@ get_user_from_query_params, ocr_word_count, get_attributes_for_ModelInteractionEvaluation, + filter_tasks_for_review_filter_criteria, ) from dataset.models import DatasetInstance @@ -864,15 +865,31 @@ def retrieve(self, request, pk, *args, **kwargs): project_response.data["unassigned_task_count"] = get_task_count_unassigned( pk, request.user ) - - # Add a field to specify the no. of labeled tasks - project_response.data["labeled_task_count"] = ( - Task.objects.filter(project_id=pk) - .filter(task_status=ANNOTATED) - .filter(review_user__isnull=True) - .exclude(annotation_users=request.user.id) - .count() - ) + project = Project.objects.get(id=pk) + if project.required_annotators_per_task > 1: + similar_task_incomplete = Task.objects.filter( + project_id=OuterRef("project_id"), + input_data=OuterRef("input_data"), + task_status=INCOMPLETE, + ).exclude(id=OuterRef("id")) + + tasks = ( + Task.objects.filter( + project_id=pk, task_status=ANNOTATED, review_user__isnull=True + ) + .exclude(annotation_users=request.user.id) + .exclude(Exists(similar_task_incomplete)) + .count() + ) + project_response.data["labeled_task_count"] = tasks + else: + project_response.data["labeled_task_count"] = ( + Task.objects.filter(project_id=pk) + .filter(task_status=ANNOTATED) + .filter(review_user__isnull=True) + .exclude(annotation_users=request.user.id) + .count() + ) # Add a field to specify the no. 
of reviewed tasks project_response.data["reviewed_task_count"] = ( @@ -2269,6 +2286,7 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): task_ids.append(st.id) task_ids = [t for t in task_ids if t not in corrupted_tasks] task_ids = task_ids[:task_pull_count] + task_ids = filter_tasks_for_review_filter_criteria(task_ids) for task_id in task_ids: if task_id in seen: continue From 6d4d9884151e67fcd5225ee2eff7966db32020ab Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 20 Nov 2024 13:14:06 +0530 Subject: [PATCH 3/6] minor_fix --- backend/projects/utils.py | 141 ++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index f38ba2f5..0c2ce6a3 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -384,76 +384,81 @@ def filter_tasks_for_review_filter_criteria(task_ids): for task_id in task_ids: task = Task.objects.filter(id=task_id) ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED)[0] + if not isinstance(ann.result, list): + continue for r in ann.result: - model_responses_json = r["model_responses_json"] - for mr in model_responses_json: - questions_response = mr["questions_response"] - for qr in questions_response: - if ( - "review_filter_criteria" in qr["question"] - and "review_filter_values" in qr["question"] - and "response" in qr - ): - response = qr["response"] - if not isinstance(response, list) or not isinstance( - qr["question"]["review_filter_values"], list - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() == "equals" - ): - if not check_matching_values_equal( - response, qr["question"]["review_filter_values"] + if "model_responses_json" in r: + model_responses_json = r["model_responses_json"] + for mr in model_responses_json: + if "questions_response" in mr: + questions_response = mr["questions_response"] + for qr in questions_response: + if ( + 
"review_filter_criteria" in qr["question"] + and "review_filter_values" in qr["question"] + and "response" in qr ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "not_equals" - ): - if check_matching_values_equal( - response, qr["question"]["review_filter_values"] - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "greater_than" - ): - if not check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than", - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "greater_than_equals" - ): - if not check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than_equals", - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "less_than" - ): - if check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than_equals", - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "less_than_equals" - ): - if check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than", - ): - tasks_to_be_removed.add(task_id) + response = qr["response"] + if not isinstance(response, list) or not isinstance( + qr["question"]["review_filter_values"], list + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "equals" + ): + if not check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "not_equals" + ): + if check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "greater_than" + ): + if 
not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "greater_than_equals" + ): + if not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than_equals" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) task_ids = [t for t in task_ids if t not in tasks_to_be_removed] return task_ids From 9c29a14418166f45d6d32eed5059e5e390f97de2 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 20 Nov 2024 13:16:06 +0530 Subject: [PATCH 4/6] black formatting --- backend/tasks/urls.py | 8 ++++---- backend/tasks/views.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/backend/tasks/urls.py b/backend/tasks/urls.py index 658eb5da..8a1a2ea1 100644 --- a/backend/tasks/urls.py +++ b/backend/tasks/urls.py @@ -17,8 +17,8 @@ urlpatterns = [ path("get_celery_tasks", get_celery_tasks), path( - "xlit-api/generic/transliteration//", - TransliterationAPIView.as_view(), - name="transliteration-api", - ), + "xlit-api/generic/transliteration//", + TransliterationAPIView.as_view(), + name="transliteration-api", + ), ] + router.urls diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 24762864..3f7ccf13 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -2496,6 +2496,7 @@ def get_celery_tasks(request): data = paginate_queryset(filtered_tasks, page_number, page_size) return 
JsonResponse(data["results"], safe=False) + class TransliterationAPIView(APIView): permission_classes = [IsAuthenticated] @@ -2506,4 +2507,4 @@ def get(self, request, target_language, data, *args, **kwargs): ) transliteration_output = response_transliteration.json() - return Response(transliteration_output, status=status.HTTP_200_OK) \ No newline at end of file + return Response(transliteration_output, status=status.HTTP_200_OK) From 670286f8290c459d5ba22515b2ff420a4eb4e502 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Thu, 21 Nov 2024 13:53:04 +0530 Subject: [PATCH 5/6] minor_fix_review_tasks --- backend/projects/utils.py | 5 ++++- backend/projects/views.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 0c2ce6a3..b495a67d 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -383,7 +383,10 @@ def filter_tasks_for_review_filter_criteria(task_ids): tasks_to_be_removed = set() for task_id in task_ids: task = Task.objects.filter(id=task_id) - ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED)[0] + try: + ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED) + except Exception as e: + continue if not isinstance(ann.result, list): continue for r in ann.result: diff --git a/backend/projects/views.py b/backend/projects/views.py index 18876c28..557486ca 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -2286,7 +2286,8 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): task_ids.append(st.id) task_ids = [t for t in task_ids if t not in corrupted_tasks] task_ids = task_ids[:task_pull_count] - task_ids = filter_tasks_for_review_filter_criteria(task_ids) + if required_annotators_per_task > 1: + task_ids = filter_tasks_for_review_filter_criteria(task_ids) for task_id in task_ids: if task_id in seen: continue From 462aa92b92a0d0c343275806c395aa9b5411b759 Mon Sep 17 00:00:00 2001 From: Kunal 
Tiwary Date: Fri, 22 Nov 2024 20:23:51 +0530 Subject: [PATCH 6/6] added minor_fix_390_putils --- backend/projects/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index b495a67d..60599203 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -387,6 +387,10 @@ def filter_tasks_for_review_filter_criteria(task_ids): ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED) except Exception as e: continue + try: + ann = ann[0] + except Exception as e: + continue if not isinstance(ann.result, list): continue for r in ann.result: