From ae07eb7af5d488519d3d7c9a1109eb4269279950 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Tue, 12 Nov 2024 12:07:03 +0530 Subject: [PATCH 1/6] added final changes --- backend/projects/tasks.py | 41 +++------ backend/projects/views.py | 177 ++++++++++++++++++++++---------------- backend/tasks/views.py | 104 +++++++++++----------- 3 files changed, 167 insertions(+), 155 deletions(-) diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index a5e58a88..2e94838d 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -199,29 +199,9 @@ def create_tasks_from_dataitems(items, project): # Remove data id because it's not needed in task.data if "id" in item: del item["id"] - task = Task(data=item, project_id=project, input_data=data) - """ - if is_translation_project or dataset_type1 == "TranslationPair": - if is_conversation_project: - field_name = ( - "source_conversation_json" - if is_editing_project - else "conversation_json" - ) - task.data["word_count"] = conversation_wordcount(task.data[field_name]) - task.data["sentence_count"] = conversation_sentence_count( - task.data[field_name] - ) - else: - task.data["word_count"] = no_of_words(task.data["input_text"]) - if is_audio_project: - indx = 0 - for speaker in task.data["speakers_json"]: - field_name = "speaker_" + str(indx) + "_details" - task.data[field_name] = stringify_json(task.data["speakers_json"][indx]) - indx += 1 - """ - tasks.append(task) + for _ in range(project.required_annotators_per_task): + task = Task(data=item, project_id=project, input_data=data) + tasks.append(task) # Bulk create the tasks Task.objects.bulk_create(tasks) @@ -424,19 +404,20 @@ def export_project_in_place( # List for storing the annotated tasks that have been accepted as correct annotation annotated_tasks = [] export_excluded_task_ids = [] - required_annotators_per_task = project.required_annotators_per_task + # required_annotators_per_task = project.required_annotators_per_task for task in tasks: 
task_dict = model_to_dict(task) # Rename keys to match label studio converter # task_dict['id'] = task_dict['task_id'] # del task_dict['task_id'] ann_list = [] - if required_annotators_per_task >= 2: - all_ann = Annotation.objects.filter(task=task) - for a in all_ann: - ann_list.append(a) - task_dict["annotations"] = ann_list - elif task.correct_annotation is not None: + # if required_annotators_per_task >= 2: + # all_ann = Annotation.objects.filter(task=task) + # for a in all_ann: + # ann_list.append(a) + # task_dict["annotations"] = ann_list + # elif task.correct_annotation is not None: + if task.correct_annotation is not None: annotated_tasks.append(task) annotation_dict = model_to_dict(task.correct_annotation) # annotation_dict['result'] = annotation_dict['result_json'] diff --git a/backend/projects/views.py b/backend/projects/views.py index be92604a..16631b2d 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -16,7 +16,7 @@ from rest_framework.response import Response from users.models import LANG_CHOICES from users.serializers import UserEmailSerializer -from dataset.serializers import TaskResultSerializer +from dataset.serializers import TaskResultSerializer, DatasetInstanceSerializer from utils.search import process_search_query from django_celery_results.models import TaskResult from drf_yasg import openapi @@ -1547,36 +1547,37 @@ def next(self, request, pk): task_ids = [an.task_id for an in ann_filter1] queryset = Task.objects.filter(id__in=task_ids).order_by("id") - required_annotators_per_task = project.required_annotators_per_task - next_anno = "" - if required_annotators_per_task > 1: - try: - curr_anno_id = int(request.data.get("current_annotation_id")) - except Exception as e: - ret_dict = {"message": "Please send the current_annotation_id"} - ret_status = status.HTTP_400_BAD_REQUEST - return Response(ret_dict, status=ret_status) - for task in queryset: - curr_task_anno = ann_filter1.filter(task=task).order_by("id") - ann_ids 
= [an.id for an in curr_task_anno] - if curr_anno_id != ann_ids[-1]: - for i, c in enumerate(ann_ids): - if c == curr_anno_id: - next_anno = ann_ids[i + 1] - if next_anno: - queryset = queryset.filter(id=current_task_id) - elif current_task_id != None: + # required_annotators_per_task = project.required_annotators_per_task + # next_anno = "" + # if required_annotators_per_task > 1: + # try: + # curr_anno_id = int(request.data.get("current_annotation_id")) + # except Exception as e: + # ret_dict = {"message": "Please send the current_annotation_id"} + # ret_status = status.HTTP_400_BAD_REQUEST + # return Response(ret_dict, status=ret_status) + # for task in queryset: + # curr_task_anno = ann_filter1.filter(task=task).order_by("id") + # ann_ids = [an.id for an in curr_task_anno] + # if curr_anno_id != ann_ids[-1]: + # for i, c in enumerate(ann_ids): + # if c == curr_anno_id: + # next_anno = ann_ids[i + 1] + # if next_anno: + # queryset = queryset.filter(id=current_task_id) + # elif current_task_id != None: + if current_task_id != None: queryset = queryset.filter(id__gt=current_task_id) for task in queryset: - if next_anno: - task_dict = TaskSerializer(task, many=False).data - task_dict["correct_annotation"] = next_anno - return Response(task_dict) - elif required_annotators_per_task > 1: - next_anno = ann_filter1.filter(task=task).order_by("id") - task_dict = TaskSerializer(task, many=False).data - task_dict["correct_annotation"] = next_anno[0].id - return Response(task_dict) + # if next_anno: + # task_dict = TaskSerializer(task, many=False).data + # task_dict["correct_annotation"] = next_anno + # return Response(task_dict) + # elif required_annotators_per_task > 1: + # next_anno = ann_filter1.filter(task=task).order_by("id") + # task_dict = TaskSerializer(task, many=False).data + # task_dict["correct_annotation"] = next_anno[0].id + # return Response(task_dict) task_dict = TaskSerializer(task, many=False).data return Response(task_dict) ret_dict = {"message": "No 
more tasks available!"} @@ -1948,34 +1949,39 @@ def assign_new_tasks(self, request, pk, *args, **kwargs): project.max_tasks_per_user - tasks_assigned_to_user, tasks_to_be_assigned, ) + ( + data_items_of_unassigned_tasks, + data_items_of_assigned_tasks, + data_items_vs_tasks_map, + ) = (set(), set(), {}) + for t in tasks: + if not t.annotation_users.all(): + data_items_vs_tasks_map[t.input_data.id] = t + data_items_of_unassigned_tasks.add(t.input_data.id) + for anno in proj_annotations: + data_items_of_assigned_tasks.add(anno.task.input_data.id) + all_unassigned_data_items = ( + data_items_of_unassigned_tasks - data_items_of_assigned_tasks + ) + tasks = [data_items_vs_tasks_map[audt] for audt in all_unassigned_data_items] if max_task_that_can_be_assigned: tasks = tasks[:max_task_that_can_be_assigned] else: tasks = tasks[:tasks_to_be_assigned] - # tasks = tasks.order_by("id") + if not tasks: + project.release_lock(ANNOTATION_LOCK) + return Response( + {"message": "No tasks left for assignment in this project"}, + status=status.HTTP_404_NOT_FOUND, + ) for task in tasks: task.annotation_users.add(cur_user) task.save() result = [] - if project.project_type in [ - "AcousticNormalisedTranscriptionEditing", - "AudioTranscriptionEditing", - "OCRTranscriptionEditing", - ]: - try: - result = convert_prediction_json_to_annotation_result( - task.input_data.id, project.project_type - ) - except Exception as e: - print( - f"The prediction json of the data item-{task.input_data.id} is corrupt." 
- ) - task.delete() - continue annotator_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION ).count() - if annotator_anno_count < project.required_annotators_per_task: + if annotator_anno_count == 0: cur_user_anno_count = Annotation_model.objects.filter( task_id=task, annotation_type=ANNOTATOR_ANNOTATION, @@ -2236,6 +2242,33 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): task_ids = task_ids[:task_pull_count] seen = set() required_annotators_per_task = project.required_annotators_per_task + corrupted_tasks = set() + if required_annotators_per_task > 1: + seen_tasks = set(task_ids) + for i in range(len(task_ids)): + ti = task_ids[i] + t = Task.objects.get(id=ti) + similar_tasks = ( + Task.objects.filter(input_data=t.input_data, project_id=project.id) + .filter(task_status=ANNOTATED) + .filter(review_user__isnull=True) + .exclude(id=t.id) + ) + corrupt_tasks = ( + Task.objects.filter(input_data=t.input_data, project_id=project.id) + .filter(task_status=INCOMPLETE) + .filter(review_user__isnull=True) + .exclude(id=t.id) + ) + if corrupt_tasks: + corrupted_tasks.add(task_ids[i]) + continue + for j in range(len(similar_tasks)): + st = similar_tasks[j] + if st.id not in seen_tasks: + task_ids.append(st.id) + task_ids = [t for t in task_ids if t not in corrupted_tasks] + task_ids = task_ids[:task_pull_count] for task_id in task_ids: if task_id in seen: continue @@ -2254,26 +2287,25 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): reviewer_anno_count = Annotation_model.objects.filter( task_id=task_id, annotation_type=REVIEWER_ANNOTATION ).count() - for i in range(required_annotators_per_task): - if reviewer_anno_count == 0: - base_annotation_obj = Annotation_model( - result=rec_ann[i].result, - task=task, - completed_by=cur_user, - annotation_status="unreviewed", - parent_annotation=rec_ann[i], - annotation_type=REVIEWER_ANNOTATION, + if reviewer_anno_count == 0: + base_annotation_obj = 
Annotation_model( + result=rec_ann[0].result, + task=task, + completed_by=cur_user, + annotation_status="unreviewed", + parent_annotation=rec_ann[0], + annotation_type=REVIEWER_ANNOTATION, + ) + try: + base_annotation_obj.save() + except IntegrityError as e: + print( + f"Task, completed_by and parent_annotation fields are same while assigning new review task " + f"for project id-{project.id}, user-{cur_user.email}" ) - try: - base_annotation_obj.save() - except IntegrityError as e: - print( - f"Task, completed_by and parent_annotation fields are same while assigning new review task " - f"for project id-{project.id}, user-{cur_user.email}" - ) - else: - task.review_user = reviewer_anno[i].completed_by - task.save() + else: + task.review_user = reviewer_anno[i].completed_by + task.save() project.release_lock(REVIEW_LOCK) return Response( {"message": "Tasks assigned successfully"}, status=status.HTTP_200_OK @@ -3669,7 +3701,7 @@ def download(self, request, pk=None, *args, **kwargs): ret_status = status.HTTP_200_OK return Response(ret_dict, status=ret_status) tasks_list = [] - required_annotators_per_task = project.required_annotators_per_task + # required_annotators_per_task = project.required_annotators_per_task for task in tasks: ann_list = [] task_dict = model_to_dict(task) @@ -3693,17 +3725,18 @@ def download(self, request, pk=None, *args, **kwargs): )[0] annotator_email = "" - if correct_annotation is not None and required_annotators_per_task < 2: + # if correct_annotation is not None and required_annotators_per_task < 2: + if correct_annotation is not None: try: annotator_email = correct_annotation.completed_by.email except: pass task_dict["annotations"] = [correct_annotation] - elif required_annotators_per_task >= 2: - all_ann = Annotation.objects.filter(task=task) - for a in all_ann: - ann_list.append(a) - task_dict["annotations"] = ann_list + # elif required_annotators_per_task >= 2: + # all_ann = Annotation.objects.filter(task=task) + # for a in all_ann: + # 
ann_list.append(a) + # task_dict["annotations"] = ann_list else: task_dict["annotations"] = [] diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 398bbd39..2f5e7046 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -216,7 +216,7 @@ def list(self, request, *args, **kwargs): if exist_req_user: user_id = int(req_user) - required_annotators_per_task = proj_objs[0].required_annotators_per_task + # required_annotators_per_task = proj_objs[0].required_annotators_per_task if "annotation_status" in dict(request.query_params): ann_status = request.query_params["annotation_status"] ann_status = ast.literal_eval(ann_status) @@ -389,32 +389,32 @@ def list(self, request, *args, **kwargs): task_objs.sort(key=lambda x: x["id"]) ordered_tasks = [] final_dict = {} - seen = set() + # seen = set() for task_obj in task_objs: - if task_obj["id"] in seen: - continue - seen.add(task_obj["id"]) + # if task_obj["id"] in seen: + # continue + # seen.add(task_obj["id"]) tas = Task.objects.filter(id=task_obj["id"]) tas = tas.values()[0] tas["review_status"] = task_obj["annotation_status"] tas["user_mail"] = task_obj["user_mail"] - if required_annotators_per_task > 1: - review_ann = [ - a - for a in Annotation.objects.filter( - task_id=tas["id"] - ).order_by("id") - if a.annotation_type == REVIEWER_ANNOTATION - ] - if len(review_ann) > 1: - for r in review_ann: - tas_copy = deepcopy(tas) - tas_copy["correct_annotation_id"] = r.id - tas_copy[ - "annotator_mail" - ] = r.parent_annotation.completed_by.email - ordered_tasks.append(tas_copy) - continue + # if required_annotators_per_task > 1: + # review_ann = [ + # a + # for a in Annotation.objects.filter( + # task_id=tas["id"] + # ).order_by("id") + # if a.annotation_type == REVIEWER_ANNOTATION + # ] + # if len(review_ann) > 1: + # for r in review_ann: + # tas_copy = deepcopy(tas) + # tas_copy["correct_annotation_id"] = r.id + # tas_copy[ + # "annotator_mail" + # ] = r.parent_annotation.completed_by.email + # 
ordered_tasks.append(tas_copy) + # continue ordered_tasks.append(tas) if page_number is not None: @@ -507,11 +507,11 @@ def list(self, request, *args, **kwargs): task_objs.sort(key=lambda x: x["id"]) ordered_tasks = [] final_dict = {} - seen = set() + # seen = set() for task_obj in task_objs: - if task_obj["id"] in seen: - continue - seen.add(task_obj["id"]) + # if task_obj["id"] in seen: + # continue + # seen.add(task_obj["id"]) tas = Task.objects.filter(id=task_obj["id"]) tas = tas.values()[0] tas["review_status"] = task_obj["annotation_status"] @@ -559,23 +559,23 @@ def list(self, request, *args, **kwargs): else: tas["data"]["output_text"] = "-" del tas["data"]["machine_translation"] - if required_annotators_per_task > 1: - review_ann = [ - a - for a in Annotation.objects.filter( - task_id=tas["id"] - ).order_by("id") - if a.annotation_type == REVIEWER_ANNOTATION - ] - if len(review_ann) > 1: - for r in review_ann: - tas_copy = deepcopy(tas) - tas_copy["correct_annotation_id"] = r.id - tas_copy[ - "annotator_mail" - ] = r.parent_annotation.completed_by.email - ordered_tasks.append(tas_copy) - continue + # if required_annotators_per_task > 1: + # review_ann = [ + # a + # for a in Annotation.objects.filter( + # task_id=tas["id"] + # ).order_by("id") + # if a.annotation_type == REVIEWER_ANNOTATION + # ] + # if len(review_ann) > 1: + # for r in review_ann: + # tas_copy = deepcopy(tas) + # tas_copy["correct_annotation_id"] = r.id + # tas_copy[ + # "annotator_mail" + # ] = r.parent_annotation.completed_by.email + # ordered_tasks.append(tas_copy) + # continue ordered_tasks.append(tas) if page_number is not None: page_object = Paginator(ordered_tasks, records) @@ -1689,17 +1689,15 @@ def partial_update(self, request, pk=None): no_of_annotations = task.annotations.filter( annotation_type=ANNOTATOR_ANNOTATION, annotation_status="labeled" ).count() - if task.project_id.required_annotators_per_task == no_of_annotations: - # if True: - task.task_status = ANNOTATED - if not ( 
- task.project_id.project_stage == REVIEW_STAGE - or task.project_id.project_stage == SUPERCHECK_STAGE - ): - if no_of_annotations == 1: - task.correct_annotation = annotation + task.task_status = ANNOTATED + if not ( + task.project_id.project_stage == REVIEW_STAGE + or task.project_id.project_stage == SUPERCHECK_STAGE + ): + if no_of_annotations == 1: + task.correct_annotation = annotation - task.save() + task.save() # Review annotation update elif annotation_obj.annotation_type == REVIEWER_ANNOTATION: From 7d141fb73b157e819c8d838a3e811948af08283e Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 20 Nov 2024 09:30:58 +0530 Subject: [PATCH 2/6] adding final changes for filter, review count and wer scores --- backend/functions/tasks.py | 69 ++++++++++++++++- backend/projects/utils.py | 152 +++++++++++++++++++++++++++++++++---- backend/projects/views.py | 38 +++++++--- 3 files changed, 232 insertions(+), 27 deletions(-) diff --git a/backend/functions/tasks.py b/backend/functions/tasks.py index 9f30ef6e..c3dd79ae 100644 --- a/backend/functions/tasks.py +++ b/backend/functions/tasks.py @@ -196,6 +196,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u result_ann_meta_stats, result_rev_meta_stats, result_sup_meta_stats, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) = get_stats_definitions() for ann_obj in annotations: if ann_obj.annotation_type == ANNOTATOR_ANNOTATION: @@ -208,6 +211,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u result_ann_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) except: continue @@ -221,6 +227,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u result_rev_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) except: continue @@ -234,6 +243,9 @@ def get_stats(proj_objs, 
anno_stats, meta_stats, complete_stats, project_type, u result_sup_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ) except: continue @@ -247,6 +259,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u anno_stats, meta_stats, complete_stats, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, proj.id, user, ) @@ -407,6 +422,9 @@ def get_stats_definitions(): result_ann_meta_stats, result_rev_meta_stats, result_sup_meta_stats, + [], + [], + [], ) @@ -420,6 +438,9 @@ def get_modified_stats_result( anno_stats, meta_stats, complete_stats, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, proj_id, user, ): @@ -467,6 +488,15 @@ def get_modified_stats_result( .exclude(review_user=user.id) .count() ) + result["Average Annotator VS Reviewer Word Error Rate"] = "{:.2f}".format( + get_average_of_a_list(average_ann_vs_rev_WER) + ) + result["Average Reviewer VS Superchecker Word Error Rate"] = "{:.2f}".format( + get_average_of_a_list(average_rev_vs_sup_WER) + ) + result["Average Annotator VS Superchecker Word Error Rate"] = "{:.2f}".format( + get_average_of_a_list(average_rev_vs_sup_WER) + ) return result @@ -476,7 +506,7 @@ def get_average_of_a_list(arr): total_sum = 0 total_length = 0 for num in arr: - if isinstance(num, int): + if isinstance(num, int) or isinstance(num, float): total_sum += num total_length += 1 return total_sum / total_length if total_length > 0 else 0 @@ -547,6 +577,9 @@ def get_stats_helper( result_meta_stats, ann_obj, project_type, + average_ann_vs_rev_WER, + average_rev_vs_sup_WER, + average_ann_vs_sup_WER, ): task_obj = ann_obj.task task_data = task_obj.data @@ -560,6 +593,40 @@ def get_stats_helper( ann_obj, project_type, ) + if task_obj.task_status == REVIEWED: + if ann_obj.annotation_type == REVIEWER_ANNOTATION: + try: + average_ann_vs_rev_WER.append( + calculate_wer_between_two_annotations( + 
get_most_recent_annotation(ann_obj).result, + get_most_recent_annotation(ann_obj.parent_annotation).result, + ) + ) + except Exception as error: + pass + elif task_obj.task_status == SUPER_CHECKED: + if ann_obj.annotation_type == SUPER_CHECKER_ANNOTATION: + try: + average_ann_vs_rev_WER.append( + calculate_wer_between_two_annotations( + get_most_recent_annotation(ann_obj.parent_annotation).result, + get_most_recent_annotation( + ann_obj.parent_annotation.parent_annotation + ).result, + ) + ) + except Exception as error: + pass + try: + average_rev_vs_sup_WER.append( + calculate_wer_between_two_annotations( + get_most_recent_annotation(ann_obj).result, + get_most_recent_annotation(ann_obj.parent_annotation).result, + ) + ) + except Exception as error: + pass + return 0 diff --git a/backend/projects/utils.py b/backend/projects/utils.py index ad43562d..f38ba2f5 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -7,7 +7,7 @@ from projects.models import Project from rest_framework.response import Response from rest_framework import status -from tasks.models import Annotation as Annotation_model +from tasks.models import Annotation as Annotation_model, LABELED, Task from users.models import User from dataset.models import Instruction, Interaction @@ -189,27 +189,20 @@ def get_audio_segments_count(annotation_result): def calculate_word_error_rate_between_two_audio_transcription_annotation( annotation_result1, annotation_result2 ): - annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"])) - annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"])) - annotation_result1_text = "" annotation_result2_text = "" for result in annotation_result1: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass + try: + annotation_result1_text += result["prompt"] + except: + pass for result in 
annotation_result2: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: - try: - for s in result["value"]["text"]: - annotation_result2_text += s - except: - pass + try: + annotation_result2_text += result["prompt"] + except: + pass if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0: return 0 return wer(annotation_result1_text, annotation_result2_text) @@ -384,3 +377,130 @@ def get_annotations_for_project( return None, Response( {"message": "Project id not provided"}, status=status.HTTP_400_BAD_REQUEST ) + + +def filter_tasks_for_review_filter_criteria(task_ids): + tasks_to_be_removed = set() + for task_id in task_ids: + task = Task.objects.filter(id=task_id) + ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED)[0] + for r in ann.result: + model_responses_json = r["model_responses_json"] + for mr in model_responses_json: + questions_response = mr["questions_response"] + for qr in questions_response: + if ( + "review_filter_criteria" in qr["question"] + and "review_filter_values" in qr["question"] + and "response" in qr + ): + response = qr["response"] + if not isinstance(response, list) or not isinstance( + qr["question"]["review_filter_values"], list + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() == "equals" + ): + if not check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "not_equals" + ): + if check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "greater_than" + ): + if not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == 
"greater_than_equals" + ): + if not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than_equals" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) + task_ids = [t for t in task_ids if t not in tasks_to_be_removed] + return task_ids + + +def check_matching_values_equal(list1, list2): + processed_list1 = set() + + for item in list1: + if isinstance(item, str): + processed_list1.add(item.lower()) + elif isinstance(item, int): + processed_list1.add(float(item)) + + for item in list2: + if isinstance(item, str): + if item.lower() in processed_list1: + return True + elif isinstance(item, int): + if float(item) in processed_list1: + return True + return False + + +def check_matching_values_greater(list1, list2, criteria): + integers_list1, integers_list2 = [], [] + for item1 in list1: + if isinstance(item1, int): + integers_list1.append(item1) + elif isinstance(item1, str): + if item1.isdigit(): + integers_list1.append(int(item1)) + for item2 in list2: + if isinstance(item2, int): + integers_list1.append(item2) + elif isinstance(item2, str): + if item2.isdigit(): + integers_list1.append(int(item2)) + + if criteria == "greater_than": + for num1 in integers_list1: + for num2 in integers_list2: + if num1 > num2: + return True + return False + else: + for num1 in integers_list1: + for num2 in integers_list2: + if num1 >= num2: + return True + return False diff --git a/backend/projects/views.py b/backend/projects/views.py index 16631b2d..18876c28 100644 --- 
a/backend/projects/views.py +++ b/backend/projects/views.py @@ -8,7 +8,7 @@ from django.core.files import File from django.db import IntegrityError -from django.db.models import Count, Q, F, Case, When +from django.db.models import Count, Q, F, Case, When, OuterRef, Exists from django.forms.models import model_to_dict from rest_framework import status, viewsets from rest_framework.decorators import action @@ -46,6 +46,7 @@ get_user_from_query_params, ocr_word_count, get_attributes_for_ModelInteractionEvaluation, + filter_tasks_for_review_filter_criteria, ) from dataset.models import DatasetInstance @@ -864,15 +865,31 @@ def retrieve(self, request, pk, *args, **kwargs): project_response.data["unassigned_task_count"] = get_task_count_unassigned( pk, request.user ) - - # Add a field to specify the no. of labeled tasks - project_response.data["labeled_task_count"] = ( - Task.objects.filter(project_id=pk) - .filter(task_status=ANNOTATED) - .filter(review_user__isnull=True) - .exclude(annotation_users=request.user.id) - .count() - ) + project = Project.objects.get(id=pk) + if project.required_annotators_per_task > 1: + similar_task_incomplete = Task.objects.filter( + project_id=OuterRef("project_id"), + input_data=OuterRef("input_data"), + task_status=INCOMPLETE, + ).exclude(id=OuterRef("id")) + + tasks = ( + Task.objects.filter( + project_id=pk, task_status=ANNOTATED, review_user__isnull=True + ) + .exclude(annotation_users=request.user.id) + .exclude(Exists(similar_task_incomplete)) + .count() + ) + project_response.data["labeled_task_count"] = tasks + else: + project_response.data["labeled_task_count"] = ( + Task.objects.filter(project_id=pk) + .filter(task_status=ANNOTATED) + .filter(review_user__isnull=True) + .exclude(annotation_users=request.user.id) + .count() + ) # Add a field to specify the no. 
of reviewed tasks project_response.data["reviewed_task_count"] = ( @@ -2269,6 +2286,7 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): task_ids.append(st.id) task_ids = [t for t in task_ids if t not in corrupted_tasks] task_ids = task_ids[:task_pull_count] + task_ids = filter_tasks_for_review_filter_criteria(task_ids) for task_id in task_ids: if task_id in seen: continue From 6d4d9884151e67fcd5225ee2eff7966db32020ab Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 20 Nov 2024 13:14:06 +0530 Subject: [PATCH 3/6] minor_fix --- backend/projects/utils.py | 141 ++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index f38ba2f5..0c2ce6a3 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -384,76 +384,81 @@ def filter_tasks_for_review_filter_criteria(task_ids): for task_id in task_ids: task = Task.objects.filter(id=task_id) ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED)[0] + if not isinstance(ann.result, list): + continue for r in ann.result: - model_responses_json = r["model_responses_json"] - for mr in model_responses_json: - questions_response = mr["questions_response"] - for qr in questions_response: - if ( - "review_filter_criteria" in qr["question"] - and "review_filter_values" in qr["question"] - and "response" in qr - ): - response = qr["response"] - if not isinstance(response, list) or not isinstance( - qr["question"]["review_filter_values"], list - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() == "equals" - ): - if not check_matching_values_equal( - response, qr["question"]["review_filter_values"] + if "model_responses_json" in r: + model_responses_json = r["model_responses_json"] + for mr in model_responses_json: + if "questions_response" in mr: + questions_response = mr["questions_response"] + for qr in questions_response: + if ( + 
"review_filter_criteria" in qr["question"] + and "review_filter_values" in qr["question"] + and "response" in qr ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "not_equals" - ): - if check_matching_values_equal( - response, qr["question"]["review_filter_values"] - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "greater_than" - ): - if not check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than", - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "greater_than_equals" - ): - if not check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than_equals", - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "less_than" - ): - if check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than_equals", - ): - tasks_to_be_removed.add(task_id) - elif ( - qr["question"]["review_filter_criteria"].lower() - == "less_than_equals" - ): - if check_matching_values_greater( - response, - qr["question"]["review_filter_values"], - "greater_than", - ): - tasks_to_be_removed.add(task_id) + response = qr["response"] + if not isinstance(response, list) or not isinstance( + qr["question"]["review_filter_values"], list + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "equals" + ): + if not check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "not_equals" + ): + if check_matching_values_equal( + response, qr["question"]["review_filter_values"] + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "greater_than" + ): + if 
not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "greater_than_equals" + ): + if not check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than_equals", + ): + tasks_to_be_removed.add(task_id) + elif ( + qr["question"]["review_filter_criteria"].lower() + == "less_than_equals" + ): + if check_matching_values_greater( + response, + qr["question"]["review_filter_values"], + "greater_than", + ): + tasks_to_be_removed.add(task_id) task_ids = [t for t in task_ids if t not in tasks_to_be_removed] return task_ids From 9c29a14418166f45d6d32eed5059e5e390f97de2 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 20 Nov 2024 13:16:06 +0530 Subject: [PATCH 4/6] black formatting --- backend/tasks/urls.py | 8 ++++---- backend/tasks/views.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/backend/tasks/urls.py b/backend/tasks/urls.py index 658eb5da..8a1a2ea1 100644 --- a/backend/tasks/urls.py +++ b/backend/tasks/urls.py @@ -17,8 +17,8 @@ urlpatterns = [ path("get_celery_tasks", get_celery_tasks), path( - "xlit-api/generic/transliteration//", - TransliterationAPIView.as_view(), - name="transliteration-api", - ), + "xlit-api/generic/transliteration//", + TransliterationAPIView.as_view(), + name="transliteration-api", + ), ] + router.urls diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 24762864..3f7ccf13 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -2496,6 +2496,7 @@ def get_celery_tasks(request): data = paginate_queryset(filtered_tasks, page_number, page_size) return 
JsonResponse(data["results"], safe=False) + class TransliterationAPIView(APIView): permission_classes = [IsAuthenticated] @@ -2506,4 +2507,4 @@ def get(self, request, target_language, data, *args, **kwargs): ) transliteration_output = response_transliteration.json() - return Response(transliteration_output, status=status.HTTP_200_OK) \ No newline at end of file + return Response(transliteration_output, status=status.HTTP_200_OK) From 670286f8290c459d5ba22515b2ff420a4eb4e502 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Thu, 21 Nov 2024 13:53:04 +0530 Subject: [PATCH 5/6] minor_fix_review_tasks --- backend/projects/utils.py | 5 ++++- backend/projects/views.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 0c2ce6a3..b495a67d 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -383,7 +383,10 @@ def filter_tasks_for_review_filter_criteria(task_ids): tasks_to_be_removed = set() for task_id in task_ids: task = Task.objects.filter(id=task_id) - ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED)[0] + try: + ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED) + except Exception as e: + continue if not isinstance(ann.result, list): continue for r in ann.result: diff --git a/backend/projects/views.py b/backend/projects/views.py index 18876c28..557486ca 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -2286,7 +2286,8 @@ def assign_new_review_tasks(self, request, pk, *args, **kwargs): task_ids.append(st.id) task_ids = [t for t in task_ids if t not in corrupted_tasks] task_ids = task_ids[:task_pull_count] - task_ids = filter_tasks_for_review_filter_criteria(task_ids) + if required_annotators_per_task > 1: + task_ids = filter_tasks_for_review_filter_criteria(task_ids) for task_id in task_ids: if task_id in seen: continue From 462aa92b92a0d0c343275806c395aa9b5411b759 Mon Sep 17 00:00:00 2001 From: Kunal 
Tiwary Date: Fri, 22 Nov 2024 20:23:51 +0530 Subject: [PATCH 6/6] added minor_fix_390_putils --- backend/projects/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index b495a67d..60599203 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -387,6 +387,10 @@ def filter_tasks_for_review_filter_criteria(task_ids): ann = Annotation.objects.filter(task=task[0], annotation_status=LABELED) except Exception as e: continue + try: + ann = ann[0] + except Exception as e: + continue if not isinstance(ann.result, list): continue for r in ann.result: