Merge branch 'final_changes_multiple_ann' into added_minor_fix_wer_draft
ishvindersethi22 authored Nov 22, 2024
2 parents 112afc8 + 462aa92 commit 6c9096f
Showing 5 changed files with 405 additions and 173 deletions.
69 changes: 68 additions & 1 deletion backend/functions/tasks.py
@@ -197,6 +197,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u
result_ann_meta_stats,
result_rev_meta_stats,
result_sup_meta_stats,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
) = get_stats_definitions()
for ann_obj in annotations:
if ann_obj.annotation_type == ANNOTATOR_ANNOTATION:
@@ -209,6 +212,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u
result_ann_meta_stats,
ann_obj,
project_type,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
)
except:
continue
@@ -222,6 +228,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u
result_rev_meta_stats,
ann_obj,
project_type,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
)
except:
continue
@@ -235,6 +244,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u
result_sup_meta_stats,
ann_obj,
project_type,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
)
except:
continue
@@ -248,6 +260,9 @@ def get_stats(proj_objs, anno_stats, meta_stats, complete_stats, project_type, u
anno_stats,
meta_stats,
complete_stats,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
proj.id,
user,
)
@@ -408,6 +423,9 @@ def get_stats_definitions():
result_ann_meta_stats,
result_rev_meta_stats,
result_sup_meta_stats,
[],
[],
[],
)


@@ -421,6 +439,9 @@ def get_modified_stats_result(
anno_stats,
meta_stats,
complete_stats,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
proj_id,
user,
):
@@ -468,6 +489,15 @@ def get_modified_stats_result(
.exclude(review_user=user.id)
.count()
)
result["Average Annotator VS Reviewer Word Error Rate"] = "{:.2f}".format(
get_average_of_a_list(average_ann_vs_rev_WER)
)
result["Average Reviewer VS Superchecker Word Error Rate"] = "{:.2f}".format(
get_average_of_a_list(average_rev_vs_sup_WER)
)
result["Average Annotator VS Superchecker Word Error Rate"] = "{:.2f}".format(
get_average_of_a_list(average_ann_vs_sup_WER)
)
return result


@@ -477,7 +507,7 @@ def get_average_of_a_list(arr):
total_sum = 0
total_length = 0
for num in arr:
-        if isinstance(num, int):
+        if isinstance(num, int) or isinstance(num, float):
total_sum += num
total_length += 1
return total_sum / total_length if total_length > 0 else 0
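
As a standalone illustration (not part of the commit), the helper's behaviour on the mixed-type lists it receives: non-numeric entries are skipped, and an empty or all-invalid list averages to 0.

def get_average_of_a_list(arr):
    # Mirrors the helper above: sum only numeric entries.
    total_sum = 0
    total_length = 0
    for num in arr:
        if isinstance(num, int) or isinstance(num, float):
            total_sum += num
            total_length += 1
    return total_sum / total_length if total_length > 0 else 0


print(get_average_of_a_list([0.12, 0.3, "n/a"]))  # 0.21 -- the string is skipped
print(get_average_of_a_list([]))                  # 0 -- empty list, no division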
@@ -551,6 +581,9 @@ def get_stats_helper(
result_meta_stats,
ann_obj,
project_type,
average_ann_vs_rev_WER,
average_rev_vs_sup_WER,
average_ann_vs_sup_WER,
):
task_obj = ann_obj.task
task_data = task_obj.data
@@ -564,6 +597,40 @@
ann_obj,
project_type,
)
    if task_obj.task_status == REVIEWED:
        if ann_obj.annotation_type == REVIEWER_ANNOTATION:
            try:
                average_ann_vs_rev_WER.append(
                    calculate_wer_between_two_annotations(
                        get_most_recent_annotation(ann_obj).result,
                        get_most_recent_annotation(ann_obj.parent_annotation).result,
                    )
                )
            except Exception as error:
                pass
    elif task_obj.task_status == SUPER_CHECKED:
        if ann_obj.annotation_type == SUPER_CHECKER_ANNOTATION:
            try:
                average_ann_vs_rev_WER.append(
                    calculate_wer_between_two_annotations(
                        get_most_recent_annotation(ann_obj.parent_annotation).result,
                        get_most_recent_annotation(
                            ann_obj.parent_annotation.parent_annotation
                        ).result,
                    )
                )
            except Exception as error:
                pass
            try:
                average_rev_vs_sup_WER.append(
                    calculate_wer_between_two_annotations(
                        get_most_recent_annotation(ann_obj).result,
                        get_most_recent_annotation(ann_obj.parent_annotation).result,
                    )
                )
            except Exception as error:
                pass

return 0
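
For orientation, a minimal sketch (hypothetical stand-in objects, not the project's Django models) of the parent_annotation chain these branches walk: a superchecker annotation's parent is the reviewer annotation, whose parent in turn is the annotator annotation.

from dataclasses import dataclass
from typing import Optional


@dataclass
class Ann:  # hypothetical stand-in for the Annotation model
    result: str
    parent_annotation: Optional["Ann"] = None


annotator = Ann("hello world")
reviewer = Ann("hello word", parent_annotation=annotator)
superchecker = Ann("hello world", parent_annotation=reviewer)

# REVIEWED task: reviewer vs parent (annotator) -> average_ann_vs_rev_WER
# SUPER_CHECKED task: parent (reviewer) vs grandparent (annotator) -> average_ann_vs_rev_WER,
# and superchecker vs parent (reviewer) -> average_rev_vs_sup_WER
assert superchecker.parent_annotation is reviewer
assert superchecker.parent_annotation.parent_annotation is annotator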


41 changes: 11 additions & 30 deletions backend/projects/tasks.py
@@ -199,29 +199,9 @@ def create_tasks_from_dataitems(items, project):
# Remove data id because it's not needed in task.data
if "id" in item:
del item["id"]
-        task = Task(data=item, project_id=project, input_data=data)
-        """
-        if is_translation_project or dataset_type1 == "TranslationPair":
-            if is_conversation_project:
-                field_name = (
-                    "source_conversation_json"
-                    if is_editing_project
-                    else "conversation_json"
-                )
-                task.data["word_count"] = conversation_wordcount(task.data[field_name])
-                task.data["sentence_count"] = conversation_sentence_count(
-                    task.data[field_name]
-                )
-            else:
-                task.data["word_count"] = no_of_words(task.data["input_text"])
-        if is_audio_project:
-            indx = 0
-            for speaker in task.data["speakers_json"]:
-                field_name = "speaker_" + str(indx) + "_details"
-                task.data[field_name] = stringify_json(task.data["speakers_json"][indx])
-                indx += 1
-        """
-        tasks.append(task)
+        for _ in range(project.required_annotators_per_task):
+            task = Task(data=item, project_id=project, input_data=data)
+            tasks.append(task)
# Bulk create the tasks
Task.objects.bulk_create(tasks)

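In isolation, the new loop's effect is that each data item now yields one Task per required annotator before the bulk insert; a minimal sketch with plain dicts (the Django models are not involved):

items = [{"text": "a"}, {"text": "b"}]
required_annotators_per_task = 3  # illustrative value

tasks = []
for item in items:
    for _ in range(required_annotators_per_task):
        tasks.append({"data": dict(item)})  # one copy per annotator

print(len(tasks))  # 6 -- bulk_create would then insert six Task rows
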
@@ -424,19 +404,20 @@ def export_project_in_place(
# List for storing the annotated tasks that have been accepted as correct annotation
annotated_tasks = []
export_excluded_task_ids = []
-    required_annotators_per_task = project.required_annotators_per_task
+    # required_annotators_per_task = project.required_annotators_per_task
for task in tasks:
task_dict = model_to_dict(task)
# Rename keys to match label studio converter
# task_dict['id'] = task_dict['task_id']
# del task_dict['task_id']
ann_list = []
-        if required_annotators_per_task >= 2:
-            all_ann = Annotation.objects.filter(task=task)
-            for a in all_ann:
-                ann_list.append(a)
-            task_dict["annotations"] = ann_list
-        elif task.correct_annotation is not None:
+        # if required_annotators_per_task >= 2:
+        #     all_ann = Annotation.objects.filter(task=task)
+        #     for a in all_ann:
+        #         ann_list.append(a)
+        #     task_dict["annotations"] = ann_list
+        # elif task.correct_annotation is not None:
+        if task.correct_annotation is not None:
annotated_tasks.append(task)
annotation_dict = model_to_dict(task.correct_annotation)
# annotation_dict['result'] = annotation_dict['result_json']
164 changes: 148 additions & 16 deletions backend/projects/utils.py
@@ -7,7 +7,7 @@
from projects.models import Project
from rest_framework.response import Response
from rest_framework import status
-from tasks.models import Annotation as Annotation_model
+from tasks.models import Annotation as Annotation_model, LABELED, Task
from users.models import User

from dataset.models import Instruction, Interaction
@@ -189,27 +189,20 @@ def get_audio_segments_count(annotation_result):
def calculate_word_error_rate_between_two_audio_transcription_annotation(
annotation_result1, annotation_result2
):
annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"]))
annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"]))

annotation_result1_text = ""
annotation_result2_text = ""

for result in annotation_result1:
if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]:
try:
for s in result["value"]["text"]:
annotation_result1_text += s
except:
pass
try:
annotation_result1_text += result["prompt"]
except:
pass

for result in annotation_result2:
if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]:
try:
for s in result["value"]["text"]:
annotation_result2_text += s
except:
pass
try:
annotation_result2_text += result["prompt"]
except:
pass
if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0:
return 0
return wer(annotation_result1_text, annotation_result2_text)
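
The final wer call is presumably jiwer's word error rate (its import sits outside this hunk); a hedged usage sketch of that last step:

from jiwer import wer  # assumes the wer() used above is jiwer.wer

reference = "the quick brown fox"
hypothesis = "the quick brown box"
print(wer(reference, hypothesis))  # 0.25 -- one substituted word out of four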
@@ -384,3 +377,142 @@ def get_annotations_for_project(
return None, Response(
{"message": "Project id not provided"}, status=status.HTTP_400_BAD_REQUEST
)


def filter_tasks_for_review_filter_criteria(task_ids):
    tasks_to_be_removed = set()
    for task_id in task_ids:
        task = Task.objects.filter(id=task_id)
        try:
            ann = Annotation_model.objects.filter(
                task=task[0], annotation_status=LABELED
            )
        except Exception as e:
            continue
        try:
            ann = ann[0]
        except Exception as e:
            continue
        if not isinstance(ann.result, list):
            continue
        for r in ann.result:
            if "model_responses_json" in r:
                model_responses_json = r["model_responses_json"]
                for mr in model_responses_json:
                    if "questions_response" in mr:
                        questions_response = mr["questions_response"]
                        for qr in questions_response:
                            if (
                                "review_filter_criteria" in qr["question"]
                                and "review_filter_values" in qr["question"]
                                and "response" in qr
                            ):
                                response = qr["response"]
                                criteria = qr["question"]["review_filter_criteria"].lower()
                                filter_values = qr["question"]["review_filter_values"]
                                if not isinstance(response, list) or not isinstance(
                                    filter_values, list
                                ):
                                    tasks_to_be_removed.add(task_id)
                                elif criteria == "equals":
                                    if not check_matching_values_equal(
                                        response, filter_values
                                    ):
                                        tasks_to_be_removed.add(task_id)
                                elif criteria == "not_equals":
                                    if check_matching_values_equal(
                                        response, filter_values
                                    ):
                                        tasks_to_be_removed.add(task_id)
                                elif criteria == "greater_than":
                                    if not check_matching_values_greater(
                                        response, filter_values, "greater_than"
                                    ):
                                        tasks_to_be_removed.add(task_id)
                                elif criteria == "greater_than_equals":
                                    if not check_matching_values_greater(
                                        response, filter_values, "greater_than_equals"
                                    ):
                                        tasks_to_be_removed.add(task_id)
                                elif criteria == "less_than":
                                    # less_than holds exactly when greater_than_equals fails
                                    if check_matching_values_greater(
                                        response, filter_values, "greater_than_equals"
                                    ):
                                        tasks_to_be_removed.add(task_id)
                                elif criteria == "less_than_equals":
                                    # less_than_equals holds exactly when greater_than fails
                                    if check_matching_values_greater(
                                        response, filter_values, "greater_than"
                                    ):
                                        tasks_to_be_removed.add(task_id)
    task_ids = [t for t in task_ids if t not in tasks_to_be_removed]
    return task_ids
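
For reference, the nested shape this filter walks; a hypothetical minimal annotation result that satisfies an "equals" criterion (illustrative field values, not real task data):

ann_result = [
    {
        "model_responses_json": [
            {
                "questions_response": [
                    {
                        "question": {
                            "review_filter_criteria": "equals",
                            "review_filter_values": ["yes"],
                        },
                        "response": ["Yes"],  # case-insensitive match -> task is kept
                    }
                ]
            }
        ]
    }
]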


def check_matching_values_equal(list1, list2):
    processed_list1 = set()

    for item in list1:
        if isinstance(item, str):
            processed_list1.add(item.lower())
        elif isinstance(item, int):
            processed_list1.add(float(item))

    for item in list2:
        if isinstance(item, str):
            if item.lower() in processed_list1:
                return True
        elif isinstance(item, int):
            if float(item) in processed_list1:
                return True
    return False


def check_matching_values_greater(list1, list2, criteria):
    integers_list1, integers_list2 = [], []
    for item1 in list1:
        if isinstance(item1, int):
            integers_list1.append(item1)
        elif isinstance(item1, str):
            if item1.isdigit():
                integers_list1.append(int(item1))
    for item2 in list2:
        if isinstance(item2, int):
            integers_list2.append(item2)
        elif isinstance(item2, str):
            if item2.isdigit():
                integers_list2.append(int(item2))

    if criteria == "greater_than":
        for num1 in integers_list1:
            for num2 in integers_list2:
                if num1 > num2:
                    return True
        return False
    else:
        for num1 in integers_list1:
            for num2 in integers_list2:
                if num1 >= num2:
                    return True
        return False
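
A quick sanity check of the two matchers' semantics (a sketch, assuming the helpers above are imported as-is):

print(check_matching_values_equal(["Yes"], ["yes", "no"]))        # True: strings match case-insensitively
print(check_matching_values_equal([3], ["three"]))                # False: "three" is not a digit string
print(check_matching_values_greater(["5"], [3], "greater_than"))  # True: "5" is coerced to int 5
print(check_matching_values_greater([2], [3], "greater_than"))    # False: 2 > 3 fails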