Skip to content

Commit

Permalink
added minor changes for ocr_te
Browse files Browse the repository at this point in the history
  • Loading branch information
KunalTiwary committed Dec 12, 2024
1 parent 286b385 commit 8d9fd12
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 9 deletions.
17 changes: 14 additions & 3 deletions backend/projects/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,14 +361,18 @@ def process_speech_tasks(task, is_audio_segmentation, project_type):


def process_ocr_tasks(
task, is_OCRSegmentCategorization, is_OCRSegmentCategorizationEditing
task,
is_OCRSegmentCategorization,
is_OCRSegmentCategorizationEditing,
is_OCRTextlineSegmentation,
):
annotation_result = process_annotation_result(task)
process_ocr_results(
task,
annotation_result,
is_OCRSegmentCategorization,
is_OCRSegmentCategorizationEditing,
is_OCRTextlineSegmentation,
)


Expand Down Expand Up @@ -451,17 +455,24 @@ def process_ocr_results(
annotation_result,
is_OCRSegmentCategorization,
is_OCRSegmentCategorizationEditing,
is_OCRTextlineSegmentation,
):
from projects.views import convert_annotation_result_to_formatted_json

task["data"]["ocr_transcribed_json"] = convert_annotation_result_to_formatted_json(
annotation_result,
None,
False,
is_OCRSegmentCategorization or is_OCRSegmentCategorizationEditing,
is_OCRSegmentCategorization
or is_OCRSegmentCategorizationEditing
or is_OCRTextlineSegmentation,
False,
)
if is_OCRSegmentCategorization or is_OCRSegmentCategorizationEditing:
if (
is_OCRSegmentCategorization
or is_OCRSegmentCategorizationEditing
or is_OCRTextlineSegmentation
):
bboxes_relation_json = []
for ann in annotation_result:
if "type" in ann and ann["type"] == "relation":
Expand Down
14 changes: 11 additions & 3 deletions backend/projects/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ def get_review_reports(proj_id, userid, start_date, end_date):
"OCRTranscription",
"OCRSegmentCategorization",
"OCRSegmentCategorizationEditing",
"OCRTextlineSegmentation",
]:
result["Total Word Count"] = total_word_count
elif proj_type in get_audio_project_types():
Expand Down Expand Up @@ -650,6 +651,7 @@ def get_supercheck_reports(proj_id, userid, start_date, end_date):
"OCRTranscription",
"OCRSegmentCategorization",
"OCRSegmentCategorizationEditing",
"OCRTextlineSegmentation",
]:
result["Validated Word Count"] = validated_word_count
result["Validated With Changes Word Count"] = validated_with_changes_word_count
Expand Down Expand Up @@ -994,7 +996,7 @@ def convert_annotation_result_to_formatted_json(
annotation_result,
speakers_json,
is_SpeechConversation,
is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing,
is_OCRSegmentCategorizationOROCRSegmentCategorizationEditingOROCRTextlineSegmentation,
is_acoustic=False,
):
transcribed_json = []
Expand Down Expand Up @@ -1090,14 +1092,18 @@ def convert_annotation_result_to_formatted_json(
acoustic_transcribed_json, ensure_ascii=False
)
else:
dicts = 2 if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing else 3
dicts = (
2
if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditingOROCRTextlineSegmentation
else 3
)
for idx1 in range(0, len(annotation_result), dicts):
rectangle_dict = {}
labels_dict = {}
text_dict = {}
if isinstance(annotation_result[idx1], str):
annotation_result[idx1] = json.loads(annotation_result[idx1])
if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing:
if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditingOROCRTextlineSegmentation:
custom_text_dict = {"value": {"text": ""}}
text_dict = json.dumps(custom_text_dict, indent=2)
for idx2 in range(idx1, idx1 + dicts):
Expand Down Expand Up @@ -4092,6 +4098,7 @@ def download(self, request, pk=None, *args, **kwargs):
is_OCRSegmentCategorizationEditing = (
project_type == "OCRSegmentCategorizationEditing"
)
is_OCRTextlineSegmentation = project_type == "OCRTextlineSegmentation"
is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization"
for task in tasks:
try:
Expand Down Expand Up @@ -4123,6 +4130,7 @@ def download(self, request, pk=None, *args, **kwargs):
curr_task,
is_OCRSegmentCategorization,
is_OCRSegmentCategorizationEditing,
is_OCRTextlineSegmentation,
)
except Exception as e:
continue
Expand Down
10 changes: 7 additions & 3 deletions backend/tasks/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1754,13 +1754,17 @@ def partial_update(self, request, pk=None):
== "AcousticNormalisedTranscriptionEditing"
else False
)
is_ocr_sc_or_sce = (
is_ocr_sc_or_sce_or_ts = (
True
if annotation_obj.task.project_id.project_type
in ["OCRSegmentCategorization", "OCRSegmentCategorizationEditing"]
in [
"OCRSegmentCategorization",
"OCRSegmentCategorizationEditing",
"OCRTextlineSegmentation",
]
else False
)
if is_ocr_sc_or_sce and (
if is_ocr_sc_or_sce_or_ts and (
"language" in request.data or "ocr_domain" in request.data
):
language = request.data.get("languages", [])
Expand Down

0 comments on commit 8d9fd12

Please sign in to comment.