diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 4987ed878..2b6e091ed 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -361,7 +361,10 @@ def process_speech_tasks(task, is_audio_segmentation, project_type): def process_ocr_tasks( - task, is_OCRSegmentCategorization, is_OCRSegmentCategorizationEditing + task, + is_OCRSegmentCategorization, + is_OCRSegmentCategorizationEditing, + is_OCRTextlineSegmentation, ): annotation_result = process_annotation_result(task) process_ocr_results( @@ -369,6 +372,7 @@ def process_ocr_tasks( annotation_result, is_OCRSegmentCategorization, is_OCRSegmentCategorizationEditing, + is_OCRTextlineSegmentation, ) @@ -451,6 +455,7 @@ def process_ocr_results( annotation_result, is_OCRSegmentCategorization, is_OCRSegmentCategorizationEditing, + is_OCRTextlineSegmentation, ): from projects.views import convert_annotation_result_to_formatted_json @@ -458,10 +463,16 @@ def process_ocr_results( annotation_result, None, False, - is_OCRSegmentCategorization or is_OCRSegmentCategorizationEditing, + is_OCRSegmentCategorization + or is_OCRSegmentCategorizationEditing + or is_OCRTextlineSegmentation, False, ) - if is_OCRSegmentCategorization or is_OCRSegmentCategorizationEditing: + if ( + is_OCRSegmentCategorization + or is_OCRSegmentCategorizationEditing + or is_OCRTextlineSegmentation + ): bboxes_relation_json = [] for ann in annotation_result: if "type" in ann and ann["type"] == "relation": diff --git a/backend/projects/views.py b/backend/projects/views.py index cb48392f9..4c36887c8 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -402,6 +402,7 @@ def get_review_reports(proj_id, userid, start_date, end_date): "OCRTranscription", "OCRSegmentCategorization", "OCRSegmentCategorizationEditing", + "OCRTextlineSegmentation", ]: result["Total Word Count"] = total_word_count elif proj_type in get_audio_project_types(): @@ -650,6 +651,7 @@ def get_supercheck_reports(proj_id, userid, 
start_date, end_date): "OCRTranscription", "OCRSegmentCategorization", "OCRSegmentCategorizationEditing", + "OCRTextlineSegmentation", ]: result["Validated Word Count"] = validated_word_count result["Validated With Changes Word Count"] = validated_with_changes_word_count @@ -994,7 +996,7 @@ def convert_annotation_result_to_formatted_json( annotation_result, speakers_json, is_SpeechConversation, - is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing, + is_OCRSegmentCategorizationOROCRSegmentCategorizationEditingOROCRTextlineSegmentation, is_acoustic=False, ): transcribed_json = [] @@ -1090,14 +1092,18 @@ def convert_annotation_result_to_formatted_json( acoustic_transcribed_json, ensure_ascii=False ) else: - dicts = 2 if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing else 3 + dicts = ( + 2 + if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditingOROCRTextlineSegmentation + else 3 + ) for idx1 in range(0, len(annotation_result), dicts): rectangle_dict = {} labels_dict = {} text_dict = {} if isinstance(annotation_result[idx1], str): annotation_result[idx1] = json.loads(annotation_result[idx1]) - if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditing: + if is_OCRSegmentCategorizationOROCRSegmentCategorizationEditingOROCRTextlineSegmentation: custom_text_dict = {"value": {"text": ""}} text_dict = json.dumps(custom_text_dict, indent=2) for idx2 in range(idx1, idx1 + dicts): @@ -4092,6 +4098,7 @@ def download(self, request, pk=None, *args, **kwargs): is_OCRSegmentCategorizationEditing = ( project_type == "OCRSegmentCategorizationEditing" ) + is_OCRTextlineSegmentation = project_type == "OCRTextlineSegmentation" is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" for task in tasks: try: @@ -4123,6 +4130,7 @@ def download(self, request, pk=None, *args, **kwargs): curr_task, is_OCRSegmentCategorization, is_OCRSegmentCategorizationEditing, + is_OCRTextlineSegmentation, ) except Exception as e: continue diff --git 
a/backend/tasks/views.py b/backend/tasks/views.py index 200fc1250..320405aa4 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -1754,13 +1754,17 @@ def partial_update(self, request, pk=None): == "AcousticNormalisedTranscriptionEditing" else False ) - is_ocr_sc_or_sce = ( + is_ocr_sc_or_sce_or_ts = ( True if annotation_obj.task.project_id.project_type - in ["OCRSegmentCategorization", "OCRSegmentCategorizationEditing"] + in [ + "OCRSegmentCategorization", + "OCRSegmentCategorizationEditing", + "OCRTextlineSegmentation", + ] else False ) - if is_ocr_sc_or_sce and ( + if is_ocr_sc_or_sce_or_ts and ( "language" in request.data or "ocr_domain" in request.data ): language = request.data.get("languages", [])