Merge pull request #1444 from Signbank/annotated_source_change_1399

Upload different format .eaf files #1399
Signbank · Dec 18, 2024 · e9ed9d0 · e9ed9d0
2 parents 2e0db10 + 8dd2306
commit e9ed9d0
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 22 deletions.
diff --git a/signbank/video/models.py b/signbank/video/models.py
@@ -263,9 +263,9 @@ def get_annotated_video_file_path(instance, filename, version=0):
     dataset = instance.annotatedsentence.get_dataset().acronym
     dataset_dir = os.path.join(dataset, str(instance.annotatedsentence.id))
     if version > 0:
-        filename = str(instance.examplesentence.id) + ext + '.bak' + str(instance.id)
+        filename = str(instance.annotatedsentence.id) + ext + '.bak' + str(instance.id)
     else:
-        filename = str(instance.examplesentence.id) + ext
+        filename = str(instance.annotatedsentence.id) + ext
 
     path = os.path.join(video_dir, dataset_dir, filename)
     if hasattr(settings, 'ESCAPE_UPLOADED_VIDEO_FILE_PATH') and settings.ESCAPE_UPLOADED_VIDEO_FILE_PATH:
@@ -530,6 +530,9 @@ def convert_milliseconds_to_time_format(self, ms):
     def select_annotations(self, eaf, tier_name, start_ms, end_ms):
         """ Select annotations that are within the selected range """
 
+        if tier_name not in eaf.tiers:
+            return
+
         keys_to_remove = []
         for key in eaf.tiers[tier_name][0]:
             annotation_list = list(eaf.tiers[tier_name][0][key])
@@ -590,6 +593,8 @@ def cut_video_and_eaf(self, start_ms, end_ms):
         self.select_annotations(eaf, 'Sentences', start_ms, end_ms)
         self.select_annotations(eaf, 'Glosses R', start_ms, end_ms)
         self.select_annotations(eaf, 'Glosses L', start_ms, end_ms)
+        self.select_annotations(eaf, 'Nederlands', start_ms, end_ms)
+        self.select_annotations(eaf, 'Signbank ID glossen', start_ms, end_ms)
         # shift the timeslots to start at 0
         for key in eaf.timeslots:
             eaf.timeslots[key] -= start_ms

diff --git a/signbank/video/views.py b/signbank/video/views.py
@@ -138,35 +138,50 @@ def get_glosses_from_eaf(eaf, dataset_acronym):
     glosses, labels_not_found, sentences = [], [], []
     sentence_dict = {}
 
-    # Add glosses from the right hand
-    for annotation in eaf.tiers['Glosses R'][0].values():
-        gloss_label = annotation[2]
-        if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
-            start = int(eaf.timeslots[annotation[0]])
-            end = int(eaf.timeslots[annotation[1]])
-            glosses.append([gloss_label, start, end])
-        else:
-            labels_not_found.append(gloss_label)
-
-    # Add glosses from the left hand, if they don't overlap with the right hand
-    for annotation in find_non_overlapping_annotated_glosses(eaf.timeslots, eaf.tiers['Glosses R'][0].values(), eaf.tiers['Glosses L'][0].values()):
-        gloss_label = annotation[2]
-        if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
-            start = int(eaf.timeslots[annotation[0]])
-            end = int(eaf.timeslots[annotation[1]])
-            glosses.append([gloss_label, start, end])
-        else:
-            labels_not_found.append(gloss_label)
+    # check whether to use 'Signbank ID glossen' or 'Glosses R' and 'Glosses L' tiers
+    if 'Signbank ID glossen' in eaf.tiers:
+        # Add glosses from this one tier
+        for annotation in eaf.tiers['Signbank ID glossen'][0].values():
+            gloss_label = annotation[2]
+            if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
+                start = int(eaf.timeslots[annotation[0]])
+                end = int(eaf.timeslots[annotation[1]])
+                glosses.append([gloss_label, start, end])
+            else:
+                labels_not_found.append(gloss_label)
+    else:
+        # Add glosses from the right hand
+        for annotation in eaf.tiers['Glosses R'][0].values():
+            gloss_label = annotation[2]
+            if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
+                start = int(eaf.timeslots[annotation[0]])
+                end = int(eaf.timeslots[annotation[1]])
+                glosses.append([gloss_label, start, end])
+            else:
+                labels_not_found.append(gloss_label)
+
+        # Add glosses from the left hand, if they don't overlap with the right hand
+        for annotation in find_non_overlapping_annotated_glosses(eaf.timeslots, eaf.tiers['Glosses R'][0].values(), eaf.tiers['Glosses L'][0].values()):
+            gloss_label = annotation[2]
+            if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
+                start = int(eaf.timeslots[annotation[0]])
+                end = int(eaf.timeslots[annotation[1]])
+                glosses.append([gloss_label, start, end])
+            else:
+                labels_not_found.append(gloss_label)
 
     # Sort the list of glosses by the "start" value
     glosses = sorted(glosses, key=lambda x: x[1])
 
     if 'Sentences' in eaf.tiers:
         for annotation in eaf.tiers['Sentences'][0].values():
             sentences.append(annotation[2])
+    elif 'Nederlands' in eaf.tiers:
+        for annotation in eaf.tiers['Nederlands'][0].values():
+            sentences.append(annotation[2])
 
     dataset_language = Dataset.objects.get(acronym=dataset_acronym).default_language.language_code_3char
-    for sentence_i, sentence in enumerate(sentences):
+    for sentence in sentences:
         sentence_dict[dataset_language] = sentence
 
     return glosses, labels_not_found, sentence_dict