Skip to content

Commit

Permalink
Merge pull request #1444 from Signbank/annotated_source_change_1399
Browse files: browse the repository at this point in the history
Upload different format .eaf files #1399
  • Loading branch information
susanodd authored Dec 18, 2024
2 parents 2e0db10 + 8dd2306 commit e9ed9d0
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 22 deletions.
9 changes: 7 additions & 2 deletions signbank/video/models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -263,9 +263,9 @@ def get_annotated_video_file_path(instance, filename, version=0):
dataset = instance.annotatedsentence.get_dataset().acronym
dataset_dir = os.path.join(dataset, str(instance.annotatedsentence.id))
if version > 0:
filename = str(instance.examplesentence.id) + ext + '.bak' + str(instance.id)
filename = str(instance.annotatedsentence.id) + ext + '.bak' + str(instance.id)
else:
filename = str(instance.examplesentence.id) + ext
filename = str(instance.annotatedsentence.id) + ext

path = os.path.join(video_dir, dataset_dir, filename)
if hasattr(settings, 'ESCAPE_UPLOADED_VIDEO_FILE_PATH') and settings.ESCAPE_UPLOADED_VIDEO_FILE_PATH:
Expand Down Expand Up @@ -530,6 +530,9 @@ def convert_milliseconds_to_time_format(self, ms):
def select_annotations(self, eaf, tier_name, start_ms, end_ms):
""" Select annotations that are within the selected range """

if tier_name not in eaf.tiers:
return

keys_to_remove = []
for key in eaf.tiers[tier_name][0]:
annotation_list = list(eaf.tiers[tier_name][0][key])
Expand Down Expand Up @@ -590,6 +593,8 @@ def cut_video_and_eaf(self, start_ms, end_ms):
self.select_annotations(eaf, 'Sentences', start_ms, end_ms)
self.select_annotations(eaf, 'Glosses R', start_ms, end_ms)
self.select_annotations(eaf, 'Glosses L', start_ms, end_ms)
self.select_annotations(eaf, 'Nederlands', start_ms, end_ms)
self.select_annotations(eaf, 'Signbank ID glossen', start_ms, end_ms)
# shift the timeslots to start at 0
for key in eaf.timeslots:
eaf.timeslots[key] -= start_ms
Expand Down
55 changes: 35 additions & 20 deletions signbank/video/views.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -138,35 +138,50 @@ def get_glosses_from_eaf(eaf, dataset_acronym):
glosses, labels_not_found, sentences = [], [], []
sentence_dict = {}

# Add glosses from the right hand
for annotation in eaf.tiers['Glosses R'][0].values():
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)

# Add glosses from the left hand, if they don't overlap with the right hand
for annotation in find_non_overlapping_annotated_glosses(eaf.timeslots, eaf.tiers['Glosses R'][0].values(), eaf.tiers['Glosses L'][0].values()):
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)
# check whether to use 'Signbank ID glossen' or 'Glosses R' and 'Glosses L' tiers
if 'Signbank ID glossen' in eaf.tiers:
# Add glosses from this one tier
for annotation in eaf.tiers['Signbank ID glossen'][0].values():
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)
else:
# Add glosses from the right hand
for annotation in eaf.tiers['Glosses R'][0].values():
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)

# Add glosses from the left hand, if they don't overlap with the right hand
for annotation in find_non_overlapping_annotated_glosses(eaf.timeslots, eaf.tiers['Glosses R'][0].values(), eaf.tiers['Glosses L'][0].values()):
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)

# Sort the list of glosses by the "start" value
glosses = sorted(glosses, key=lambda x: x[1])

if 'Sentences' in eaf.tiers:
for annotation in eaf.tiers['Sentences'][0].values():
sentences.append(annotation[2])
elif 'Nederlands' in eaf.tiers:
for annotation in eaf.tiers['Nederlands'][0].values():
sentences.append(annotation[2])

dataset_language = Dataset.objects.get(acronym=dataset_acronym).default_language.language_code_3char
for sentence_i, sentence in enumerate(sentences):
for sentence in sentences:
sentence_dict[dataset_language] = sentence

return glosses, labels_not_found, sentence_dict
Expand Down

0 comments on commit e9ed9d0

Please sign in to comment.