Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upload different format .eaf files #1399 #1444

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions signbank/video/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,9 @@ def get_annotated_video_file_path(instance, filename, version=0):
dataset = instance.annotatedsentence.get_dataset().acronym
dataset_dir = os.path.join(dataset, str(instance.annotatedsentence.id))
if version > 0:
filename = str(instance.examplesentence.id) + ext + '.bak' + str(instance.id)
filename = str(instance.annotatedsentence.id) + ext + '.bak' + str(instance.id)
else:
filename = str(instance.examplesentence.id) + ext
filename = str(instance.annotatedsentence.id) + ext

path = os.path.join(video_dir, dataset_dir, filename)
if hasattr(settings, 'ESCAPE_UPLOADED_VIDEO_FILE_PATH') and settings.ESCAPE_UPLOADED_VIDEO_FILE_PATH:
Expand Down Expand Up @@ -537,6 +537,9 @@ def convert_milliseconds_to_time_format(self, ms):
def select_annotations(self, eaf, tier_name, start_ms, end_ms):
""" Select annotations that are within the selected range """

if tier_name not in eaf.tiers:
return

keys_to_remove = []
for key in eaf.tiers[tier_name][0]:
annotation_list = list(eaf.tiers[tier_name][0][key])
Expand Down Expand Up @@ -597,6 +600,8 @@ def cut_video_and_eaf(self, start_ms, end_ms):
self.select_annotations(eaf, 'Sentences', start_ms, end_ms)
self.select_annotations(eaf, 'Glosses R', start_ms, end_ms)
self.select_annotations(eaf, 'Glosses L', start_ms, end_ms)
self.select_annotations(eaf, 'Nederlands', start_ms, end_ms)
self.select_annotations(eaf, 'Signbank ID glossen', start_ms, end_ms)
# shift the timeslots to start at 0
for key in eaf.timeslots:
eaf.timeslots[key] -= start_ms
Expand Down
55 changes: 35 additions & 20 deletions signbank/video/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,35 +138,50 @@ def get_glosses_from_eaf(eaf, dataset_acronym):
glosses, labels_not_found, sentences = [], [], []
sentence_dict = {}

# Add glosses from the right hand
for annotation in eaf.tiers['Glosses R'][0].values():
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)

# Add glosses from the left hand, if they don't overlap with the right hand
for annotation in find_non_overlapping_annotated_glosses(eaf.timeslots, eaf.tiers['Glosses R'][0].values(), eaf.tiers['Glosses L'][0].values()):
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)
# check whether to use 'Signbank ID glossen' or 'Glosses R' and 'Glosses L' tiers
if 'Signbank ID glossen' in eaf.tiers:
# Add glosses from this one tier
for annotation in eaf.tiers['Signbank ID glossen'][0].values():
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)
else:
# Add glosses from the right hand
for annotation in eaf.tiers['Glosses R'][0].values():
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)

# Add glosses from the left hand, if they don't overlap with the right hand
for annotation in find_non_overlapping_annotated_glosses(eaf.timeslots, eaf.tiers['Glosses R'][0].values(), eaf.tiers['Glosses L'][0].values()):
gloss_label = annotation[2]
if AnnotationIdglossTranslation.objects.filter(gloss__lemma__dataset__acronym=dataset_acronym, text__exact=gloss_label).exists():
start = int(eaf.timeslots[annotation[0]])
end = int(eaf.timeslots[annotation[1]])
glosses.append([gloss_label, start, end])
else:
labels_not_found.append(gloss_label)

# Sort the list of glosses by the "start" value
glosses = sorted(glosses, key=lambda x: x[1])

if 'Sentences' in eaf.tiers:
for annotation in eaf.tiers['Sentences'][0].values():
sentences.append(annotation[2])
elif 'Nederlands' in eaf.tiers:
for annotation in eaf.tiers['Nederlands'][0].values():
sentences.append(annotation[2])

dataset_language = Dataset.objects.get(acronym=dataset_acronym).default_language.language_code_3char
for sentence_i, sentence in enumerate(sentences):
for sentence in sentences:
sentence_dict[dataset_language] = sentence

return glosses, labels_not_found, sentence_dict
Expand Down