From 0175df101edc19772ffdcc2ae492c1b3d2f58647 Mon Sep 17 00:00:00 2001 From: Loann Peurey Date: Tue, 19 Mar 2024 16:36:15 +0100 Subject: [PATCH] add new eaf tests --- tests/test_eaf.py | 85 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 7 deletions(-) diff --git a/tests/test_eaf.py b/tests/test_eaf.py index d3e0f492..a82e60a0 100644 --- a/tests/test_eaf.py +++ b/tests/test_eaf.py @@ -9,7 +9,7 @@ from ChildProject.projects import ChildProject from ChildProject.annotations import AnnotationManager from ChildProject.pipelines.samplers import PeriodicSampler -from ChildProject.pipelines.eafbuilder import EafBuilderPipeline +from ChildProject.pipelines.eafbuilder import EafBuilderPipeline, create_eaf IMP_FROM = 'vtc' PATH = os.path.join('output', 'eaf') @@ -29,8 +29,79 @@ def project(request): yield project -@pytest.mark.parametrize("segments,type,template,context_onset,context_offset,path,import_speech_from", - []) +IMP = pd.DataFrame({'segment_onset': [10], 'segment_offset': [15], 'speaker_type': ['FEM']}) +TEMP = os.path.join('ChildProject', 'templates', 'basic.etf') +@pytest.mark.parametrize(("etf_path,output_dir,recording_filename,timestamps_list,eaf_type,context_on,context_off,speech_segments,imported_set,error"), +[[5, PATH, 'sound.wav', [], 'periodic', 0, 0, IMP, 'vtc', FileNotFoundError], +['README.md', PATH, 'sound.wav', [], 'periodic', 0, 0, IMP, 'vtc', Exception], +[TEMP, 6, 'sound.wav', [], 'periodic', 0, 0, IMP, 'vtc', TypeError], +[TEMP, PATH, 8, [], 'periodic', 0, 0, IMP, 'vtc', TypeError], +[TEMP, PATH, 'sound.wav', 5, 'periodic', 0, 0, IMP, 'vtc', TypeError], +[TEMP, PATH, 'sound.wav', [(5, 'abc')], 'periodic', 0, 0, IMP, 'vtc', ValueError], +[TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 'xp', 0, IMP, 'vtc', TypeError], +[TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 0, 0, 'x', 'vtc', AttributeError], +[TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 0, 0, IMP.drop(columns=['segment_offset']), 'vtc', KeyError], +[TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 0, 0, IMP, 5, AttributeError], + ]) +def test_create_eaf_inputs(project, etf_path, output_dir, recording_filename, timestamps_list, eaf_type, context_on, + context_off, speech_segments, imported_set, error): + with pytest.raises(error): + create_eaf(etf_path, 'sound', output_dir, recording_filename, timestamps_list, eaf_type, context_on, context_off, + speech_segments, imported_set, 'vtc_rttm') + +def test_create_eaf(project): + + timestamps_list = [(10, 20), (30, 40), (50, 60)] + + create_eaf(TEMP, 'sound', os.path.join(PATH, 'extra/eaf'), 'sound.wav', timestamps_list, 'periodic', 10, 10, + IMP, 'vtc', 'vtc_rttm') + + eaf = Eaf(os.path.join(PATH, 'extra/eaf/sound.eaf')) + + code = eaf.tiers['code_periodic'][0] + segments = [] + + for pid in code: + (start_ts, end_ts, value, svg_ref) = code[pid] + (start_t, end_t) = (eaf.timeslots[start_ts], eaf.timeslots[end_ts]) + segments.append({'segment_onset': int(start_t), 'segment_offset': int(end_t)}) + + timestamps = [] + for pid in timestamps_list: + timestamps.append({'segment_onset': pid[0], 'segment_offset': pid[1]}) + + segments = pd.DataFrame(segments) + timestamps = pd.DataFrame(timestamps) + + pd.testing.assert_frame_equal( + segments[['segment_onset', 'segment_offset']].sort_values(['segment_onset', 'segment_offset']).reset_index( + drop=True), + timestamps[['segment_onset', 'segment_offset']].sort_values( + ['segment_onset', 'segment_offset']).reset_index(drop=True), + check_dtype=False, + ) + + segments = [] + vtc_speech = eaf.tiers['VTC-FEM'][0] + for pid in vtc_speech: + (start_ts, end_ts, value, svg_ref) = vtc_speech[pid] + (start_t, end_t) = (eaf.timeslots[start_ts], eaf.timeslots[end_ts]) + segments.append({'segment_onset': int(start_t), 'segment_offset': int(end_t)}) + + segments = pd.DataFrame(segments) + + pd.testing.assert_frame_equal( + segments[['segment_onset', 'segment_offset']].sort_values(['segment_onset', 'segment_offset']).reset_index( + drop=True), + IMP[['segment_onset', 'segment_offset']].sort_values(['segment_onset', 'segment_offset']).reset_index( + drop=True) + ) + + assert eaf.media_descriptors[0]['MEDIA_URL'] == 'sound.wav' + + +# @pytest.mark.parametrize("segments,type,template,context_onset,context_offset,path,import_speech_from", +# []) def test_periodic(project): """ os.makedirs('output/eaf', exist_ok = True) @@ -63,7 +134,7 @@ def test_periodic(project): sampler = PeriodicSampler(project, 500, 500, 250, recordings=['sound.wav']) sampler.sample() - sampler.segments.to_csv(PATH / 'segments.csv') + sampler.segments.to_csv(os.path.join(PATH, 'segments.csv')) ranges = sampler.segments.rename( columns={ @@ -77,8 +148,8 @@ def test_periodic(project): eaf_builder = EafBuilderPipeline() eaf_builder.run( - destination=PATH / 'extra' / 'eaf', - segments=PATH / 'segments.csv', + destination=os.path.join(PATH, 'extra', 'eaf'), + segments=os.path.join(PATH, 'segments.csv'), eaf_type='periodic', template='basic', context_onset=250, @@ -87,7 +158,7 @@ def test_periodic(project): import_speech_from='vtc', ) - eaf = Eaf(PATH / 'extra/eaf/sound_periodic_basic.eaf') + eaf = Eaf(os.path.join(PATH, 'extra/eaf/sound_periodic_basic.eaf')) code = eaf.tiers['code_periodic'][0] segments = []