From 0175df101edc19772ffdcc2ae492c1b3d2f58647 Mon Sep 17 00:00:00 2001
From: Loann Peurey <loannpeurey@gmail.com>
Date: Tue, 19 Mar 2024 16:36:15 +0100
Subject: [PATCH] add new eaf tests

---
 tests/test_eaf.py | 85 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 7 deletions(-)

diff --git a/tests/test_eaf.py b/tests/test_eaf.py
index d3e0f492..a82e60a0 100644
--- a/tests/test_eaf.py
+++ b/tests/test_eaf.py
@@ -9,7 +9,7 @@
 from ChildProject.projects import ChildProject
 from ChildProject.annotations import AnnotationManager
 from ChildProject.pipelines.samplers import PeriodicSampler
-from ChildProject.pipelines.eafbuilder import EafBuilderPipeline
+from ChildProject.pipelines.eafbuilder import EafBuilderPipeline, create_eaf
 
 IMP_FROM = 'vtc'
 PATH = os.path.join('output', 'eaf')
@@ -29,8 +29,79 @@ def project(request):
     yield project
 
 
-@pytest.mark.parametrize("segments,type,template,context_onset,context_offset,path,import_speech_from",
-                         [])
+IMP = pd.DataFrame({'segment_onset': [10], 'segment_offset': [15], 'speaker_type': ['FEM']})
+TEMP = os.path.join('ChildProject', 'templates', 'basic.etf')
+@pytest.mark.parametrize("etf_path,output_dir,recording_filename,timestamps_list,eaf_type,context_on,context_off,speech_segments,imported_set,error", [
+    (5, PATH, 'sound.wav', [], 'periodic', 0, 0, IMP, 'vtc', FileNotFoundError),  # template path is an int
+    ('README.md', PATH, 'sound.wav', [], 'periodic', 0, 0, IMP, 'vtc', Exception),  # template path points at README.md, not an .etf
+    (TEMP, 6, 'sound.wav', [], 'periodic', 0, 0, IMP, 'vtc', TypeError),  # output directory is an int
+    (TEMP, PATH, 8, [], 'periodic', 0, 0, IMP, 'vtc', TypeError),  # recording filename is an int
+    (TEMP, PATH, 'sound.wav', 5, 'periodic', 0, 0, IMP, 'vtc', TypeError),  # timestamps is an int instead of a list
+    (TEMP, PATH, 'sound.wav', [(5, 'abc')], 'periodic', 0, 0, IMP, 'vtc', ValueError),  # window offset is a non-numeric string
+    (TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 'xp', 0, IMP, 'vtc', TypeError),  # context onset is a string
+    (TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 0, 0, 'x', 'vtc', AttributeError),  # speech segments is a string
+    (TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 0, 0, IMP.drop(columns=['segment_offset']), 'vtc', KeyError),  # offset column missing
+    (TEMP, PATH, 'sound.wav', [(5, 10)], 'periodic', 0, 0, IMP, 5, AttributeError),  # imported set name is an int
+])
+def test_create_eaf_inputs(project, etf_path, output_dir, recording_filename, timestamps_list, eaf_type, context_on,
+                           context_off, speech_segments, imported_set, error):
+    with pytest.raises(error):  # each row above carries exactly one malformed argument and the exception it must trigger
+        create_eaf(etf_path, 'sound', output_dir, recording_filename, timestamps_list, eaf_type, context_on, context_off,
+                   speech_segments, imported_set, 'vtc_rttm')
+
+def _tier_segments(eaf, tier_name):
+    """Collect the aligned annotations of one tier as a sorted onset/offset DataFrame.
+
+    :param eaf: parsed EAF document exposing ``tiers`` and ``timeslots``
+    :param tier_name: name of the tier to read
+    :return: DataFrame with ``segment_onset`` and ``segment_offset`` columns, sorted, with a fresh index
+    """
+    bounds = ['segment_onset', 'segment_offset']
+    # Element 0 of a tier maps annotation ids to (start slot, end slot, value, svg ref); the slot ids are
+    # resolved to times through eaf.timeslots.
+    rows = [
+        {'segment_onset': int(eaf.timeslots[start_ts]), 'segment_offset': int(eaf.timeslots[end_ts])}
+        for start_ts, end_ts, _value, _svg_ref in eaf.tiers[tier_name][0].values()
+    ]
+    return pd.DataFrame(rows)[bounds].sort_values(bounds).reset_index(drop=True)
+
+
+def test_create_eaf(project):
+    """Build an EAF with create_eaf and check its coding tier, imported speech tier and linked media.
+
+    The ``project`` fixture is not read by the test body; presumably it is requested for its setup.
+    """
+    timestamps_list = [(10, 20), (30, 40), (50, 60)]
+    bounds = ['segment_onset', 'segment_offset']
+    # Joined component by component, like the destination test_periodic hands to the pipeline.
+    output_dir = os.path.join(PATH, 'extra', 'eaf')
+
+    create_eaf(TEMP, 'sound', output_dir, 'sound.wav', timestamps_list, 'periodic', 10, 10,
+               IMP, 'vtc', 'vtc_rttm')
+
+    # The document is read back as 'sound.eaf' from the destination (presumably named after the 'sound' id).
+    eaf = Eaf(os.path.join(output_dir, 'sound.eaf'))
+
+    # The coding tier must hold exactly the requested windows.
+    expected_windows = pd.DataFrame(timestamps_list, columns=bounds)
+    pd.testing.assert_frame_equal(
+        _tier_segments(eaf, 'code_periodic'),
+        expected_windows.sort_values(bounds).reset_index(drop=True),
+        check_dtype=False,
+    )
+
+    # The imported speech segments must appear on the tier named after their speaker type.
+    pd.testing.assert_frame_equal(
+        _tier_segments(eaf, 'VTC-FEM'),
+        IMP[bounds].sort_values(bounds).reset_index(drop=True),
+    )
+
+    # The recording must be referenced as the media file of the document.
+    assert eaf.media_descriptors[0]['MEDIA_URL'] == 'sound.wav'
+
+
+# @pytest.mark.parametrize("segments,type,template,context_onset,context_offset,path,import_speech_from",
+#                          [])
 def test_periodic(project):
     """
     os.makedirs('output/eaf', exist_ok = True)
@@ -63,7 +134,7 @@ def test_periodic(project):
         
     sampler = PeriodicSampler(project, 500, 500, 250, recordings=['sound.wav'])
     sampler.sample()
-    sampler.segments.to_csv(PATH / 'segments.csv')
+    sampler.segments.to_csv(os.path.join(PATH, 'segments.csv'))
     
     ranges = sampler.segments.rename(
                     columns={
@@ -77,8 +148,8 @@ def test_periodic(project):
 
     eaf_builder = EafBuilderPipeline()
     eaf_builder.run(
-        destination=PATH / 'extra' / 'eaf',
-        segments=PATH / 'segments.csv',
+        destination=os.path.join(PATH, 'extra', 'eaf'),
+        segments=os.path.join(PATH, 'segments.csv'),
         eaf_type='periodic',
         template='basic',
         context_onset=250,
@@ -87,7 +158,7 @@ def test_periodic(project):
         import_speech_from='vtc',
     )
 
-    eaf = Eaf(PATH / 'extra/eaf/sound_periodic_basic.eaf')
+    eaf = Eaf(os.path.join(PATH, 'extra/eaf/sound_periodic_basic.eaf'))
 
     code = eaf.tiers['code_periodic'][0]
     segments = []