diff --git a/tests/data/conversations_parameters.yml b/tests/data/conversations_parameters.yml new file mode 100644 index 00000000..b41a89df --- /dev/null +++ b/tests/data/conversations_parameters.yml @@ -0,0 +1,56 @@ +package_version: 0.2.0 +date: '20240704_184950' +parameters: + path: output/conversations + destination: output/conversations/extra/std_conv.csv + pipeline: standard + setname: custom_conv + recordings: null + from_time: null + to_time: null + rec_cols: null + child_cols: null + threads: 1 + dataset_hash: 710ebfe1f4b118f8a48f69d896c6675bb72ef6ba + features_list: + - callable: who_initiated + args: {} + name: initiator + - callable: who_finished + args: {} + name: finisher + - callable: voc_total_dur + args: {} + name: total_duration_of_vocalisations + - callable: voc_speaker_count + args: + speaker: CHI + name: CHI_voc_count + - callable: voc_speaker_count + args: + speaker: FEM + name: FEM_voc_count + - callable: voc_speaker_count + args: + speaker: MAL + name: MAL_voc_count + - callable: voc_speaker_count + args: + speaker: OCH + name: OCH_voc_count + - callable: voc_speaker_dur + args: + speaker: CHI + name: CHI_voc_dur + - callable: voc_speaker_dur + args: + speaker: FEM + name: FEM_voc_dur + - callable: voc_speaker_dur + args: + speaker: MAL + name: MAL_voc_dur + - callable: voc_speaker_dur + args: + speaker: OCH + name: OCH_voc_dur diff --git a/tests/data/list_features_conv.csv b/tests/data/list_features_conv.csv new file mode 100644 index 00000000..1802ad41 --- /dev/null +++ b/tests/data/list_features_conv.csv @@ -0,0 +1,6 @@ +callable,name,speaker +who_initiated,initiator, +who_finished,finisher, +participants,participants, +voc_dur_contribution,chi_dur_contrib,CHI +voc_dur_contribution,fem_dur_contrib,FEM diff --git a/tests/test_conversations.py b/tests/test_conversations.py new file mode 100644 index 00000000..01791ce5 --- /dev/null +++ b/tests/test_conversations.py @@ -0,0 +1,206 @@ +from functools import partial +import numpy as np 
+import os +import pandas as pd +import pytest +import shutil +from pathlib import Path + +from ChildProject.projects import ChildProject +from ChildProject.annotations import AnnotationManager +from ChildProject.pipelines.conversations import (Conversations, StandardConversations, CustomConversations, + ConversationsSpecificationPipeline) + +from ChildProject.pipelines.conversationFunctions import conversationFunction, RESERVED + +PATH = Path('output/conversations') + + +def fake_vocs(data, filename): + return data + + +@pytest.fixture(scope="function") +def project(request): + if os.path.exists(PATH): + # shutil.copytree(src="examples/valid_raw_data", dst="output/annotations") + shutil.rmtree(PATH) + shutil.copytree(src="examples/valid_raw_data", dst=PATH) + + project = ChildProject(PATH) + project.read() + + yield project + + +@pytest.fixture(scope="function") +def am(request, project): + am = AnnotationManager(project) + project.recordings['duration'] = [100000000, 2000000] #force longer durations to allow for imports + yield am + +@pytest.fixture(scope="function") +def segments(request): + segments = pd.read_csv("tests/data/csv.csv") + segments.loc[2:4, 'conv_count'] = 1 + segments.loc[8:9, 'conv_count'] = 2 + segments.loc[10:11, 'conv_count'] = 3 + + yield segments + + +def test_failures(project): + features = pd.DataFrame([["who_initiated", "initiator", pd.NA], + ["who_finished", "finisher", pd.NA], + ["voc_speaker_count", "CHI_voc_count", 'CHI'], + ], columns=['callable', 'name', 'speaker']) + + exception_caught = False + try: + standard = StandardConversations(project, setname="unknown") + except ValueError as e: + exception_caught = True + + assert ( + exception_caught is True + ), "StandardConversations failed to throw an exception despite an invalid set being provided" + + exception_caught = False + try: + custom = CustomConversations(project, setname="unknown", features='tests/data/list_features_conv.csv') + except ValueError as e: + exception_caught = 
True + + assert ( + exception_caught is True + ), "CustomConversations failed to throw an exception despite an invalid set being provided" + + +@pytest.mark.parametrize("error,col_change,new_value", + [(ValueError, 'name', 'finisher'), + (ValueError, 'callable', 'made_up_function'), + (TypeError, 'speaker', 'FEM'), + (None, None, None), + ]) +def test_conversations(project, am, segments, error, col_change, new_value): + + am.import_annotations( + pd.DataFrame( + [{ "set": "custom_conv", + "raw_filename": "file.its", + "time_seek": 0, + "recording_filename": "sound.wav", + "range_onset": 0, + "range_offset": 30000000, + "format": "csv", + }] + ), + import_function=partial(fake_vocs, segments), + ) + + features = pd.DataFrame([["who_initiated", "initiator", pd.NA], + ["who_finished", "finisher", pd.NA], + ["voc_speaker_count", "CHI_voc_count", 'CHI'], + ], columns=['callable', 'name', 'speaker']) + + if error: + with pytest.raises(error): + features.iloc[0, features.columns.get_loc(col_change)] = new_value + cm = Conversations(project, 'custom_conv', features) + cm.extract() + else: + cm = Conversations(project, 'custom_conv', features) + results = cm.extract() + + # cm.conversations.to_csv("tests/truth/python_conversations.csv",index=False) + truth = pd.read_csv("tests/truth/python_conversations.csv") + + pd.testing.assert_frame_equal(truth, results) + + pd.testing.assert_frame_equal(results, truth, check_like=True) + +#TODO adapt +def test_standard(project, am, segments): + am.import_annotations( + pd.DataFrame( + [{"set": "custom_conv", + "raw_filename": "file.its", + "time_seek": 0, + "recording_filename": "sound.wav", + "range_onset": 0, + "range_offset": 30000000, + "format": "csv", + }] + ), + import_function=partial(fake_vocs, segments), + ) + + std = StandardConversations(project, setname='custom_conv',rec_cols='date_iso', child_cols='experiment,child_dob') + std.extract() + + # std.conversations.to_csv("tests/truth/standard_conversations.csv", index=False) + 
truth = pd.read_csv("tests/truth/standard_conversations.csv") + + pd.testing.assert_frame_equal(std.conversations, truth, check_like=True) + + +#TODO adapt +def test_custom(project, am, segments): + am.import_annotations( + pd.DataFrame( + [{"set": "custom_conv", + "raw_filename": "file.its", + "time_seek": 0, + "recording_filename": "sound.wav", + "range_onset": 0, + "range_offset": 30000000, + "format": "csv", + }] + ), + import_function=partial(fake_vocs, segments), + ) + + parameters = "tests/data/list_features_conv.csv" + + cm = CustomConversations(project, 'custom_conv', parameters) + cm.extract() + + # cm.conversations.to_csv("tests/truth/custom_conversations.csv", index=False) + truth = pd.read_csv("tests/truth/custom_conversations.csv") + + pd.testing.assert_frame_equal(cm.conversations, truth, check_like=True) + + +#TODO adapt +def test_specs(project, am, segments): + am.import_annotations( + pd.DataFrame( + [{"set": "custom_conv", + "raw_filename": "file.its", + "time_seek": 0, + "recording_filename": "sound.wav", + "range_onset": 0, + "range_offset": 30000000, + "format": "csv", + }] + ), + import_function=partial(fake_vocs, segments), + ) + + csp = ConversationsSpecificationPipeline() + + parameters = "tests/data/conversations_parameters.yml" + csp.run(parameters) + + output = pd.read_csv(csp.destination) + output.to_csv("tests/truth/specs_conversations.csv", index=False) + truth = pd.read_csv("tests/truth/specs_conversations.csv") + + pd.testing.assert_frame_equal(output, truth, check_like=True) + + new_params = csp.parameters_path + csp.run(new_params) + + output = pd.read_csv(csp.destination) + + pd.testing.assert_frame_equal(output, truth, check_like=True) \ No newline at end of file diff --git a/tests/test_metrics.py b/tests/test_metrics.py index a7fed408..275fe5ff 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -36,7 +36,7 @@ def am(request, project): project.recordings['duration'] = [100000000, 2000000] #force longer durations 
to allow for imports yield am -#decorating functions with reserved kwargs should fail +# decorating functions with reserved kwargs should fail @pytest.mark.parametrize("error", [ValueError, ]) def test_decorator(error): for reserved in RESERVED: diff --git a/tests/truth/custom_conversations.csv b/tests/truth/custom_conversations.csv new file mode 100644 index 00000000..72e34c70 --- /dev/null +++ b/tests/truth/custom_conversations.csv @@ -0,0 +1,4 @@ +conversation_onset,conversation_offset,voc_count,conv_count,interval_last_conv,recording_filename,initiator,finisher,participants,chi_dur_contrib,fem_dur_contrib +1984136,1988951,3,1.0,,sound.wav,CHI,FEM,CHI/OCH/FEM,0.1286786786786787,0.5193693693693694 +28284010,28287945,2,2.0,26295059.0,sound.wav,OCH,OCH,OCH/MAL,, +28288492,28294692,2,3.0,2917.0,sound.wav,OCH,MAL,OCH/MAL,, diff --git a/tests/truth/python_conversations.csv b/tests/truth/python_conversations.csv new file mode 100644 index 00000000..3f1b46cc --- /dev/null +++ b/tests/truth/python_conversations.csv @@ -0,0 +1,4 @@ +conversation_onset,conversation_offset,voc_count,conv_count,interval_last_conv,recording_filename,initiator,finisher,CHI_voc_count +1984136,1988951,3,1.0,,sound.wav,CHI,FEM,1 +28284010,28287945,2,2.0,26295059.0,sound.wav,OCH,OCH,0 +28288492,28294692,2,3.0,2917.0,sound.wav,OCH,MAL,0 diff --git a/tests/truth/standard_conversations.csv b/tests/truth/standard_conversations.csv new file mode 100644 index 00000000..a01f6202 --- /dev/null +++ b/tests/truth/standard_conversations.csv @@ -0,0 +1,4 @@ +conversation_onset,conversation_offset,voc_count,conv_count,interval_last_conv,recording_filename,initiator,finisher,total_duration_of_vocalisations,CHI_voc_count,FEM_voc_count,MAL_voc_count,OCH_voc_count,CHI_voc_dur,FEM_voc_dur,MAL_voc_dur,OCH_voc_dur,child_id,date_iso,experiment,child_dob +1984136,1988951,3,1.0,,sound.wav,CHI,FEM,6660,1,1,0,1,857.0,3459.0,,2344,1,2020-04-20,test,2020-01-01 
+28284010,28287945,2,2.0,26295059.0,sound.wav,OCH,OCH,4089,0,0,1,1,,,154.0,3935,1,2020-04-20,test,2020-01-01 +28288492,28294692,2,3.0,2917.0,sound.wav,OCH,MAL,1001,0,0,1,1,,,486.0,515,1,2020-04-20,test,2020-01-01