diff --git a/.gitignore b/.gitignore index 59495d8..a609414 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,8 @@ tests/data/output *events.json *eyetrack.json *eyetrack.tsv - 2eyes.ipynb +test.ipynb tmp diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 9b8eb68..5a2ccb0 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -3,6 +3,7 @@ from __future__ import annotations import gzip +import re import subprocess from pathlib import Path from typing import Any @@ -313,6 +314,92 @@ def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame: return pd.DataFrame(df_ms.iloc[0:, 2:]) +def _df_events_after_start(events: list[str]) -> pd.DataFrame: + + start_index = next( + i for i, line in enumerate(events) if re.match(r"START\s+.*", line) + ) + end_index = next( + i for i in range(len(events) - 1, -1, -1) if re.match(r"END\s+.*", events[i]) + ) + + if end_index > start_index: + data_lines = events[start_index + 1 : end_index] + return pd.DataFrame([line.strip().split("\t") for line in data_lines]) + else: + return print("No 'END' found after the selected 'START'.") + + +def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: + events_after_start["Event_Letters"] = ( + events_after_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) + ) + events_after_start["Event_Numbers"] = events_after_start[0].str.extract(r"(\d+)") + events_after_start[["msg_timestamp", "message"]] = events_after_start[1].str.split( + n=1, expand=True + ) + events_after_start["message"] = events_after_start["message"].astype(str) + + msg_mask = events_after_start["Event_Letters"] == "MSG" + events_after_start.loc[msg_mask, "Event_Numbers"] = events_after_start.loc[ + msg_mask, "msg_timestamp" + ] + physioevents_reordered = ( + pd.concat( + [ + events_after_start["Event_Numbers"], + events_after_start[2], + events_after_start["Event_Letters"], + events_after_start["message"], + ], + axis=1, + ignore_index=True, + ) + .replace({None: np.nan, "None": np.nan}) + .rename(columns={0: "timestamp", 1: "duration", 2: "trial_type", 3: "message"}) + ) + return physioevents_reordered + + +def _physioevents_for_eye( + physioevents_reordered: pd.DataFrame, eye: str = "L" +) -> pd.DataFrame: + physioevents_eye_list = ["MSG", f"EFIX{eye}", f"ESACC{eye}", f"EBLINK{eye}"] + + physioevents = physioevents_reordered[ + physioevents_reordered["trial_type"].isin(physioevents_eye_list) + ] + + physioevents = physioevents.replace( + {f"EFIX{eye}": "fixation", f"ESACC{eye}": "saccade", "MSG": np.nan, None: np.nan} + ) + + physioevents["blink"] = 0 + last_non_na_trial_type = None + + for i in range(len(physioevents)): + current_trial_type = physioevents.iloc[i]["trial_type"] + if pd.notna(current_trial_type): + if ( + current_trial_type == "saccade" + and last_non_na_trial_type == f"EBLINK{eye}" + ): + physioevents.iloc[i, physioevents.columns.get_loc("blink")] = 1 + last_non_na_trial_type = current_trial_type + + physioevents.loc[physioevents["trial_type"].isna(), "blink"] = np.nan + physioevents["blink"] = physioevents["blink"].astype("Int64") + physioevents = physioevents[physioevents.trial_type != f"EBLINK{eye}"] + + physioevents["timestamp"] = physioevents["timestamp"].astype("Int64") + physioevents["duration"] = physioevents["duration"].astype("Int64") + + physioevents = physioevents[ + ["timestamp", "duration", "trial_type", "blink", "message"] + ] + return physioevents + + def generate_physio_json( input_file: Path, metadata_file: str | Path | None, @@ -454,7 +541,7 @@ def edf2bids( samples = pd.read_csv(samples_asc_file, sep="\t", header=None) samples_eye1 = ( - pd.DataFrame(samples.iloc[:, 0:4]) + pd.DataFrame(samples.iloc[:, [2, 1, 3, 0]]) .map(lambda x: x.strip() if isinstance(x, str) else x) .replace(".", np.nan, regex=False) ) @@ -491,7 +578,62 @@ def edf2bids( e2b_log.info(f"file generated: {output_filename_eye2}") - # Messages and events to physioevents.tsv.gz - tbc + # %% + # Messages and events to dataframes + + events_after_start = _df_events_after_start(events) + physioevents_reordered = _df_physioevents(events_after_start) + physioevents_eye1 = _physioevents_for_eye(physioevents_reordered, eye="L") + physioevents_eye2 = _physioevents_for_eye(physioevents_reordered, eye="R") + + # %% + # Messages and events to physioevents.tsv.gz + + if not _2eyesmode(df_ms_reduced): + output_eventsfilename_eye1 = generate_output_filename( + output_dir=output_dir, + input_file=input_file, + suffix="_recording-eye1_physioevents", + extension="tsv.gz", + ) + if _extract_RecordedEye(df_ms_reduced) == "Left": + data_to_save = physioevents_eye1 + elif _extract_RecordedEye(df_ms_reduced) == "Right": + data_to_save = physioevents_eye2 + content = data_to_save.to_csv(sep="\t", index=False, na_rep="n/a", header=None) + with gzip.open(output_eventsfilename_eye1, "wb") as f: + f.write(content.encode()) + + e2b_log.info(f"file generated: {output_eventsfilename_eye1}") + + else: + output_eventsfilename_eye1 = generate_output_filename( + output_dir=output_dir, + input_file=input_file, + suffix="_recording-eye1_physioevents", + extension="tsv.gz", + ) + content = physioevents_eye1.to_csv( + sep="\t", index=False, na_rep="n/a", header=None + ) + with gzip.open(output_eventsfilename_eye1, "wb") as f: + f.write(content.encode()) + + e2b_log.info(f"file generated: {output_eventsfilename_eye1}") + + output_eventsfilename_eye2 = generate_output_filename( + output_dir=output_dir, + input_file=input_file, + suffix="_recording-eye2_physioevents", + extension="tsv.gz", + ) + content = physioevents_eye2.to_csv( + sep="\t", index=False, na_rep="n/a", header=None + ) + with gzip.open(output_eventsfilename_eye2, "wb") as f: + f.write(content.encode()) + + e2b_log.info(f"file generated: {output_eventsfilename_eye2}") def generate_output_filename( diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index aad9bc8..a753aa8 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -37,6 +37,16 @@ def test_convert_edf_to_asc_events(input_file): assert Path(asc_file).exists() +def _check_output_exists(output_dir, input_file, eye=1): + for ending in [ + "_physioevents.json", + "_physio.json", + "_physio.tsv.gz", + "_physioevents.tsv.gz", + ]: + assert (output_dir / f"{input_file.stem}_recording-eye{eye}{ending}").exists() + + @pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") @pytest.mark.parametrize("metadata_file", [data_dir() / "metadata.yml", None]) def test_edf_end_to_end(metadata_file, eyelink_test_data_dir): @@ -46,103 +56,79 @@ def test_edf_end_to_end(metadata_file, eyelink_test_data_dir): output_dir = data_dir() / "output" output_dir.mkdir(exist_ok=True) - edf2bids( - input_file=input_file, - metadata_file=metadata_file, - output_dir=output_dir, - ) + edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir) + + _check_output_exists(output_dir, input_file) expected_events_sidecar = ( output_dir / f"{input_file.stem}_recording-eye1_physioevents.json" ) - assert expected_events_sidecar.exists() with open(expected_events_sidecar) as f: events = json.load(f) assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079] expected_data_sidecar = output_dir / f"{input_file.stem}_recording-eye1_physio.json" - assert expected_data_sidecar.exists() with open(expected_data_sidecar) as f: eyetrack = json.load(f) assert eyetrack["SamplingFrequency"] == 500 assert eyetrack["RecordedEye"] == "Right" - expected_events_tsv = output_dir / f"{input_file.stem}_recording-eye1_physio.tsv.gz" - assert expected_events_tsv.exists() - @pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") -def test_edf_nan_in_tsv(eyelink_test_data_dir): - """Check that dots '.' are converted to NaN in the tsv file.""" - input_dir = eyelink_test_data_dir / "emg" +@pytest.mark.parametrize("metadata_file", [data_dir() / "metadata.yml", None]) +def test_edf_end_to_end_2eyes(metadata_file, eyelink_test_data_dir): + input_dir = eyelink_test_data_dir / "2eyes" input_file = edf_test_files(input_dir=input_dir)[0] output_dir = data_dir() / "output" output_dir.mkdir(exist_ok=True) - edf2bids( - input_file=input_file, - output_dir=output_dir, - ) + edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir) - expected_eyetrack_tsv = output_dir / f"{input_file.stem}_recording-eye1_physio.tsv.gz" - df = pd.read_csv(expected_eyetrack_tsv, sep="\t", header=None) - count = sum(i == "." for i in df[0]) - assert count == 0 - - -@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") -def test_2files_eye1(eyelink_test_data_dir): - """Check that for datafile with 2eyes 2 eye1 file is created and check input. - - function _2eyesmode - """ - input_dir = eyelink_test_data_dir / "2eyes" - input_file = edf_test_files(input_dir=input_dir)[0] - - output_dir = data_dir() / "output" - output_dir.mkdir(exist_ok=True) + _check_output_exists(output_dir, input_file) - edf2bids( - input_file=input_file, - output_dir=output_dir, + expected_events_sidecar_eye1 = ( + output_dir / f"{input_file.stem}_recording-eye1_physioevents.json" ) + with open(expected_events_sidecar_eye1) as f: + events = json.load(f) + assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079] - expected_eyetrack_sidecar = ( + expected_data_sidecar_eye1 = ( output_dir / f"{input_file.stem}_recording-eye1_physio.json" ) - assert expected_eyetrack_sidecar.exists() - with open(expected_eyetrack_sidecar) as f: + with open(expected_data_sidecar_eye1) as f: eyetrack = json.load(f) + assert eyetrack["SamplingFrequency"] == 1000 assert eyetrack["AverageCalibrationError"] == [[0.29]] assert eyetrack["RecordedEye"] == "Left" + _check_output_exists(output_dir, input_file, eye=2) -@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") -def test_2files_eye2(eyelink_test_data_dir): - """Check that for datafile with 2eyes 2 eye2 file is created and check input. + expected_data_sidecar_eye2 = ( + output_dir / f"{input_file.stem}_recording-eye2_physio.json" + ) + with open(expected_data_sidecar_eye2) as f: + eyetrack = json.load(f) + assert eyetrack["AverageCalibrationError"] == [[0.35]] + assert eyetrack["RecordedEye"] == "Right" - function _2eyesmode - """ - input_dir = eyelink_test_data_dir / "2eyes" + +@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") +def test_edf_nan_in_tsv(eyelink_test_data_dir): + """Check that dots '.' are converted to NaN in the tsv file.""" + input_dir = eyelink_test_data_dir / "emg" input_file = edf_test_files(input_dir=input_dir)[0] output_dir = data_dir() / "output" output_dir.mkdir(exist_ok=True) - edf2bids( - input_file=input_file, - output_dir=output_dir, - ) + edf2bids(input_file=input_file, output_dir=output_dir) - expected_eyetrack_sidecar = ( - output_dir / f"{input_file.stem}_recording-eye2_physio.json" - ) - assert expected_eyetrack_sidecar.exists() - with open(expected_eyetrack_sidecar) as f: - eyetrack = json.load(f) - assert eyetrack["AverageCalibrationError"] == [[0.35]] - assert eyetrack["RecordedEye"] == "Right" + expected_eyetrack_tsv = output_dir / f"{input_file.stem}_recording-eye1_physio.tsv.gz" + df = pd.read_csv(expected_eyetrack_tsv, sep="\t", header=None) + count = sum(i == "." for i in df[0]) + assert count == 0 @pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") @@ -158,13 +144,10 @@ def test_number_columns_2eyes_tsv(eyelink_test_data_dir): output_dir = data_dir() / "output" output_dir.mkdir(exist_ok=True) - edf2bids( - input_file=input_file, - output_dir=output_dir, - ) + edf2bids(input_file=input_file, output_dir=output_dir) expected_eyetrack_tsv = output_dir / f"{input_file.stem}_recording-eye1_physio.tsv.gz" - df = pd.read_csv(expected_eyetrack_tsv, sep="\t", header=None) + df = pd.read_csv(expected_eyetrack_tsv, sep="\t") number_columns = len(df.columns) assert number_columns == 4 @@ -182,13 +165,10 @@ def test_number_columns_1eye_tsv(eyelink_test_data_dir): output_dir = data_dir() / "output" output_dir.mkdir(exist_ok=True) - edf2bids( - input_file=input_file, - output_dir=output_dir, - ) + edf2bids(input_file=input_file, output_dir=output_dir) expected_eyetrack_tsv = output_dir / f"{input_file.stem}_recording-eye1_physio.tsv.gz" - df = pd.read_csv(expected_eyetrack_tsv, sep="\t", header=None) + df = pd.read_csv(expected_eyetrack_tsv, sep="\t") number_columns = len(df.columns) assert number_columns == 4 @@ -477,3 +457,81 @@ def test_extract_AverageCalibrationError(folder, expected, eyelink_test_data_dir asc_file = asc_test_files(input_dir=input_dir, suffix="*_events")[0] df_ms = _load_asc_file_as_df(asc_file) assert _extract_AverageCalibrationError(df_ms) == expected + + +@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing") +def test_number_columns_physioevents_tsv(eyelink_test_data_dir): + """Check right number of columns in physioevents.tsv.gz.""" + input_dir = eyelink_test_data_dir / "2eyes" + print(edf_test_files(input_dir=input_dir)) + input_file = edf_test_files(input_dir=input_dir)[0] + + output_dir = data_dir() / "output" + output_dir.mkdir(exist_ok=True) + + edf2bids(input_file=input_file, output_dir=output_dir) + + expected_physioevents_tsv = ( + output_dir / f"{input_file.stem}_recording-eye2_physioevents.tsv.gz" + ) + df = pd.read_csv(expected_physioevents_tsv, sep="\t") + number_columns = len(df.columns) + assert number_columns == 5 + + +@pytest.mark.parametrize( + "folder, expected", + [ + ( + "rest", + [ + "fixation", + "saccade", + "fixation", + "saccade", + "fixation", + "saccade", + "fixation", + ], + ), + ( + "2eyes", + [ + "fixation", + "saccade", + "fixation", + "saccade", + "fixation", + "saccade", + "fixation", + ], + ), + ( + "pitracker", + [ + "saccade", + "fixation", + "saccade", + "fixation", + "saccade", + "fixation", + "saccade", + ], + ), + ], +) +def test_physioevents_value(folder, expected, eyelink_test_data_dir): + """Check content physioevents.tsv.gz.""" + input_dir = eyelink_test_data_dir / folder + input_file = edf_test_files(input_dir=input_dir)[0] + + output_dir = data_dir() / "output" + output_dir.mkdir(exist_ok=True) + + edf2bids(input_file=input_file, output_dir=output_dir) + + expected_eyetrackphysio_tsv = ( + output_dir / f"{input_file.stem}_recording-eye1_physioevents.tsv.gz" + ) + df = pd.read_csv(expected_eyetrackphysio_tsv, sep="\t", header=None) + assert df.iloc[3:10, 2].tolist() == expected