Change output filename & ScreenResolution return values (#34)

* Change output filename & ScreenResolution return values
  - Output filenames (according to issue #24): the output file now has the prefix of the edf file. If the edf file name already contains the "_eyetrack" suffix, it is not written again.
  - ScreenResolution threw an error because integers were expected but strings were returned --> changed to integers.
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* include code for extracting eyetrack.tsv samples table
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* update ignore
* lint
* fix some tests
* adapt test
* run on python 3.12
* only work with full asc files
* refactor and fix
* add xfail

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Remi Gau <[email protected]>
bids-standard · Nov 7, 2023 · aa55681 · aa55681
1 parent 361d6f3
commit aa55681
Show file tree

Hide file tree

Showing 8 changed files with 162 additions and 69 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -17,7 +17,7 @@ jobs:
         strategy:
             fail-fast: false
             matrix:
-                python-version: ['3.8', '3.9', '3.10', '3.11']
+                python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
 
         runs-on: ubuntu-latest
 

diff --git a/.gitignore b/.gitignore
@@ -1,11 +1,9 @@
-tests/data/test_data.zip
-tests/data/**/*.json
-**/*.asc
-**/*.edf
-**/*.EDF
+tests/data/osf
+tests/data/output
 eye2bids/_version.py
-events.json
-eyetrack.json
+*events.json
+*eyetrack.json
+*eyetrack.tsv
 
 # General
 .DS_Store

diff --git a/eye2bids/config/metadata.yml b/eye2bids/config/metadata.yml
@@ -1,26 +1,26 @@
 ---
 # manual_metadata
-# Please find the BIDS specification here: https://bids-specification--1128.org.readthedocs.build/en/1128/modality-specific-files/eye-tracking.html
-# Please go through the specification and fille the fields below according to the data types given in the specification.
+# Please find the BIDS specification here:
+# https://bids-specification--1128.org.readthedocs.build/en/1128/modality-specific-files/eye-tracking.html
+# Please go through the specification
+# and fill the fields below according to the data types given in the specification.
 # !Note that ScreenSize and EyeTrackerDistance units are in meter, not centimeter!
 
 # REQUIRED, fill according to BIDS specification (!the converter will not run successfully if you leave this empty!):
-SampleCoordinateUnits: !!str
-SampleCoordinateSystem: !!str
-EnvironmentCoordinates: !!str
-ScreenDistance: !!int
-ScreenRefreshRate: !!int
-ScreenSize: [!!float]
+SampleCoordinateUnits: str
+SampleCoordinateSystem: str
+EnvironmentCoordinates: str
+ScreenDistance: int
+ScreenRefreshRate: int
+ScreenSize: [float]
 
 # Recommended (leave empty if not available but put information you want to share with your dataset here!):
-SoftwareVersion: !!str
-ScreenAOIDefinition: [!!str [!!int]]
+SoftwareVersion: str
+ScreenAOIDefinition: [str, [int]]
 EyeCameraSettings:
-EyeTrackerDistance: !!float
+EyeTrackerDistance: float
 FeatureDetectionSettings:
 GazeMappingSettings:
 RawDataFilters:
-InstitutionName: !!str
-InstitutionAddress: !!str
-
-...
+InstitutionName: str
+InstitutionAddress: str
diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py
@@ -96,10 +96,22 @@ def _check_edf2asc_present() -> bool:
         return False
 
 
-def _convert_edf_to_asc(input_file: str | Path) -> Path:
-    """Convert edf to asc."""
-    subprocess.run(["edf2asc", "-y", "-e", input_file])
-    return Path(input_file).with_suffix(".asc")
+def _convert_edf_to_asc_events(input_file: str | Path) -> Path:
+    """Convert edf to asc - events."""
+    if isinstance(input_file, str):
+        input_file = Path(input_file)
+    events_asc_file = (input_file.parent) / (input_file.stem + "_events")
+    subprocess.run(["edf2asc", "-y", "-e", input_file, "-o", events_asc_file])
+    return Path(events_asc_file).with_suffix(".asc")
+
+
+def _convert_edf_to_asc_samples(input_file: str | Path) -> Path:
+    """Convert edf to asc - samples."""
+    if isinstance(input_file, str):
+        input_file = Path(input_file)
+    samples_asc_file = (input_file.parent) / (input_file.stem + "_samples")
+    subprocess.run(["edf2asc", "-y", "-s", input_file, "-o", samples_asc_file])
+    return Path(samples_asc_file).with_suffix(".asc")
 
 
 def _calibrations(df: pd.DataFrame) -> pd.DataFrame:
@@ -221,13 +233,14 @@ def _extract_RecordedEye(df: pd.DataFrame) -> str:
 
 
 def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]:
-    return (
+    list_res = (
         (df[df[2] == "GAZE_COORDS"])
         .iloc[0:1, 3:5]
         .to_string(header=False, index=False)
         .replace(".00", "")
         .split(" ")
     )
+    return [eval(i) for i in list_res]
 
 
 def _extract_TaskName(events: list[str]) -> str:
@@ -247,8 +260,10 @@ def _extract_StartTime(events: list[str]) -> int:
     if len(StartTime) > 1:
         e2b_log.info(
             """Your input file contains multiple start times.\n
-             As this is not seen as good practice in eyetracking experiments, only the first start time will be kept for the metadata file.\n
-             Please consider changing your code accordingly for future eyetracking experiments.\n"""
+             As this is not seen as good practice in eyetracking experiments, \n
+             only the first start time will be kept for the metadata file. \n
+             Please consider changing your code accordingly
+             for future eyetracking experiments.\n"""
         )
         return StartTime[0]
     return StartTime
@@ -263,30 +278,61 @@ def _extract_StopTime(events: list[str]) -> int:
     if len(StopTime) > 1:
         e2b_log.info(
             """Your input file contains multiple stop times.\n
-             As this is not seen as good practice in eyetracking experiments, only the last stop time will be kept for the metadata file.\n
-             Please consider changing your code accordingly for future eyetracking experiments.\n"""
+             As this is not seen as good practice in eyetracking experiments, \n
+             only the last stop time will be kept for the metadata file. \n
+             Please consider changing your code accordingly
+             for future eyetracking experiments.\n"""
         )
         return StopTime[-1]
     return StopTime
 
 
-def _load_asc_file(asc_file: str | Path) -> list[str]:
-    with open(asc_file) as f:
+def _load_asc_file(events_asc_file: str | Path) -> list[str]:
+    with open(events_asc_file) as f:
         return f.readlines()
 
 
-def _load_asc_file_as_df(asc_file: str | Path) -> pd.DataFrame:
+def _load_asc_file_as_df(events_asc_file: str | Path) -> pd.DataFrame:
     # dataframe for events, all
-    events = _load_asc_file(asc_file)
+    events = _load_asc_file(events_asc_file)
     return pd.DataFrame([ms.split() for ms in events if ms.startswith("MSG")])
 
 
-def _load_asc_file_as_reduced_df(asc_file: str | Path) -> pd.DataFrame:
+def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame:
     # reduced dataframe without MSG and sample columns
-    df_ms = _load_asc_file_as_df(asc_file)
+    df_ms = _load_asc_file_as_df(events_asc_file)
     return pd.DataFrame(df_ms.iloc[0:, 2:])
 
 
+def _samples_to_data_frame(samples_asc_file: str | Path) -> pd.DataFrame:
+    column_names = [
+        "eye_timestamp",
+        "eye1_x_coordinate",
+        "eye1_y_coordinate",
+        "eye1_pupil_size",
+        "eye2_x_coordinate",
+        "eye2_y_coordinate",
+        "eye2_pupil_size",
+    ]
+
+    data: dict[str, list[str]] = {name: [] for name in column_names}
+
+    with open(samples_asc_file) as file:
+        for line in file:
+            columns = line.strip().split("\t")
+            for i in range(len(column_names)):
+                if i < len(columns):
+                    data[column_names[i]].append(columns[i])
+
+    data = {
+        key: value
+        for key, value in data.items()
+        if any(val not in ("", "...") for val in value)
+    }
+
+    return pd.DataFrame(data)
+
+
 def edf2bids(
     input_file: str | Path | None = None,
     metadata_file: str | Path | None = None,
@@ -302,19 +348,19 @@ def edf2bids(
     )
 
     # CONVERSION events
-    asc_file = _convert_edf_to_asc(input_file)
+    events_asc_file = _convert_edf_to_asc_events(input_file)
 
-    events = _load_asc_file(asc_file)
-    df_ms = _load_asc_file_as_df(asc_file)
-    df_ms_reduced = _load_asc_file_as_reduced_df(asc_file)
+    events = _load_asc_file(events_asc_file)
+    df_ms = _load_asc_file_as_df(events_asc_file)
+    df_ms_reduced = _load_asc_file_as_reduced_df(events_asc_file)
 
     if metadata_file is None:
         metadata = {}
     else:
         with open(metadata_file) as f:
             metadata = yaml.load(f, Loader=SafeLoader)
 
-    # to json
+    # eyetrack.json Metadata
     eyetrack_json = {
         "Manufacturer": "SR-Research",
         "EnvironmentCoordinates": metadata.get("EnvironmentCoordinates"),
@@ -343,11 +389,14 @@ def edf2bids(
         "StopTime": _extract_StopTime(events),
     }
 
-    with open(output_dir / "_eyetrack.json", "w") as outfile:
+    output_filename = generate_output_filename(
+        output_dir=output_dir, input_file=input_file, suffix="_eyetrack", extension="json"
+    )
+    with open(output_filename, "w") as outfile:
         json.dump(eyetrack_json, outfile, indent=4)
-    e2b_log.info(f"file generated: {output_dir / '_eyetrack.json'}")
+    e2b_log.info(f"file generated: {output_filename}")
 
-    # Events.json Metadata
+    # events.json Metadata
     events_json = {
         "InstitutionAddress": metadata.get("InstitutionAddress"),
         "InstitutionName": metadata.get("InstitutionName"),
@@ -360,9 +409,33 @@ def edf2bids(
         "TaskName": _extract_TaskName(events),
     }
 
-    with open(output_dir / "_events.json", "w") as outfile:
+    output_filename = generate_output_filename(
+        output_dir=output_dir, input_file=input_file, suffix="_events", extension="json"
+    )
+    with open(output_filename, "w") as outfile:
         json.dump(events_json, outfile, indent=4)
-    e2b_log.info(f"file generated: {output_dir / '_events.json'}")
+    e2b_log.info(f"file generated: {output_filename}")
+
+    # Samples to eyetrack.tsv
+    samples_asc_file = _convert_edf_to_asc_samples(input_file)
+    eyetrack_tsv = _samples_to_data_frame(samples_asc_file)
+
+    output_filename = generate_output_filename(
+        output_dir=output_dir, input_file=input_file, suffix="_eyetrack", extension="tsv"
+    )
+    with open(output_filename, "w") as outfile:
+        eyetrack_tsv.to_csv(outfile, sep="\t", index=False, compression="gzip")
+    e2b_log.info(f"file generated: {output_filename}")
+
+
+def generate_output_filename(
+    output_dir: Path, input_file: Path, suffix: str, extension: str
+) -> Path:
+    """Generate output filename."""
+    filename = Path(input_file).stem
+    if filename.endswith(suffix):
+        suffix = ""
+    return output_dir / f"{filename}{suffix}.{extension}"
 
 
 if __name__ == "__main__":

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -10,7 +10,13 @@ def data_dir() -> Path:
 
 
 def asc_test_files(input_dir: Path = data_dir()) -> list[Path]:
-    return list(input_dir.glob("**/*.asc"))
+    files = input_dir.glob("**/*.asc")
+    tmp = [
+        f
+        for f in files
+        if (not str(f).endswith("events.asc") and not str(f).endswith("samples.asc"))
+    ]
+    return tmp
 
 
 def edf_test_files(input_dir: Path = data_dir()) -> list[Path]:

diff --git a/tests/data/metadata.yml b/tests/data/metadata.yml
@@ -1,7 +1,9 @@
 ---
 # manual_metadata
-# Please find the BIDS specification here: https://bids-specification--1128.org.readthedocs.build/en/1128/modality-specific-files/eye-tracking.html
-# Please go through the specification and fille the fields below according to the data types given in the specification.
+# Please find the BIDS specification here:
+# https://bids-specification--1128.org.readthedocs.build/en/1128/modality-specific-files/eye-tracking.html
+# Please go through the specification
+# and fill the fields below according to the data types given in the specification.
 
 # REQUIRED, fill according to BIDS specification (!the converter will not run successfully if you leave this empty!):
 SampleCoordinateUnits: pixel

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -15,14 +15,12 @@ def root_dir() -> Path:
     return Path(__file__).parent.parent
 
 
+@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
 @pytest.mark.parametrize("metadata_file", [data_dir() / "metadata.yml", None])
 @pytest.mark.parametrize("output_dir", [data_dir() / "output", None])
 @pytest.mark.parametrize("use_relative_path", [False, True])
 def test_edf_cli(use_relative_path, metadata_file, output_dir, eyelink_test_data_dir):
-    if not _check_edf2asc_present():
-        pytest.skip("edf2asc missing")
-
-    input_dir = eyelink_test_data_dir / "decisions"
+    input_dir = eyelink_test_data_dir / "satf"
     input_file = edf_test_files(input_dir=input_dir)[0]
 
     if use_relative_path:
@@ -42,3 +40,20 @@ def test_edf_cli(use_relative_path, metadata_file, output_dir, eyelink_test_data
         command.extend(["--output_dir", output_dir])
 
     cli(command)
+
+
+@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
+@pytest.mark.parametrize(
+    "input_file", edf_test_files(input_dir=data_dir() / "osf" / "eyelink")
+)
+def test_all_edf_files(input_file):
+    if "decision" in str(input_file):
+        pytest.xfail("Dataset decision is known to fail for now.")
+    command = [
+        "eye2bids",
+        "--input_file",
+        str(input_file),
+        "--output_dir",
+        str(data_dir() / "output"),
+    ]
+    cli(command)
diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py
@@ -7,7 +7,7 @@
 
 from eye2bids.edf2bids import (
     _check_edf2asc_present,
-    _convert_edf_to_asc,
+    _convert_edf_to_asc_events,
     _extract_AverageCalibrationError,
     _extract_CalibrationPosition,
     _extract_CalibrationType,
@@ -29,20 +29,17 @@
 from .conftest import asc_test_files, data_dir, edf_test_files
 
 
+@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
 @pytest.mark.parametrize("input_file", edf_test_files())
-def test_convert_edf_to_asc(input_file):
-    if not _check_edf2asc_present():
-        pytest.skip("edf2asc missing")
-    asc_file = _convert_edf_to_asc(input_file)
+def test_convert_edf_to_asc_events(input_file):
+    asc_file = _convert_edf_to_asc_events(input_file)
     assert Path(asc_file).exists()
 
 
+@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
 @pytest.mark.parametrize("metadata_file", [data_dir() / "metadata.yml", None])
 def test_edf_end_to_end(metadata_file, eyelink_test_data_dir):
-    if not _check_edf2asc_present():
-        pytest.skip("edf2asc missing")
-
-    input_dir = eyelink_test_data_dir / "decisions"
+    input_dir = eyelink_test_data_dir / "satf"
     input_file = edf_test_files(input_dir=input_dir)[0]
 
     output_dir = data_dir() / "output"
@@ -54,15 +51,17 @@ def test_edf_end_to_end(metadata_file, eyelink_test_data_dir):
         output_dir=output_dir,
     )
 
-    assert (output_dir / "events.json").exists()
-    with open(output_dir / "events.json") as f:
+    expected_events_sidecar = output_dir / f"{input_file.stem}_events.json"
+    assert expected_events_sidecar.exists()
+    with open(expected_events_sidecar) as f:
         events = json.load(f)
     assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079]
 
-    assert (output_dir / "eyetrack.json").exists()
-    with open(output_dir / "eyetrack.json") as f:
+    expected_data_sidecar = output_dir / f"{input_file.stem}_eyetrack.json"
+    assert expected_data_sidecar.exists()
+    with open(expected_data_sidecar) as f:
         eyetrack = json.load(f)
-    assert eyetrack["SamplingFrequency"] == 1000
+    assert eyetrack["SamplingFrequency"] == 500
     assert eyetrack["RecordedEye"] == "Right"