split physioevents and events json (#92)

* change how/if TaskName is extracted
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* fix taskname function
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* fix taskname function
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* fix test_cli.py
* get rid of func TaskName and include it instead in metadata.yml
* split physioevents and events json
* adjust test to new events.json
* fix metadata and output consistency
* fix
* having one nan on a column diff is normal
* run end to end tests on all input datasets

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Remi Gau <[email protected]>
bids-standard · Aug 2, 2024 · f939792 · f939792
1 parent ecf2089
commit f939792
Show file tree

Hide file tree

Showing 3 changed files with 150 additions and 35 deletions.
diff --git a/eye2bids/_base.py b/eye2bids/_base.py
@@ -12,7 +12,7 @@
 
 
 class BasePhysioEventsJson(dict[str, Any]):
-    """Handle content of physioevents sidedar."""
+    """Handle content of physioevents sidecar."""
 
     input_file: Path
     two_eyes: bool
@@ -24,14 +24,19 @@ def __init__(self, metadata: None | dict[str, Any] = None) -> None:
         self["ForeignIndexColumn"] = "timestamp"
 
         self["blink"] = {
-            "Description": "One indicates if the eye was closed, zero if open."
+            "Description": "Gives status of the eye.",
+            "Levels": {
+                "0": "Indicates if the eye was open.",
+                "1": "Indicates if the eye was closed.",
+            },
         }
         self["message"] = {"Description": "String messages logged by the eye-tracker."}
         self["trial_type"] = {
-            "Description": (
-                "Event type as identified by the eye-tracker's model "
-                "((either 'n/a' if not applicabble, 'fixation', or 'saccade')."
-            )
+            "Description": "Event type as identified by the eye-tracker's model.",
+            "Levels": {
+                "fixation": "Indicates a fixation.",
+                "saccade": "Indicates a saccade.",
+            },
         }
 
         self.update_from_metadata(metadata)
@@ -61,6 +66,44 @@ def write(
         self,
         output_dir: Path,
         recording: str | None = None,
+    ) -> None:
+        """Write to json."""
+        content = {key: value for key, value in self.items() if self[key] is not None}
+        with open(output_dir / self.output_filename(recording=recording), "w") as outfile:
+            json.dump(content, outfile, indent=4)
+        # e2b_log.info(f"file generated: {self.output_filename(recording=recording)}")
+
+
+class BaseEventsJson(dict[str, Any]):
+    """Handle content of events sidecar."""
+
+    input_file: Path
+
+    def __init__(self, metadata: None | dict[str, Any] = None) -> None:
+        self.update_from_metadata(metadata)
+
+    def update_from_metadata(self, metadata: None | dict[str, Any] = None) -> None:
+        """Update content of json sidecar based on metadata."""
+        if metadata is None:
+            return None
+
+        self["TaskName"] = metadata.get("TaskName")
+        self["InstitutionAddress"] = metadata.get("InstitutionAddress")
+        self["InstitutionName"] = metadata.get("InstitutionName")
+        self["StimulusPresentation"] = {
+            "ScreenDistance": metadata.get("ScreenDistance"),
+            "ScreenRefreshRate": metadata.get("ScreenRefreshRate"),
+            "ScreenSize": metadata.get("ScreenSize"),
+        }
+
+    def output_filename(self) -> str:
+        """Generate output filename."""
+        filename = self.input_file.stem
+        return f"{filename}_events.json"
+
+    def write(
+        self,
+        output_dir: Path,
         extra_metadata: dict[str, str | list[str] | list[float]] | None = None,
     ) -> None:
         """Write to json."""
@@ -69,7 +112,7 @@ def write(
                 self[key] = value
 
         content = {key: value for key, value in self.items() if self[key] is not None}
-        with open(output_dir / self.output_filename(recording=recording), "w") as outfile:
+        with open(output_dir / self.output_filename(), "w") as outfile:
             json.dump(content, outfile, indent=4)
 
 

diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py
@@ -14,7 +14,7 @@
 from rich.prompt import Prompt
 from yaml.loader import SafeLoader
 
-from eye2bids._base import BasePhysioEventsJson, BasePhysioJson
+from eye2bids._base import BaseEventsJson, BasePhysioEventsJson, BasePhysioJson
 from eye2bids._parser import global_parser
 from eye2bids.logger import eye2bids_logger
 
@@ -324,13 +324,13 @@ def _load_asc_file_as_df(events_asc_file: str | Path) -> pd.DataFrame:
 
 
 def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame:
-    # reduced dataframe without MSG and sample columns
+    """Load asc file as a reduced dataframe without MSG and sample columns."""
     df_ms = _load_asc_file_as_df(events_asc_file)
     return pd.DataFrame(df_ms.iloc[0:, 2:])
 
 
 def _df_events_after_start(events: list[str]) -> pd.DataFrame:
-
+    """Extract data between START and END messages."""
     start_index = next(
         i for i, line in enumerate(events) if re.match(r"START\s+.*", line)
     )
@@ -342,7 +342,7 @@ def _df_events_after_start(events: list[str]) -> pd.DataFrame:
         data_lines = events[start_index + 1 : end_index]
         return pd.DataFrame([line.strip().split("\t") for line in data_lines])
     else:
-        return print("No 'END' found after the selected 'START'.")
+        return e2b_log.warning("No 'END' found after the selected 'START'.")
 
 
 def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame:
@@ -491,10 +491,12 @@ def generate_physio_json(
             )[1::2]
 
     base_json.write(output_dir=output_dir, recording="eye1", extra_metadata=metadata_eye1)
+    e2b_log.info(f"file generated: {base_json.output_filename()}")
     if base_json.two_eyes:
         base_json.write(
             output_dir=output_dir, recording="eye2", extra_metadata=metadata_eye2
         )
+        e2b_log.info(f"file generated: {base_json.output_filename()}")
 
 
 def edf2bids(
@@ -512,7 +514,7 @@ def edf2bids(
         input_file, metadata_file, output_dir, interactive, force
     )
 
-    # CONVERSION events
+    # CONVERSION events #
     events_asc_file = _convert_edf_to_asc_events(input_file)
 
     if not events_asc_file.exists():
@@ -521,35 +523,46 @@ def edf2bids(
             f"{input_file}"
         )
 
-    # %% Sidecar eye-physio.json
+    # SIDECARS #
+    # %% physio.json
     generate_physio_json(input_file, metadata_file, output_dir, events_asc_file)
-
-    # %% physioevents.json Metadata
+    # %% physioevents.json
     events = _load_asc_file(events_asc_file)
 
     df_ms_reduced = _load_asc_file_as_reduced_df(events_asc_file)
 
+    physioevents_json = BasePhysioEventsJson()
+
+    physioevents_json.input_file = input_file
+    physioevents_json.two_eyes = _2eyesmode(df_ms_reduced)
+
+    physioevents_json.write(output_dir=output_dir, recording="eye1")
+    e2b_log.info(f"file generated: {physioevents_json.output_filename()}")
+    if physioevents_json.two_eyes:
+        physioevents_json.write(output_dir=output_dir, recording="eye2")
+        e2b_log.info(f"file generated: {physioevents_json.output_filename()}")
+    # %% events.json
     if metadata_file is None:
         metadata = {}
     else:
         with open(metadata_file) as f:
             metadata = yaml.load(f, Loader=SafeLoader)
 
-    events_json = BasePhysioEventsJson(metadata)
+    events_json = BaseEventsJson(metadata)
 
     events_json.input_file = input_file
-    events_json.two_eyes = _2eyesmode(df_ms_reduced)
 
     events_json["StimulusPresentation"]["ScreenResolution"] = _extract_ScreenResolution(
         df_ms_reduced
     )
 
-    events_json.write(output_dir=output_dir, recording="eye1")
-    if events_json.two_eyes:
-        events_json.write(output_dir=output_dir, recording="eye2")
+    events_json.input_file = input_file
+
+    events_json.write(output_dir=output_dir)
+    e2b_log.info(f"file generated: {events_json.output_filename()}")
 
-    #  %%
-    # Samples to dataframe
+    # SAMPLES #
+    # samples to dataframe
     samples_asc_file = _convert_edf_to_asc_samples(input_file)
     if not samples_asc_file.exists():
         e2b_log.error(
@@ -596,6 +609,7 @@ def edf2bids(
 
         e2b_log.info(f"file generated: {output_filename_eye2}")
 
+    # MESSAGES AND PHYSIOEVENTS #
     # %%
     # Messages and events to dataframes
 

diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py
@@ -30,21 +30,80 @@
 from .conftest import asc_test_files, data_dir, edf_test_files
 
 
-@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
 @pytest.mark.parametrize("input_file", edf_test_files())
 def test_convert_edf_to_asc_events(input_file):
     asc_file = _convert_edf_to_asc_events(input_file)
     assert Path(asc_file).exists()
 
 
-def _check_output_exists(output_dir, input_file, eye=1):
+def _check_output_exists(output_dir: Path, input_file: Path, eye=1):
+    for suffix in [".json", ".tsv.gz"]:
+        for ending in [
+            "_physioevents",
+            "_physio",
+        ]:
+            assert (
+                (output_dir / f"{input_file.stem}_recording-eye{eye}{ending}")
+                .with_suffix(suffix)
+                .exists()
+            )
+
+
+def _check_output_content(output_dir, input_file, eye=1):
+    """Check content of output.
+
+    Make sure each column in the tsv has a description.
+
+    Ensure that all timestamps in physio.tsv.gz are evenly spaced,
+    as they should be regularly sampled.
+    """
     for ending in [
-        "_physioevents.json",
-        "_physio.json",
-        "_physio.tsv.gz",
-        "_physioevents.tsv.gz",
+        "_physioevents",
+        "_physio",
     ]:
-        assert (output_dir / f"{input_file.stem}_recording-eye{eye}{ending}").exists()
+        tsv_file = (
+            output_dir / f"{input_file.stem}_recording-eye{eye}{ending}"
+        ).with_suffix(".tsv.gz")
+        json_file = (
+            output_dir / f"{input_file.stem}_recording-eye{eye}{ending}"
+        ).with_suffix(".json")
+
+        df = pd.read_csv(tsv_file, sep="\t", header=None)
+        with open(json_file) as f:
+            metadata = json.load(f)
+        assert len(df.columns) == len(metadata["Columns"])
+
+        # space between timestamps should always be the same.
+        if ending == "_physio":
+            # length is 2 because the first row will give a nan
+            assert len(df[0].diff().unique()) == 2
+
+
+@pytest.mark.parametrize(
+    "folder",
+    [
+        "emg",
+        "lt",
+        "pitracker",
+        "rest",
+        "vergence",
+    ],
+)
+@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
+def test_edf_end_to_end_all(eyelink_test_data_dir, folder):
+    """Run conversion of all test datasets and check output."""
+    metadata_file = data_dir() / "metadata.yml"
+
+    input_dir = eyelink_test_data_dir / folder
+    input_file = edf_test_files(input_dir=input_dir)[0]
+
+    output_dir = data_dir() / "output"
+    output_dir.mkdir(exist_ok=True)
+
+    edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir)
+
+    _check_output_exists(output_dir, input_file)
+    _check_output_content(output_dir, input_file)
 
 
 @pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
@@ -60,10 +119,9 @@ def test_edf_end_to_end(eyelink_test_data_dir):
     edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir)
 
     _check_output_exists(output_dir, input_file)
+    _check_output_content(output_dir, input_file)
 
-    expected_events_sidecar = (
-        output_dir / f"{input_file.stem}_recording-eye1_physioevents.json"
-    )
+    expected_events_sidecar = output_dir / f"{input_file.stem}_events.json"
     with open(expected_events_sidecar) as f:
         events = json.load(f)
     assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079]
@@ -107,10 +165,9 @@ def test_edf_end_to_end_2eyes(eyelink_test_data_dir):
     edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir)
 
     _check_output_exists(output_dir, input_file)
+    _check_output_content(output_dir, input_file)
 
-    expected_events_sidecar_eye1 = (
-        output_dir / f"{input_file.stem}_recording-eye1_physioevents.json"
-    )
+    expected_events_sidecar_eye1 = output_dir / f"{input_file.stem}_events.json"
     with open(expected_events_sidecar_eye1) as f:
         events = json.load(f)
     assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079]
@@ -125,6 +182,7 @@ def test_edf_end_to_end_2eyes(eyelink_test_data_dir):
     assert eyetrack["RecordedEye"] == "Left"
 
     _check_output_exists(output_dir, input_file, eye=2)
+    _check_output_content(output_dir, input_file, eye=2)
 
     expected_data_sidecar_eye2 = (
         output_dir / f"{input_file.stem}_recording-eye2_physio.json"