From f95a9c52b7c3068e756cc35622012606f4736539 Mon Sep 17 00:00:00 2001
From: paulroujansky <paul@roujansky.eu>
Date: Tue, 5 Sep 2023 15:51:39 +0200
Subject: [PATCH] Fix bug with subject_info when loading from and exporting to
 EDF file (#11952)

Co-authored-by: Paul ROUJANSKY <paul.roujansky@bioserenity.com>
---
 doc/changes/1.5.inc             |  1 +
 mne/export/_edf.py              | 29 ++++++++++-------
 mne/export/tests/test_export.py | 14 ++++++++-
 mne/io/edf/edf.py               | 55 ++++++++++++++++++++++++++++++++-
 mne/io/edf/tests/test_edf.py    | 43 +++++++++++++++++++++++---
 mne/io/edf/tests/test_gdf.py    | 10 ++++--
 6 files changed, 133 insertions(+), 19 deletions(-)

diff --git a/doc/changes/1.5.inc b/doc/changes/1.5.inc
index f76f09d8824..a69f0a53f2e 100644
--- a/doc/changes/1.5.inc
+++ b/doc/changes/1.5.inc
@@ -26,6 +26,7 @@ Bugs
 - Fix bug with multi-plot 3D rendering where only one plot was updated (:gh:`11896` by `Eric Larson`_)
 - Fix bug with :func:`mne.chpi.compute_head_pos` for CTF data where digitization points were modified in-place, producing an incorrect result during a save-load round-trip (:gh:`11934` by `Eric Larson`_)
 - Fix bug with notebooks when using PyVista 0.42 by implementing ``trame`` backend support (:gh:`11956` by `Eric Larson`_)
+- Fix bug where ``subject_info`` was not populated in ``raw.info`` when reading EDF files and was only partially written when exporting to EDF (:gh:`11952` by `Paul Roujansky`_)
 
 
 .. _changes_1_5_0:
diff --git a/mne/export/_edf.py b/mne/export/_edf.py
index 3666aae30fe..da2acb72ff8 100644
--- a/mne/export/_edf.py
+++ b/mne/export/_edf.py
@@ -189,21 +189,27 @@ def _export_raw(fname, raw, physical_range, add_ch_type):
         # set patient info
         subj_info = raw.info.get("subject_info")
         if subj_info is not None:
-            birthday = subj_info.get("birthday")
-
             # get the full name of subject if available
-            first_name = subj_info.get("first_name")
-            last_name = subj_info.get("last_name")
-            first_name = first_name or ""
-            last_name = last_name or ""
-            joiner = ""
-            if len(first_name) and len(last_name):
-                joiner = " "
-            name = joiner.join([first_name, last_name])
+            first_name = subj_info.get("first_name", "")
+            middle_name = subj_info.get("middle_name", "")
+            last_name = subj_info.get("last_name", "")
+            name = " ".join(filter(None, [first_name, middle_name, last_name]))
 
+            birthday = subj_info.get("birthday")
             hand = subj_info.get("hand")
+            weight = subj_info.get("weight")
+            height = subj_info.get("height")
             sex = subj_info.get("sex")
 
+            additional_patient_info = []
+            for key, value in [("height", height), ("weight", weight), ("hand", hand)]:
+                if value:
+                    additional_patient_info.append(f"{key}={value}")
+            if len(additional_patient_info) == 0:
+                additional_patient_info = None
+            else:
+                additional_patient_info = " ".join(additional_patient_info)
+
             if birthday is not None:
                 if hdl.setPatientBirthDate(birthday[0], birthday[1], birthday[2]) != 0:
                     raise RuntimeError(
@@ -211,9 +217,10 @@ def _export_raw(fname, raw, physical_range, add_ch_type):
                         f"returned an error"
                     )
             for key, val in [
+                ("PatientCode", subj_info.get("his_id", "")),
                 ("PatientName", name),
                 ("PatientGender", sex),
-                ("AdditionalPatientInfo", f"hand={hand}"),
+                ("AdditionalPatientInfo", additional_patient_info),
             ]:
                 # EDFwriter compares integer encodings of sex and will
                 # raise a TypeError if value is None as returned by
diff --git a/mne/export/tests/test_export.py b/mne/export/tests/test_export.py
index 96fafd204ef..8959ad5f84e 100644
--- a/mne/export/tests/test_export.py
+++ b/mne/export/tests/test_export.py
@@ -134,11 +134,19 @@ def test_double_export_edf(tmp_path):
         "bio",
     ]
     info = create_info(len(ch_types), sfreq=1000, ch_types=ch_types)
+    info = info.set_meas_date("2023-09-04 14:53:09.000")
     data = rng.random(size=(len(ch_types), 1000)) * 1e-5
 
     # include subject info and measurement date
     info["subject_info"] = dict(
-        first_name="mne", last_name="python", birthday=(1992, 1, 20), sex=1, hand=3
+        his_id="12345",
+        first_name="mne",
+        last_name="python",
+        birthday=(1992, 1, 20),
+        sex=1,
+        weight=78.3,
+        height=1.75,
+        hand=3,
     )
     raw = RawArray(data, info)
 
@@ -163,6 +171,10 @@ def test_double_export_edf(tmp_path):
     )
     assert_allclose(raw.times, raw_read.times[:orig_raw_len], rtol=0, atol=1e-5)
 
+    # check that every info field except "chs" matches after export and re-read
+    for key in set(raw.info) - {"chs"}:
+        assert raw.info[key] == raw_read.info[key]
+
     # check channel types except for 'bio', which loses its type
     orig_ch_types = raw.get_channel_types()
     read_ch_types = raw_read.get_channel_types()
diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py
index 1a616f1f733..40312414df8 100644
--- a/mne/io/edf/edf.py
+++ b/mne/io/edf/edf.py
@@ -644,6 +644,49 @@ def _get_info(
     info["chs"] = chs
     info["ch_names"] = ch_names
 
+    # Subject information
+    info["subject_info"] = {}
+
+    # String subject identifier
+    if edf_info["subject_info"].get("id") is not None:
+        info["subject_info"]["his_id"] = edf_info["subject_info"]["id"]
+    # Subject sex (0=unknown, 1=male, 2=female)
+    if edf_info["subject_info"].get("sex") is not None:
+        if edf_info["subject_info"]["sex"] == "M":
+            info["subject_info"]["sex"] = 1
+        elif edf_info["subject_info"]["sex"] == "F":
+            info["subject_info"]["sex"] = 2
+        else:
+            info["subject_info"]["sex"] = 0
+    # Subject names: "_"-separated first[, middle], last; otherwise all in last_name.
+    if edf_info["subject_info"].get("name") is not None:
+        sub_names = edf_info["subject_info"]["name"].split("_")
+        if len(sub_names) < 2 or len(sub_names) > 3:
+            info["subject_info"]["last_name"] = edf_info["subject_info"]["name"]
+        elif len(sub_names) == 2:
+            info["subject_info"]["first_name"] = sub_names[0]
+            info["subject_info"]["last_name"] = sub_names[1]
+        else:
+            info["subject_info"]["first_name"] = sub_names[0]
+            info["subject_info"]["middle_name"] = sub_names[1]
+            info["subject_info"]["last_name"] = sub_names[2]
+    # Birthday in (year, month, day) format.
+    if isinstance(edf_info["subject_info"].get("birthday"), datetime):
+        info["subject_info"]["birthday"] = (
+            edf_info["subject_info"]["birthday"].year,
+            edf_info["subject_info"]["birthday"].month,
+            edf_info["subject_info"]["birthday"].day,
+        )
+    # Handedness (1=right, 2=left, 3=ambidextrous).
+    if edf_info["subject_info"].get("hand") is not None:
+        info["subject_info"]["hand"] = int(edf_info["subject_info"]["hand"])
+    # Height in meters.
+    if edf_info["subject_info"].get("height") is not None:
+        info["subject_info"]["height"] = float(edf_info["subject_info"]["height"])
+    # Weight in kilograms.
+    if edf_info["subject_info"].get("weight") is not None:
+        info["subject_info"]["weight"] = float(edf_info["subject_info"]["weight"])
+
     # Filter settings
     highpass = edf_info["highpass"]
     lowpass = edf_info["lowpass"]
@@ -766,7 +809,7 @@ def _read_edf_header(fname, exclude, infer_types, include=None):
         id_info = id_info.split(" ")
         if len(id_info):
             patient["id"] = id_info[0]
-            if len(id_info) == 4:
+            if len(id_info) >= 4:
                 try:
                     birthdate = datetime.strptime(id_info[2], "%d-%b-%Y")
                 except ValueError:
@@ -774,6 +817,16 @@ def _read_edf_header(fname, exclude, infer_types, include=None):
                 patient["sex"] = id_info[1]
                 patient["birthday"] = birthdate
                 patient["name"] = id_info[3]
+                if len(id_info) > 4:
+                    for info in id_info[4:]:
+                        if "=" in info:
+                            key, value = info.split("=")
+                            if key in ["weight", "height"]:
+                                patient[key] = float(value)
+                            elif key in ["hand"]:
+                                patient[key] = int(value)
+                            else:
+                                warn(f"Invalid patient information {key}")
 
         # Recording ID
         meas_id = {}
diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py
index 24fc09d6d49..a93d0debcdd 100644
--- a/mne/io/edf/tests/test_edf.py
+++ b/mne/io/edf/tests/test_edf.py
@@ -7,6 +7,7 @@
 #
 # License: BSD-3-Clause
 
+import datetime
 from contextlib import nullcontext
 from functools import partial
 from pathlib import Path
@@ -117,19 +118,53 @@ def _first_chan_temp(*args, **kwargs):
     assert raw.get_channel_types()[0] == "temperature"
 
 
+@testing.requires_testing_data
 def test_subject_info(tmp_path):
     """Test exposure of original channel units."""
-    raw = read_raw_edf(edf_path)
-    assert raw.info["subject_info"] is None  # XXX this is arguably a bug
+    raw = read_raw_edf(edf_stim_resamp_path, preload=True)
+
+    # check subject_info from `info`
+    assert raw.info["subject_info"] is not None
+    want = {
+        "his_id": "X",
+        "sex": 1,
+        "birthday": (1967, 10, 9),
+        "last_name": "X",
+    }
+    for key, val in want.items():
+        assert raw.info["subject_info"][key] == val, key
+
+    # check "subject_info" from `_raw_extras`
     edf_info = raw._raw_extras[0]
     assert edf_info["subject_info"] is not None
-    want = {"id": "X", "sex": "X", "birthday": "X", "name": "X"}
+    want = {
+        "id": "X",
+        "sex": "M",
+        "birthday": datetime.datetime(1967, 10, 9, 0, 0),
+        "name": "X",
+    }
     for key, val in want.items():
         assert edf_info["subject_info"][key] == val, key
+
+    # add a field that the FIF round-trip below must preserve
+    raw.info["subject_info"]["hand"] = 0
+
+    # save raw to FIF and load it back
     fname = tmp_path / "test_raw.fif"
     raw.save(fname)
     raw = read_raw_fif(fname)
-    assert raw.info["subject_info"] is None  # XXX should eventually round-trip
+
+    # check subject_info from `info`
+    assert raw.info["subject_info"] is not None
+    want = {
+        "his_id": "X",
+        "sex": 1,
+        "birthday": (1967, 10, 9),
+        "last_name": "X",
+        "hand": 0,
+    }
+    for key, val in want.items():
+        assert raw.info["subject_info"][key] == val
 
 
 def test_bdf_data():
diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py
index 55b2cece23a..211230ca9b1 100644
--- a/mne/io/edf/tests/test_gdf.py
+++ b/mne/io/edf/tests/test_gdf.py
@@ -100,8 +100,14 @@ def test_gdf2_birthday(tmp_path):
         assert np.fromfile(fid, np.uint64, 1)[0] == d
     raw = read_raw_gdf(new_fname, eog=None, misc=None, preload=True)
     assert raw._raw_extras[0]["subject_info"]["age"] == 44
-    # XXX this is a bug, it should be populated...
-    assert raw.info["subject_info"] is None
+    assert raw.info["subject_info"] is not None
+
+    birthdate = datetime(1, 1, 1, tzinfo=timezone.utc) + offset_44_yr
+    assert raw.info["subject_info"]["birthday"] == (
+        birthdate.year,
+        birthdate.month,
+        birthdate.day,
+    )
 
 
 @testing.requires_testing_data