From f95a9c52b7c3068e756cc35622012606f4736539 Mon Sep 17 00:00:00 2001 From: paulroujansky Date: Tue, 5 Sep 2023 15:51:39 +0200 Subject: [PATCH] Fix bug with subject_info when loading from and exporting to EDF file (#11952) Co-authored-by: Paul ROUJANSKY --- doc/changes/1.5.inc | 1 + mne/export/_edf.py | 29 ++++++++++------- mne/export/tests/test_export.py | 14 ++++++++- mne/io/edf/edf.py | 55 ++++++++++++++++++++++++++++++++- mne/io/edf/tests/test_edf.py | 43 +++++++++++++++++++++++--- mne/io/edf/tests/test_gdf.py | 10 ++++-- 6 files changed, 133 insertions(+), 19 deletions(-) diff --git a/doc/changes/1.5.inc b/doc/changes/1.5.inc index f76f09d8824..a69f0a53f2e 100644 --- a/doc/changes/1.5.inc +++ b/doc/changes/1.5.inc @@ -26,6 +26,7 @@ Bugs - Fix bug with multi-plot 3D rendering where only one plot was updated (:gh:`11896` by `Eric Larson`_) - Fix bug with :func:`mne.chpi.compute_head_pos` for CTF data where digitization points were modified in-place, producing an incorrect result during a save-load round-trip (:gh:`11934` by `Eric Larson`_) - Fix bug with notebooks when using PyVista 0.42 by implementing ``trame`` backend support (:gh:`11956` by `Eric Larson`_) +- Fix bug with ``subject_info`` when loading data from and exporting to EDF file (:gh:`11952` by `Paul Roujansky`_) .. _changes_1_5_0: diff --git a/mne/export/_edf.py b/mne/export/_edf.py index 3666aae30fe..da2acb72ff8 100644 --- a/mne/export/_edf.py +++ b/mne/export/_edf.py @@ -189,21 +189,27 @@ def _export_raw(fname, raw, physical_range, add_ch_type): # set patient info subj_info = raw.info.get("subject_info") if subj_info is not None: - birthday = subj_info.get("birthday") - # get the full name of subject if available - first_name = subj_info.get("first_name") - last_name = subj_info.get("last_name") - first_name = first_name or "" - last_name = last_name or "" - joiner = "" - if len(first_name) and len(last_name): - joiner = " " - name = joiner.join([first_name, last_name]) + first_name = subj_info.get("first_name", "") + middle_name = subj_info.get("middle_name", "") + last_name = subj_info.get("last_name", "") + name = " ".join(filter(None, [first_name, middle_name, last_name])) + birthday = subj_info.get("birthday") hand = subj_info.get("hand") + weight = subj_info.get("weight") + height = subj_info.get("height") sex = subj_info.get("sex") + additional_patient_info = [] + for key, value in [("height", height), ("weight", weight), ("hand", hand)]: + if value: + additional_patient_info.append(f"{key}={value}") + if len(additional_patient_info) == 0: + additional_patient_info = None + else: + additional_patient_info = " ".join(additional_patient_info) + if birthday is not None: if hdl.setPatientBirthDate(birthday[0], birthday[1], birthday[2]) != 0: raise RuntimeError( @@ -211,9 +217,10 @@ def _export_raw(fname, raw, physical_range, add_ch_type): f"returned an error" ) for key, val in [ + ("PatientCode", subj_info.get("his_id", "")), ("PatientName", name), ("PatientGender", sex), - ("AdditionalPatientInfo", f"hand={hand}"), + ("AdditionalPatientInfo", additional_patient_info), ]: # EDFwriter compares integer encodings of sex and will # raise a TypeError if value is None as returned by diff --git a/mne/export/tests/test_export.py b/mne/export/tests/test_export.py index 96fafd204ef..8959ad5f84e 100644 --- a/mne/export/tests/test_export.py +++ b/mne/export/tests/test_export.py @@ -134,11 +134,19 @@ def test_double_export_edf(tmp_path): "bio", ] info = create_info(len(ch_types), sfreq=1000, ch_types=ch_types) + info = info.set_meas_date("2023-09-04 14:53:09.000") data = rng.random(size=(len(ch_types), 1000)) * 1e-5 # include subject info and measurement date info["subject_info"] = dict( - first_name="mne", last_name="python", birthday=(1992, 1, 20), sex=1, hand=3 + his_id="12345", + first_name="mne", + last_name="python", + birthday=(1992, 1, 20), + sex=1, + weight=78.3, + height=1.75, + hand=3, ) raw = RawArray(data, info) @@ -163,6 +171,10 @@ def test_double_export_edf(tmp_path): ) assert_allclose(raw.times, raw_read.times[:orig_raw_len], rtol=0, atol=1e-5) + # check info + for key in set(raw.info) - {"chs"}: + assert raw.info[key] == raw_read.info[key] + # check channel types except for 'bio', which loses its type orig_ch_types = raw.get_channel_types() read_ch_types = raw_read.get_channel_types() diff --git a/mne/io/edf/edf.py b/mne/io/edf/edf.py index 1a616f1f733..40312414df8 100644 --- a/mne/io/edf/edf.py +++ b/mne/io/edf/edf.py @@ -644,6 +644,49 @@ def _get_info( info["chs"] = chs info["ch_names"] = ch_names + # Subject information + info["subject_info"] = {} + + # String subject identifier + if edf_info["subject_info"].get("id") is not None: + info["subject_info"]["his_id"] = edf_info["subject_info"]["id"] + # Subject sex (0=unknown, 1=male, 2=female) + if edf_info["subject_info"].get("sex") is not None: + if edf_info["subject_info"]["sex"] == "M": + info["subject_info"]["sex"] = 1 + elif edf_info["subject_info"]["sex"] == "F": + info["subject_info"]["sex"] = 2 + else: + info["subject_info"]["sex"] = 0 + # Subject names (first, middle, last). + if edf_info["subject_info"].get("name") is not None: + sub_names = edf_info["subject_info"]["name"].split("_") + if len(sub_names) < 2 or len(sub_names) > 3: + info["subject_info"]["last_name"] = edf_info["subject_info"]["name"] + elif len(sub_names) == 2: + info["subject_info"]["first_name"] = sub_names[0] + info["subject_info"]["last_name"] = sub_names[1] + else: + info["subject_info"]["first_name"] = sub_names[0] + info["subject_info"]["middle_name"] = sub_names[1] + info["subject_info"]["last_name"] = sub_names[2] + # Birthday in (year, month, day) format. + if isinstance(edf_info["subject_info"].get("birthday"), datetime): + info["subject_info"]["birthday"] = ( + edf_info["subject_info"]["birthday"].year, + edf_info["subject_info"]["birthday"].month, + edf_info["subject_info"]["birthday"].day, + ) + # Handedness (1=right, 2=left, 3=ambidextrous). + if edf_info["subject_info"].get("hand") is not None: + info["subject_info"]["hand"] = int(edf_info["subject_info"]["hand"]) + # Height in meters. + if edf_info["subject_info"].get("height") is not None: + info["subject_info"]["height"] = float(edf_info["subject_info"]["height"]) + # Weight in kilograms. + if edf_info["subject_info"].get("weight") is not None: + info["subject_info"]["weight"] = float(edf_info["subject_info"]["weight"]) + # Filter settings highpass = edf_info["highpass"] lowpass = edf_info["lowpass"] @@ -766,7 +809,7 @@ def _read_edf_header(fname, exclude, infer_types, include=None): id_info = id_info.split(" ") if len(id_info): patient["id"] = id_info[0] - if len(id_info) == 4: + if len(id_info) >= 4: try: birthdate = datetime.strptime(id_info[2], "%d-%b-%Y") except ValueError: @@ -774,6 +817,16 @@ def _read_edf_header(fname, exclude, infer_types, include=None): patient["sex"] = id_info[1] patient["birthday"] = birthdate patient["name"] = id_info[3] + if len(id_info) > 4: + for info in id_info[4:]: + if "=" in info: + key, value = info.split("=") + if key in ["weight", "height"]: + patient[key] = float(value) + elif key in ["hand"]: + patient[key] = int(value) + else: + warn(f"Invalid patient information {key}") # Recording ID meas_id = {} diff --git a/mne/io/edf/tests/test_edf.py b/mne/io/edf/tests/test_edf.py index 24fc09d6d49..a93d0debcdd 100644 --- a/mne/io/edf/tests/test_edf.py +++ b/mne/io/edf/tests/test_edf.py @@ -7,6 +7,7 @@ # # License: BSD-3-Clause +import datetime from contextlib import nullcontext from functools import partial from pathlib import Path @@ -117,19 +118,53 @@ def _first_chan_temp(*args, **kwargs): assert raw.get_channel_types()[0] == "temperature" +@testing.requires_testing_data def test_subject_info(tmp_path): """Test exposure of original channel units.""" - raw = read_raw_edf(edf_path) - assert raw.info["subject_info"] is None # XXX this is arguably a bug + raw = read_raw_edf(edf_stim_resamp_path, preload=True) + + # check subject_info from `info` + assert raw.info["subject_info"] is not None + want = { + "his_id": "X", + "sex": 1, + "birthday": (1967, 10, 9), + "last_name": "X", + } + for key, val in want.items(): + assert raw.info["subject_info"][key] == val, key + + # check "subject_info" from `_raw_extras` edf_info = raw._raw_extras[0] assert edf_info["subject_info"] is not None - want = {"id": "X", "sex": "X", "birthday": "X", "name": "X"} + want = { + "id": "X", + "sex": "M", + "birthday": datetime.datetime(1967, 10, 9, 0, 0), + "name": "X", + } for key, val in want.items(): assert edf_info["subject_info"][key] == val, key + + # add information + raw.info["subject_info"]["hand"] = 0 + + # save raw to FIF and load it back fname = tmp_path / "test_raw.fif" raw.save(fname) raw = read_raw_fif(fname) - assert raw.info["subject_info"] is None # XXX should eventually round-trip + + # check subject_info from `info` + assert raw.info["subject_info"] is not None + want = { + "his_id": "X", + "sex": 1, + "birthday": (1967, 10, 9), + "last_name": "X", + "hand": 0, + } + for key, val in want.items(): + assert raw.info["subject_info"][key] == val def test_bdf_data(): diff --git a/mne/io/edf/tests/test_gdf.py b/mne/io/edf/tests/test_gdf.py index 55b2cece23a..211230ca9b1 100644 --- a/mne/io/edf/tests/test_gdf.py +++ b/mne/io/edf/tests/test_gdf.py @@ -100,8 +100,14 @@ def test_gdf2_birthday(tmp_path): assert np.fromfile(fid, np.uint64, 1)[0] == d raw = read_raw_gdf(new_fname, eog=None, misc=None, preload=True) assert raw._raw_extras[0]["subject_info"]["age"] == 44 - # XXX this is a bug, it should be populated... - assert raw.info["subject_info"] is None + assert raw.info["subject_info"] is not None + + birthdate = datetime(1, 1, 1, tzinfo=timezone.utc) + offset_44_yr + assert raw.info["subject_info"]["birthday"] == ( + birthdate.year, + birthdate.month, + birthdate.day, + ) @testing.requires_testing_data