From 41861068d7f97aa68385d272833067c3bd91fde2 Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Thu, 10 Oct 2024 16:38:47 +0200 Subject: [PATCH 1/7] First pass --- .../oasis_to_bids/oasis_to_bids_utils.py | 88 ++++++------------- .../oasis_to_bids/test_oasis_to_bids_utils.py | 24 ++--- 2 files changed, 36 insertions(+), 76 deletions(-) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py index 89022e6a0..c30a5ef37 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py @@ -1,12 +1,9 @@ -import os from pathlib import Path from typing import Iterable -import numpy as np import pandas as pd -from clinica.iotools.bids_utils import StudyName, get_bids_sess_list -from clinica.utils.stream import cprint +from clinica.iotools.bids_utils import StudyName, bids_id_factory __all__ = ["create_sessions_dict", "write_sessions_tsv"] @@ -28,7 +25,7 @@ def create_sessions_dict( The path to the BIDS directory. clinical_specifications_folder : Path - The path to the clinical file. + The path to the clinical file folder. bids_ids : list of str The list of bids ids. @@ -39,63 +36,34 @@ def create_sessions_dict( Session dict. """ - location = f"{StudyName.OASIS.value} location" - sessions = pd.read_csv(clinical_specifications_folder / "sessions.tsv", sep="\t") - sessions_fields = sessions[StudyName.OASIS.value] - field_location = sessions[location] - sessions_fields_bids = sessions["BIDS CLINICA"] - fields_dataset = [] - fields_bids = [] + study = StudyName.OASIS.value + location = f"{study} location" + spec = pd.read_csv(clinical_specifications_folder / "sessions.tsv", sep="\t")[ + [study, location, "BIDS CLINICA"] + ].dropna() sessions_dict = {} - for i in range(0, len(sessions_fields)): - if not pd.isnull(sessions_fields[i]): - fields_bids.append(sessions_fields_bids[i]) - fields_dataset.append(sessions_fields[i]) - - for i in range(0, len(sessions_fields)): - # If the i-th field is available - if not pd.isnull(sessions_fields[i]): - # Load the file - tmp = field_location[i].split("/") - location = tmp[0] - sheet = tmp[1] if len(tmp) > 1 else 0 - file_to_read_path = clinical_data_dir / location - file_ext = os.path.splitext(location)[1] - if file_ext == ".xlsx": - file_to_read = pd.read_excel(file_to_read_path, sheet_name=sheet) - elif file_ext == ".csv": - file_to_read = pd.read_csv(file_to_read_path) - else: - raise ValueError( - f"Unknown file extension {file_ext}. Expecting either .xlsx or .csv." - ) - - for r in range(0, len(file_to_read.values)): - # Extracts the subject ids columns from the dataframe - subj_id = file_to_read.iloc[r]["ID"] - if hasattr(subj_id, "dtype"): - if subj_id.dtype == np.int64: - subj_id = str(subj_id) - # Removes all the - from - subj_id_alpha = str(subj_id[0:3] + "IS" + subj_id[3] + subj_id[5:9]) - - # Extract the corresponding BIDS id and create the output file if doesn't exist - subj_bids = [s for s in bids_ids if subj_id_alpha in s] - if subj_bids: - subj_bids = subj_bids[0] - subj_dir = bids_dir / subj_bids - session_names = get_bids_sess_list(subj_dir) - for s in session_names: - s_name = s.replace("ses-", "") - row = file_to_read.iloc[r] - if subj_bids not in sessions_dict: - sessions_dict.update({subj_bids: {}}) - if s_name not in sessions_dict[subj_bids].keys(): - sessions_dict[subj_bids].update({s_name: {"session_id": s}}) - (sessions_dict[subj_bids][s_name]).update( - {sessions_fields_bids[i]: row[sessions_fields[i]]} - ) + for loc in spec[location].unique(): + file = pd.read_excel(clinical_data_dir / loc) + file["BIDS ID"] = file.ID.apply( + lambda x: bids_id_factory(StudyName.OASIS).from_original_study_id(x) + ) + file.set_index("BIDS ID", drop=True, inplace=True) + result = pd.DataFrame() + for _, row in spec[spec[location] == loc].iterrows(): + result[row["BIDS CLINICA"]] = file[row[[study]]] + + # todo : what happens if one subject is not in the metadata ? at this point, I could add a line + # but I have to be sure that it has a corresponding image OR that the bids_ids list was properly + # managed before + + result = result.loc[bids_ids] + result["session_id"] = "ses-M000" + + for bids_id, row in result.iterrows(): + sessions_dict.update( + {bids_id: {"M000": {label: value for label, value in row.items()}}} + ) return sessions_dict diff --git a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py index 72e9de84a..d6fd4d7f0 100644 --- a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py +++ b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py @@ -21,6 +21,8 @@ def clinical_data_path(tmp_path: Path) -> Path: def _build_clinical_data(clinical_data_path: Path) -> None: clinical_data_path.mkdir() + # todo :what happens if nan instead of value ? (handling of float...) + df = pd.DataFrame( { "ID": ["OAS1_0001_MR1", "OAS1_0002_MR1"], @@ -37,9 +39,9 @@ def _build_clinical_data(clinical_data_path: Path) -> None: "Delay": [float("nan"), float("nan")], } ) - df.to_csv(clinical_data_path / "oasis_cross-sectional.csv", index=False) - - # todo : future with excel + df.to_excel( + clinical_data_path / "oasis_cross-sectional-5708aa0a98d82080.xlsx", index=False + ) @pytest.fixture @@ -57,9 +59,9 @@ def _build_spec_sessions_success(sessions_path_success: Path) -> None: "ADNI": [np.nan, np.nan, np.nan, "foo"], "OASIS": ["CDR", "MMSE", "CDR", np.nan], "OASIS location": [ - "oasis_cross-sectional.csv", - "oasis_cross-sectional.csv", - "oasis_cross-sectional.csv", + "oasis_cross-sectional-5708aa0a98d82080.xlsx", + "oasis_cross-sectional-5708aa0a98d82080.xlsx", + "oasis_cross-sectional-5708aa0a98d82080.xlsx", np.nan, ], } @@ -111,12 +113,6 @@ def expected() -> dict: "MMS": 29, "diagnosis": 0, }, - "M006": { - "session_id": "ses-M006", - "cdr_global": 0, - "MMS": 29, - "diagnosis": 0, - }, }, "sub-OASIS10002": { "M000": { @@ -138,8 +134,6 @@ def test_create_sessions_dict_success( sessions_path_success: Path, expected: dict, ): - # todo : how does it handle nan inside excel/csv ? verify with excel - result = create_sessions_dict( clinical_data_path, bids_dir, @@ -157,8 +151,6 @@ def test_create_sessions_dict_error( sessions_path_error: Path, expected: dict, ): - # todo : how does it handle nan inside excel/csv ? verify with excel - with pytest.raises(FileNotFoundError): create_sessions_dict( clinical_data_path, From a5ace770e1947b9d5a6a724bba9905e22ff408e4 Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Thu, 10 Oct 2024 16:45:40 +0200 Subject: [PATCH 2/7] Apply diagnosis mapping inside function --- clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py | 5 ----- .../iotools/converters/oasis_to_bids/oasis_to_bids_utils.py | 3 +++ .../converters/oasis_to_bids/test_oasis_to_bids_utils.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py index 9f0a1808d..1500b433e 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py @@ -117,11 +117,6 @@ def _create_sessions_tsv( bids_ids=bids_ids, ) - # todo : when tested add to create_sessions_dict bc specific to oasis1 - for bids_id in bids_ids: - sessions_dict[bids_id]["M000"]["diagnosis"] = ( - "AD" if sessions_dict[bids_id]["M000"]["diagnosis"] > 0 else "CN" - ) write_sessions_tsv(bids_dir, sessions_dict) return sessions_dict diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py index c30a5ef37..a7f78950e 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py @@ -58,6 +58,9 @@ def create_sessions_dict( # managed before result = result.loc[bids_ids] + result["diagnosis"] = result["diagnosis"].apply( + lambda x: "AD" if x > 0 else "CN" + ) result["session_id"] = "ses-M000" for bids_id, row in result.iterrows(): diff --git a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py index d6fd4d7f0..4e716de95 100644 --- a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py +++ b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py @@ -111,7 +111,7 @@ def expected() -> dict: "session_id": "ses-M000", "cdr_global": 0, "MMS": 29, - "diagnosis": 0, + "diagnosis": "CN", }, }, "sub-OASIS10002": { @@ -119,7 +119,7 @@ def expected() -> dict: "session_id": "ses-M000", "cdr_global": 0, "MMS": 29, - "diagnosis": 0, + "diagnosis": "CN", } }, } From 07cab7645015ca0bc29c42d1c56069614605e7a2 Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Thu, 10 Oct 2024 16:54:51 +0200 Subject: [PATCH 3/7] Remove unnecessary argument --- clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py index a7f78950e..5084fb29b 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py @@ -10,7 +10,6 @@ def create_sessions_dict( clinical_data_dir: Path, - bids_dir: Path, clinical_specifications_folder: Path, bids_ids: Iterable[str], ) -> dict: From e8f9ac296bbfdeda9bd9ad1a8b22f499f2a68bb1 Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Thu, 10 Oct 2024 17:28:49 +0200 Subject: [PATCH 4/7] Fix --- clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py | 1 - 1 file changed, 1 deletion(-) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py index 1500b433e..5832966ff 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py @@ -112,7 +112,6 @@ def _create_sessions_tsv( sessions_dict = create_sessions_dict( clinical_data_dir=clinical_data_dir, - bids_dir=bids_dir, clinical_specifications_folder=Path(__file__).parents[1] / "specifications", bids_ids=bids_ids, ) From 3e71919bb57d9c8834015e2a43fc2ebd8f5ba0bf Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Thu, 10 Oct 2024 17:31:58 +0200 Subject: [PATCH 5/7] Fix 2 --- .../converters/oasis_to_bids/test_oasis_to_bids_utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py index 4e716de95..3020c6838 100644 --- a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py +++ b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py @@ -130,13 +130,11 @@ def expected() -> dict: def test_create_sessions_dict_success( tmp_path, clinical_data_path: Path, - bids_dir: Path, sessions_path_success: Path, expected: dict, ): result = create_sessions_dict( clinical_data_path, - bids_dir, sessions_path_success, ["sub-OASIS10001", "sub-OASIS10002"], ) @@ -147,14 +145,12 @@ def test_create_sessions_dict_success( def test_create_sessions_dict_error( tmp_path, clinical_data_path: Path, - bids_dir: Path, sessions_path_error: Path, expected: dict, ): with pytest.raises(FileNotFoundError): create_sessions_dict( clinical_data_path, - bids_dir, sessions_path_error, ["sub-OASIS10001", "sub-OASIS10002"], ) @@ -169,7 +165,6 @@ def test_write_sessions_tsv( ): sessions = create_sessions_dict( clinical_data_path, - bids_dir, sessions_path_success, ["sub-OASIS10001", "sub-OASIS10002"], ) From b8a3d4e1e8352cd39b867f1c7ea7a0976660bdcf Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Thu, 10 Oct 2024 17:41:21 +0200 Subject: [PATCH 6/7] Change name --- .../converters/oasis_to_bids/oasis_to_bids_utils.py | 12 ++++++------ .../oasis_to_bids/test_oasis_to_bids_utils.py | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py index 5084fb29b..dda8769b1 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py @@ -48,21 +48,21 @@ def create_sessions_dict( lambda x: bids_id_factory(StudyName.OASIS).from_original_study_id(x) ) file.set_index("BIDS ID", drop=True, inplace=True) - result = pd.DataFrame() + sessions_df = pd.DataFrame() for _, row in spec[spec[location] == loc].iterrows(): - result[row["BIDS CLINICA"]] = file[row[[study]]] + sessions_df[row["BIDS CLINICA"]] = file[row[[study]]] # todo : what happens if one subject is not in the metadata ? at this point, I could add a line # but I have to be sure that it has a corresponding image OR that the bids_ids list was properly # managed before - result = result.loc[bids_ids] - result["diagnosis"] = result["diagnosis"].apply( + sessions_df = sessions_df.loc[bids_ids] + sessions_df["diagnosis"] = sessions_df["diagnosis"].apply( lambda x: "AD" if x > 0 else "CN" ) - result["session_id"] = "ses-M000" + sessions_df["session_id"] = "ses-M000" - for bids_id, row in result.iterrows(): + for bids_id, row in sessions_df.iterrows(): sessions_dict.update( {bids_id: {"M000": {label: value for label, value in row.items()}}} ) diff --git a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py index 3020c6838..671977eab 100644 --- a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py +++ b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py @@ -21,8 +21,6 @@ def clinical_data_path(tmp_path: Path) -> Path: def _build_clinical_data(clinical_data_path: Path) -> None: clinical_data_path.mkdir() - # todo :what happens if nan instead of value ? (handling of float...) - df = pd.DataFrame( { "ID": ["OAS1_0001_MR1", "OAS1_0002_MR1"], From cdae75b6366c40515a774c381098bd323f276509 Mon Sep 17 00:00:00 2001 From: AliceJoubert Date: Fri, 11 Oct 2024 10:27:52 +0200 Subject: [PATCH 7/7] last changes --- .../converters/oasis_to_bids/oasis_to_bids_utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py index dda8769b1..451d81261 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py @@ -20,9 +20,6 @@ def create_sessions_dict( clinical_data_dir : Path The path to the input folder. - bids_dir : Path - The path to the BIDS directory. - clinical_specifications_folder : Path The path to the clinical file folder. @@ -52,10 +49,6 @@ def create_sessions_dict( for _, row in spec[spec[location] == loc].iterrows(): sessions_df[row["BIDS CLINICA"]] = file[row[[study]]] - # todo : what happens if one subject is not in the metadata ? at this point, I could add a line - # but I have to be sure that it has a corresponding image OR that the bids_ids list was properly - # managed before - sessions_df = sessions_df.loc[bids_ids] sessions_df["diagnosis"] = sessions_df["diagnosis"].apply( lambda x: "AD" if x > 0 else "CN"