diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py
index 206bbbc3f..5f99ded39 100644
--- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py
+++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py
@@ -102,23 +102,24 @@ def _create_participants_tsv(
             encoding="utf-8",
         )
 
+    @staticmethod
     def _create_sessions_tsv(
-        self,
         clinical_data_dir: Path,
         bids_dir: Path,
         bids_ids: list[str],
     ) -> None:
-        from .oasis_to_bids_utils import create_sessions_dict, write_sessions_tsv
+        from .oasis_to_bids_utils import create_sessions_df, write_sessions_tsv
 
-        sessions_dict = create_sessions_dict(
+        sessions_df = create_sessions_df(
             clinical_data_dir=clinical_data_dir,
             clinical_specifications_folder=Path(__file__).parents[1] / "specifications",
             bids_ids=bids_ids,
         )
 
-        write_sessions_tsv(bids_dir, sessions_dict)
+        write_sessions_tsv(bids_dir, sessions_df)
 
-    def _create_scans_tsv(self, bids_dir: Path) -> None:
+    @staticmethod
+    def _create_scans_tsv(bids_dir: Path) -> None:
         from .oasis_to_bids_utils import write_scans_tsv
 
         write_scans_tsv(bids_dir)
diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py
index 7621f8146..290a5fc7b 100644
--- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py
+++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids_utils.py
@@ -1,19 +1,31 @@
 from pathlib import Path
-from typing import Iterable
+from typing import Iterable, Union
 
 import pandas as pd
 
 from clinica.iotools.bids_utils import StudyName, bids_id_factory
+from clinica.utils.stream import cprint
 
-__all__ = ["create_sessions_dict", "write_sessions_tsv", "write_scans_tsv"]
+__all__ = ["create_sessions_df", "write_sessions_tsv", "write_scans_tsv"]
 
 
-def create_sessions_dict(
+def _convert_cdr_to_diagnosis(cdr: Union[int, float, str]) -> str:
+    """Convert a global CDR score into a diagnosis label.
+
+    A score of 0 gives "CN" and any other score gives "AD". Missing scores
+    ("n/a" or NaN) give "n/a" so that they are never reported as "AD".
+    """
+    if cdr == "n/a" or pd.isna(cdr):
+        return "n/a"
+    return "CN" if cdr == 0 else "AD"
+
+
+def create_sessions_df(
     clinical_data_dir: Path,
     clinical_specifications_folder: Path,
     bids_ids: Iterable[str],
-) -> dict:
-    """Extract the information regarding the sessions and store them in a dictionary (session M000 only).
+) -> pd.DataFrame:
+    """Extract the information regarding sessions M000 and store them in a dataframe.
 
     Parameters
     ----------
@@ -24,12 +36,17 @@ def create_sessions_dict(
         The path to the clinical file folder.
 
     bids_ids : list of str
-        The list of bids ids.
+        The list of bids ids which are in the BIDS directory.
 
     Returns
     -------
-    dict :
-        Session dict.
+    pd.DataFrame :
+        Sessions metadata indexed by BIDS ID, with one row per requested subject.
+
+    Raises
+    ------
+    ValueError :
+        If the specifications do not reference exactly one clinical data file.
     """
 
     study = StudyName.OASIS.value
@@ -37,72 +54,76 @@ def create_sessions_dict(
     spec = pd.read_csv(clinical_specifications_folder / "sessions.tsv", sep="\t")[
         [study, location, "BIDS CLINICA"]
     ].dropna()
-    sessions_dict = {}
-    for loc in spec[location].unique():
-        file = pd.read_excel(clinical_data_dir / loc)
-        file["BIDS ID"] = file.ID.apply(
-            lambda x: bids_id_factory(StudyName.OASIS).from_original_study_id(x)
-        )
-        file.set_index("BIDS ID", drop=True, inplace=True)
-        sessions_df = pd.DataFrame()
-        for _, row in spec[spec[location] == loc].iterrows():
-            sessions_df[row["BIDS CLINICA"]] = file[row[[study]]]
-
-        sessions_df = sessions_df.loc[bids_ids]
-        sessions_df["diagnosis"] = sessions_df["diagnosis"].apply(
-            lambda x: "AD" if x > 0 else "CN"
-        )
-        sessions_df["session_id"] = "ses-M000"
-
-        for bids_id, row in sessions_df.iterrows():
-            sessions_dict.update(
-                {bids_id: {"M000": {label: value for label, value in row.items()}}}
-            )
-
-    return sessions_dict
-
-
-def write_sessions_tsv(bids_dir: Path, sessions_dict: dict) -> None:
-    """Create _sessions.tsv files.
-
-    Basically writes the content of the function
-    `clinica.iotools.bids_utils.create_sessions_dict` in several TSV files
-    following the BIDS specification.
+    locations = spec[location].unique()
+    if len(locations) != 1:
+        raise ValueError(
+            "OASIS1 metadata is supposed to be contained in only 1 file, "
+            f"{len(locations)} were detected : {locations}"
+        )
+    loc = locations[0]
+
+    file = pd.read_excel(clinical_data_dir / loc)
+    file["BIDS ID"] = file.ID.apply(
+        lambda x: bids_id_factory(StudyName.OASIS).from_original_study_id(x)
+    )
+    file.set_index("BIDS ID", drop=True, inplace=True)
+
+    sessions_df = pd.DataFrame()
+    for _, row in spec[spec[location] == loc].iterrows():
+        sessions_df[row["BIDS CLINICA"]] = file[row[study]]
+
+    # Materialize once: `bids_ids` is only guaranteed to be an iterable.
+    bids_ids = list(bids_ids)
+    missing_subjects = sorted(set(bids_ids) - set(sessions_df.index))
+    if missing_subjects:
+        cprint(
+            msg=f"No clinical data available for: {', '.join(missing_subjects)}",
+            lvl="warning",
+        )
+    # Subjects without clinical data still get a row, filled with "n/a".
+    for subject in missing_subjects:
+        sessions_df.loc[subject] = ["n/a" for _ in sessions_df.columns]
+
+    sessions_df = sessions_df.loc[bids_ids]
+    sessions_df["diagnosis"] = sessions_df["diagnosis"].apply(_convert_cdr_to_diagnosis)
+    sessions_df["session_id"] = "ses-M000"
+
+    return sessions_df
+
+
+def write_sessions_tsv(bids_dir: Path, sessions_df: pd.DataFrame) -> None:
+    """Write one `<subject>_sessions.tsv` file per row of `sessions_df`.
+
+    Each file is written in the corresponding subject folder of the BIDS
+    directory, following the BIDS specification.
 
     Parameters
     ----------
     bids_dir : Path
         The path to the BIDS directory.
 
-    sessions_dict : dict
-        Dictionary containing sessions metadata.
+    sessions_df : pd.DataFrame
+        Sessions metadata, one row per subject, indexed by the subject BIDS ID.
 
         .. note::
             This is the output of the function
-            `clinica.iotools.bids_utils.create_sessions_dict`.
+            `create_sessions_df` defined in this module.
 
     See also
     --------
-    create_sessions_dict
+    create_sessions_df
 
     """
-    for subject_path in bids_dir.glob("sub-*"):
-        if subject_path.name in sessions_dict:
-            session_df = pd.DataFrame.from_dict(
-                sessions_dict[subject_path.name], orient="index"
-            )
-            cols = session_df.columns.tolist()
-            cols = cols[-1:] + cols[:-1]
-            session_df = session_df[cols]
-        else:
-            print(f"No session data available for {subject_path}")
-            session_df = pd.DataFrame(columns=["session_id"])
-            session_df["session_id"] = pd.Series("M000")
-            session_df = session_df.set_index("session_id").fillna("n/a")
-        session_df.to_csv(
-            subject_path / f"{subject_path.name}_sessions.tsv",
+    # BIDS requires `session_id` to be the first column of a sessions file.
+    columns = ["session_id"] + [c for c in sessions_df.columns if c != "session_id"]
+    for subject, data in sessions_df[columns].iterrows():
+        data.to_frame().T.to_csv(
+            bids_dir / subject / f"{subject}_sessions.tsv",
             sep="\t",
             encoding="utf8",
+            index=False,
+            # BIDS codes missing values as "n/a".
+            na_rep="n/a",
         )
 
 
diff --git a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py
index fe4ebff6d..2a5637f2f 100644
--- a/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py
+++ b/test/unittests/iotools/converters/oasis_to_bids/test_oasis_to_bids_utils.py
@@ -7,7 +7,8 @@ from pandas.testing import assert_frame_equal
 
 
 from clinica.iotools.converters.oasis_to_bids.oasis_to_bids_utils import (
-    create_sessions_dict,
+    _convert_cdr_to_diagnosis,
+    create_sessions_df,
     write_scans_tsv,
     write_sessions_tsv,
 )
@@ -104,52 +105,84 @@ def _build_bids_dir(bids_dir: Path) -> None:
 
 
 @pytest.fixture
-def expected() -> dict:
+def expected() -> pd.DataFrame:
     expected = {
         "sub-OASIS10001": {
-            "M000": {
-                "session_id": "ses-M000",
-                "cdr_global": 0,
-                "MMS": 29,
-                "diagnosis": "CN",
-            },
+            "session_id": "ses-M000",
+            "cdr_global": 0,
+            "MMS": 29,
+            "diagnosis": "CN",
         },
         "sub-OASIS10002": {
-            "M000": {
-                "session_id": "ses-M000",
-                "cdr_global": 0,
-                "MMS": 29,
-                "diagnosis": "CN",
-            }
+            "session_id": "ses-M000",
+            "cdr_global": 0,
+            "MMS": 29,
+            "diagnosis": "CN",
         },
     }
 
+    expected = pd.DataFrame.from_dict(expected).T
+    expected.index.names = ["BIDS ID"]
+
     return expected
 
 
-def test_create_sessions_dict_success(
+@pytest.mark.parametrize(
+    "cdr,expected_diagnosis",
+    [(0, "CN"), (0.5, "AD"), (2, "AD"), ("n/a", "n/a"), (float("nan"), "n/a")],
+)
+def test_convert_cdr_to_diagnosis(cdr, expected_diagnosis):
+    assert _convert_cdr_to_diagnosis(cdr) == expected_diagnosis
+
+
+def test_create_sessions_df_success(
     tmp_path,
     clinical_data_path: Path,
     sessions_path_success: Path,
-    expected: dict,
+    expected: pd.DataFrame,
 ):
-    result = create_sessions_dict(
+    result = create_sessions_df(
         clinical_data_path,
         sessions_path_success,
         ["sub-OASIS10001", "sub-OASIS10002"],
     )
+    assert_frame_equal(expected, result, check_like=True, check_dtype=False)
+
+
+def test_create_sessions_df_missing_clinical_data(
+    tmp_path,
+    clinical_data_path: Path,
+    sessions_path_success: Path,
+    expected: pd.DataFrame,
+):
+    result = create_sessions_df(
+        clinical_data_path,
+        sessions_path_success,
+        ["sub-OASIS10001", "sub-OASIS10002", "sub-OASIS10004"],
+    )
+    missing_line = pd.DataFrame.from_dict(
+        {
+            "sub-OASIS10004": {
+                "session_id": "ses-M000",
+                "diagnosis": "n/a",
+                "cdr_global": "n/a",
+                "MMS": "n/a",
+            }
+        }
+    ).T
+    missing_line.index.names = ["BIDS ID"]
 
-    assert result == expected
+    expected = pd.concat([expected, missing_line])
+    assert_frame_equal(expected, result, check_like=True, check_dtype=False)
 
 
-def test_create_sessions_dict_error(
+def test_create_sessions_df_file_not_found(
     tmp_path,
     clinical_data_path: Path,
     sessions_path_error: Path,
-    expected: dict,
 ):
     with pytest.raises(FileNotFoundError):
-        create_sessions_dict(
+        create_sessions_df(
             clinical_data_path,
             sessions_path_error,
             ["sub-OASIS10001", "sub-OASIS10002"],
@@ -161,22 +194,24 @@ def test_write_sessions_tsv(
     clinical_data_path: Path,
     bids_dir: Path,
     sessions_path_success: Path,
-    expected: dict,
+    expected: pd.DataFrame,
 ):
-    sessions = create_sessions_dict(
+    sessions = create_sessions_df(
         clinical_data_path,
         sessions_path_success,
         ["sub-OASIS10001", "sub-OASIS10002"],
     )
-    write_sessions_tsv(tmp_path / "BIDS", sessions)
-    sessions_files = list((tmp_path / "BIDS").rglob("*.tsv"))
+    write_sessions_tsv(bids_dir, sessions)
+    sessions_files = list(bids_dir.rglob("*.tsv"))
+
     assert len(sessions_files) == 2
     for file in sessions_files:
+        written = pd.read_csv(file, sep="\t")
+        # BIDS requires `session_id` to be the first column.
+        assert written.columns[0] == "session_id"
         assert_frame_equal(
-            pd.read_csv(file, sep="\t").set_index("session_id", drop=False),
-            pd.DataFrame(expected[file.parent.name]).T.set_index(
-                "session_id", drop=False
-            ),
+            written.reset_index(drop=True),
+            expected.loc[[file.parent.name]].reset_index(drop=True),
             check_like=True,
             check_dtype=False,
         )