From 150d8082b49e883d577dcc3d76fe5a58228dd62b Mon Sep 17 00:00:00 2001 From: AliceJoubert <158147135+AliceJoubert@users.noreply.github.com> Date: Thu, 24 Oct 2024 15:37:59 +0200 Subject: [PATCH] [FIX] ADNI-to-BIDS : incorporate the fix for KeyError "APGEN" (#1342) * Proposition * Changes upon suggestions * Modify unit test * Small fix --- clinica/iotools/bids_utils.py | 15 +++++++++ test/unittests/iotools/test_bids_utils.py | 39 ++++++++++++++++++----- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/clinica/iotools/bids_utils.py b/clinica/iotools/bids_utils.py index 324ef4571..e4eb8cedf 100644 --- a/clinica/iotools/bids_utils.py +++ b/clinica/iotools/bids_utils.py @@ -401,6 +401,21 @@ def create_participants_df( file_to_read = load_clinical_csv( clinical_data_dir, location.split(".")[0] ) + # Condition to handle ADNI modification of file APOERES.csv + # See issue https://github.com/aramis-lab/clinica/issues/1294 + if study_name == StudyName.ADNI and location == "APOERES.csv": + if ( + participant_fields_db[i] not in file_to_read.columns + and "GENOTYPE" in file_to_read.columns + ): + # Split the 'GENOTYPE' column into 'APGEN1' and 'APGEN2' + genotype = file_to_read["GENOTYPE"].str.split( + "/", expand=True + ) + file_to_read = file_to_read.assign( + APGEN1=genotype[0], APGEN2=genotype[1] + ) + prev_location = location prev_sheet = sheet diff --git a/test/unittests/iotools/test_bids_utils.py b/test/unittests/iotools/test_bids_utils.py index a26dbe03b..c014dbc7e 100644 --- a/test/unittests/iotools/test_bids_utils.py +++ b/test/unittests/iotools/test_bids_utils.py @@ -1,6 +1,6 @@ from pathlib import Path from string import Template -from typing import Union +from typing import Optional, Union import numpy as np import pandas as pd @@ -112,7 +112,7 @@ def test_bids_to_study(study, bids_id, source_id): assert bids_id_factory(study)(bids_id).to_original_study_id() == source_id -def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: +def create_participants_spec(tmp_path: Path) -> Path: spec_df = pd.DataFrame( { "BIDS CLINICA": [ @@ -142,6 +142,12 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: ) spec_df.to_csv(tmp_path / "participant.tsv", sep="\t", index=False) + return tmp_path + + +def create_clinical_data( + tmp_path: Path, study_name: StudyName, adni_genotype: Optional[bool] = False +) -> Path: clinical_path = tmp_path / "clinical_data" clinical_path.mkdir() @@ -160,14 +166,25 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: "AGE": ["40", "50", "60", "70", "80", None], } ) + df_apoeres = pd.DataFrame( { "APGEN1": ["3", "3", "3", "3", None, "3"], "GEN2": ["2", "2", "2", "2", None, "2"], } ) + + if adni_genotype: + df_apoeres = pd.DataFrame( + { + "GENOTYPE": ["3/2", "3/2", "3/2", "3/2", None, "3/2"], + "GEN2": ["2", "2", "2", "2", None, "2"], + } + ) + df_adnimerge.to_csv(clinical_path / "ADNIMERGE.csv", index=False) df_apoeres.to_csv(clinical_path / "APOERES.csv", index=False) + if study_name == StudyName.OASIS: df_oasis = pd.DataFrame( { @@ -189,7 +206,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: @pytest.mark.parametrize( - "study_name, bids_ids, expected", + "study_name, bids_ids, expected, adni_genotype", [ ( StudyName.OASIS, @@ -201,6 +218,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: "sex": ["F"], } ), + False, ), ( StudyName.ADNI, @@ -210,9 +228,10 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: "participant_id": ["sub-ADNI001S0001"], "alternative_id_1": ["001_S_0001"], "sex": ["Male"], - "apoegen1": [3.0], + "apoegen1": ["3"], } ), + True, ), ( StudyName.OASIS, @@ -224,6 +243,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: "sex": ["M", "M"], } ), + False, ), ( StudyName.ADNI, @@ -236,6 +256,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: "apoegen1": ["n/a"], } ), + False, ), ( StudyName.ADNI, @@ -248,18 +269,20 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path: "apoegen1": [3.0], } ), + False, ), ], ) -def test_create_participants_df(tmp_path, bids_ids, expected, study_name): +def test_create_participants_df( + tmp_path, bids_ids, expected, study_name, adni_genotype +): from clinica.iotools.bids_utils import create_participants_df - clinical_path = create_clinical_data(tmp_path, study_name) assert ( create_participants_df( study_name, - clinical_specifications_folder=tmp_path, - clinical_data_dir=clinical_path, + clinical_specifications_folder=create_participants_spec(tmp_path), + clinical_data_dir=create_clinical_data(tmp_path, study_name, adni_genotype), bids_ids=bids_ids, ) .reset_index(drop=True)