From 888c5f948938935c475623c2ba3c20201e10889e Mon Sep 17 00:00:00 2001 From: Adam SL Graefe <109136019+aslgraefe@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:45:59 +0300 Subject: [PATCH] 105 reusableclassses linkml (#107) * updated * update * update * dwa * fix * upddates * updated cli * added label fetching * updates * update * added bioportal .env file and updated docs * update * updated fetching displays from oaklib and bioportal * implemented display fetching * updated api config and added add_prefix function * added prefix code fetch * bug fix * updated cli download records command and schema * updated downloard records functin * typo * updated licsense * fixed schema validation * fixed schema * deleted_api_super token test * fix * update * fix * fix * fix * edit workflow * fix * fix --- .github/workflows/cli_tests.yml | 4 + .github/workflows/python_ci.yml | 13 +- .gitignore | 5 + README.md | 66 +++- .../2_rarelink_framework/2_2_rarelink_cdm.rst | 338 ++++++++++++++++++ .../2_2_rarelink_cdm_instruments.rst | 230 ------------ .../3_1_setup_rarelink_framework.rst | 54 ++- docs/index.rst | 4 +- pyproject.toml | 6 +- pytest.ini | 4 + src/.DS_Store | Bin 6148 -> 6148 bytes src/rarelink/.DS_Store | Bin 6148 -> 6148 bytes src/rarelink/__main__.py | 1 - src/rarelink/cli/redcap_setup/api_config.py | 37 +- .../cli/redcap_tools/download_records.py | 64 +++- src/rarelink/utils/.DS_Store | Bin 6148 -> 6148 bytes .../utils/ontology_requests/__init__.py | 6 - .../ontology_requests/geno_api_request.py | 6 - .../ontology_requests/hpo_api_request.py | 6 - .../ontology_requests/iso3166_api_request.py | 6 - .../ontology_requests/mondo_api_request.py | 6 - .../ontology_requests/ncit_api_request.py | 6 - .../ontology_requests/orpha_api_request.py | 6 - .../utils/ontology_requests/so_api_request.py | 6 - .../utils/ontology_requests/uo_api_request.py | 6 - src/rarelink/utils/preprocessing/__init__.py | 8 +- .../utils/preprocessing/add_prefixes.py | 40 +++ 
.../utils/preprocessing/fetch_displays.py | 130 +++++++ ..._redcap_codes.py => parse_redcap_codes.py} | 0 .../preprocess_redcap_for_phenopackets.py | 5 + .../v2_0_0_dev0/processing/__init__.py | 3 +- .../processing/map_to_linkml_schema.py | 265 ++++++++++++++ .../processing/preprocess_redcap_json.py | 232 +----------- .../rarelink_6_1_genetic_findings.yaml | 6 - .../rarelink_6_3_measurements.yaml | 87 ++++- .../rarelink_repeated_elements.yaml | 4 +- tests/cli/redcap_setup/test_api.py | 19 - tests/conftest.py | 21 +- .../preprocessing/test_fetch_displays.py | 95 +++++ 39 files changed, 1179 insertions(+), 616 deletions(-) create mode 100644 docs/2_rarelink_framework/2_2_rarelink_cdm.rst delete mode 100644 docs/2_rarelink_framework/2_2_rarelink_cdm_instruments.rst delete mode 100644 src/rarelink/utils/ontology_requests/__init__.py delete mode 100644 src/rarelink/utils/ontology_requests/geno_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/hpo_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/iso3166_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/mondo_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/ncit_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/orpha_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/so_api_request.py delete mode 100644 src/rarelink/utils/ontology_requests/uo_api_request.py create mode 100644 src/rarelink/utils/preprocessing/add_prefixes.py create mode 100644 src/rarelink/utils/preprocessing/fetch_displays.py rename src/rarelink/utils/preprocessing/{preprocess_redcap_codes.py => parse_redcap_codes.py} (100%) create mode 100644 src/rarelink_cdm/v2_0_0_dev0/processing/map_to_linkml_schema.py create mode 100644 tests/utils/preprocessing/test_fetch_displays.py diff --git a/.github/workflows/cli_tests.yml b/.github/workflows/cli_tests.yml index b5c33333..3c844bb8 100644 --- a/.github/workflows/cli_tests.yml +++ 
b/.github/workflows/cli_tests.yml @@ -47,3 +47,7 @@ jobs: with: name: cli-status path: rarelink/tests/cli/status.json + + +env: + BIOPORTAL_API_TOKEN: ${{ secrets.BIOPORTAL_API_TOKEN }} diff --git a/.github/workflows/python_ci.yml b/.github/workflows/python_ci.yml index 552bb2c7..3530b924 100644 --- a/.github/workflows/python_ci.yml +++ b/.github/workflows/python_ci.yml @@ -17,8 +17,8 @@ jobs: - name: Checkout code uses: actions/checkout@v3 with: - submodules: true # This ensures submodules are checked out - name: Checkout code - + submodules: true + - name: Set up Python uses: actions/setup-python@v4 with: @@ -29,7 +29,14 @@ jobs: python3 -m pip install --upgrade pip python3 -m pip install --editable .[test,docs] python3 -m pip install --editable ./submodules/phenopacket_mapper # Install the submodule package - + + - name: Configure BioPortal API Key for Oaklib + run: | + mkdir -p ~/.config/ontology-access-kit + echo "${{ secrets.BIOPORTAL_API_TOKEN }}" > ~/.config/ontology-access-kit/bioportal-apikey.txt + - name: Run tests with pytest + env: + BIOPORTAL_API_TOKEN: ${{ secrets.BIOPORTAL_API_TOKEN }} run: | pytest diff --git a/.gitignore b/.gitignore index af9b145c..9056efe7 100644 --- a/.gitignore +++ b/.gitignore @@ -174,6 +174,11 @@ Public/ # DS Store .DS_Store/ +.DS_Store? +.DS_Store +.src/.DS_Store? +.src/rarelink/.DS_Store? +.src/rarelink/utils/.DS_Store? # FHIR SUSHI echo "/fsh-generated" >> .gitignore diff --git a/README.md b/README.md index 56cfe122..209d922c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ Rare Disease Interoperability Framework in REDCap linking international registries, FHIR and Phenopackets. -> ⚠️ **Note:** RareLink v2.0.0.dev0 is currently under development, and many things are subject to change. Please reach out before implementing or using the software to ensure you have the latest updates and guidance. +> ⚠️ **Note:** RareLink v2.0.0.dev0 is currently under development, and many +things are subject to change. 
Please reach out before implementing or using the +software to ensure you have the latest updates and guidance. [![Python CI](https://github.com/BIH-CEI/rarelink/actions/workflows/python_ci.yml/badge.svg)](https://github.com/BIH-CEI/rarelink/actions/workflows/python_ci.yml) @@ -91,11 +93,12 @@ running REDCap server. For more information, visit the official REDCap site: If your institution already provides a REDCap instance, proceed to the RareLink Documentation on [Setting Up a REDCap Project](https://rarelink.readthedocs.io/en/latest/3_installation/3_2_setup_redcap_project.html#). + ## Installation -RareLink can be set up using various Python project management approaches. One -common method is to use a virtual environment. Below is an example where the -virtual environment is named `rarelink-venv`, but you can name it as you prefer: +RareLink can be set up using various Python project management approaches. One + common method is to use a virtual environment. Below is an example where the + virtual environment is named `rarelink-venv`, but you can name it as you prefer: ```bash python3 -m venv rarelink-venv @@ -103,11 +106,50 @@ source rarelink-venv/bin/activate pip install --upgrade pip ``` -Next, clone the RareLink repository, navigate to its root directory, and install RareLink using: +Next, clone the RareLink repository, navigate to its root directory, and + install RareLink using: + +```bash +git clone https://github.com/BIH-CEI/rarelink.git +cd rarelink +pip install . +``` + +If you want to install development dependencies (e.g., `pytest`), use: + +```bash +pip install .[dev] +``` + +### Setting Up the `.env` File + +Create a `.env` file in the project root directory to store your BioPortal API + token securely. 
Add the following line: + +```plaintext +BIOPORTAL_API_TOKEN=your_api_token_here +``` + +> You can create your free BioPortal account here: [https://bioportal.bioontology.org/](https://bioportal.bioontology.org/) + Then replace `your_api_token_here` with your actual BioPortal API token. + +### Running Tests + +To ensure everything is set up correctly, run the test suite using `pytest`: + ```bash -pip install rarelink +pytest ``` +--- + +### Notes + +- Ensure that your `.env` file is not committed to version control by adding + it to `.gitignore`. +- If you encounter issues, verify you are using the correct Python version and + have installed all dependencies properly. + ### Framework setup To ensure you have the latest version of RareLink installed and to check the current version, run: @@ -196,7 +238,7 @@ use of REDCap for rare disease research and care. ## License -This project is licensed under the terms of the [MIT License](https://github.com/BIH-CEI/RareLink/blob/develop/LICENSE) +This project is licensed under the terms of the [BSD 3-Clause License](https://github.com/BIH-CEI/RareLink/blob/develop/LICENSE) ## Acknowledgements @@ -208,7 +250,9 @@ We would like to extend our thanks to everyone in the last three years for their - Authors: - [Adam SL Graefe](https://github.com/aslgraefe) - [Filip Rehburg](https://github.com/frehburg) - - Daniel Danis, PhD - - Prof. Peter N. Robinson - - Prof. Sylvia Thun - - Prof. Oya Beyan + - [Samer Alkarkoukly](https://github.com/alkarkoukly) + - [Daniel R Korn](https://github.com/DnlRKorn) + - [Daniel Danis](https://github.com/ielis) + - [Peter N. Robinson](https://github.com/pnrobinson) + - Sylvia Thun + - [Oya Beyan](https://github.com/oyadenizbeyan) diff --git a/docs/2_rarelink_framework/2_2_rarelink_cdm.rst b/docs/2_rarelink_framework/2_2_rarelink_cdm.rst new file mode 100644 index 00000000..241f5173 --- /dev/null +++ b/docs/2_rarelink_framework/2_2_rarelink_cdm.rst @@ -0,0 +1,338 @@ +.. 
_2_2: + +RareLink-CDM +============================= + +.. warning:: + RareLink v2.0.0.dev0 is currently under development, and many things are + subject to change. Please reach out before implementing or using the + software to ensure you have the latest updates and guidance. + +In this section, we provide an overview of the instruments that are part of the +RareLink Common Data Model (CDM), which is based on the :ref:`1_5`. +We have implemented the :ref:`1_5`'s definitions, codes, and mappings +into the REDCap instruments by encoding the variables and value sets in the +REDCap data dictionary. Each section of the model corresponds to a distinct +instrument, ensuring consistency and comprehensive data capture. + +.. hint:: + - Read the :ref:`1_6` page to understand how REDCap instruments data dictionaries work. + - Read the :ref:`1_5` page for more information on the Rare Disease Common Data Model. + + +RareLink-CDM Data Dictionary +---------------------------- + + +- :download:`Download RareLink CDM Data Dictionary v2.0.0.dev0 <../../res/rarelink_cdm_v2_0_0_dev0_datadictionary.csv>` + + +RuleSet for Codes and Codesystems +__________________________________ + +REDCap variables and choice codes have specific limitations and requirements: + +- REDCap recommends a maximum of 26 characters for variable names. We have shortened the variable names to adhere to this limitation. +- REDCap variables must be unique and must not contain spaces or special characters, i.e. only alphanumeric characters and underscores. +- REDCap choice codes must be unique and must not contain spaces or special characters, i.e. only alphanumeric characters and underscores. + +To address these, we have defined a set of rules for the REDCap variables and +choice codes in the RareLink CDM Data Dictionary. The rules are as follows: + +1) The REDCap variable names are based on the :ref:`1_5` codes and display names. +2) The REDCap choices are based on the :ref:`1_5` codes and display names. 
+3) The REDCap variable names are shortened to adhere to the 26-character limit. +4) The REDCap variable names are unique and do not contain spaces or special characters. +5) The REDCap choice codes are unique and do not contain spaces or special characters. +6) All codes begin with the official codesystem prefix (e.g. HP, SNOMED, etc.) as a lower case string followed by an underscore and the code. +7) All codes are defined in the Field Annotations of each data element. + +Return to `Top <#top>`_. + +Download +________ + +The RareLink CDM Data Dictionary is available for download as a CSV file: + +:download:`Download RareLink CDM Data Dictionary v2.0.0.dev0 <../..//res/rarelink_v2_0_0_dev0_datadictionary.csv>` + +.. tip:: + Read :ref:`3_1` for more information on how to import the RareLink CDM Data Dictionary into your local REDCap project. + +Return to `Top <#top>`_. + +Field Annotations +_________________ + +Within the Field Annotation field of each REDCap element, we have defined each +element's metadata according to the :ref:`1_5` standard, including the following: + +- **Variable**: Corresponding to the data element code, codesystem, and display name. +- **Choices**: If applicable, the corresponding choices codesystem, and display name. +- **Version(s)**: If applicable, the corresponding codesystem versions used in the REDCap data element +- **Mapping**: If applicable, the corresponding mapping to the :ref:`1_4` or :ref:`1_3` standard. + +Example Field Annotation of 6.2.6 Temporal Pattern: + +.. 
code-block:: text + + Variable: + HP:0011008 | Temporal Pattern + Choices: + - HP:0011009 | Acute + - HP:0011010 | Chronic + - HP:0031914 | Fluctuating + - HP:0025297 | Prolonged + - HP:0031796 | Recurrent + - HP:0031915 | Stable + - HP:0011011 | Subacute + - HP:0025153 | Transient + Version(s): + - HPO Version 2024-08-13 + Mapping: + - HL7 FHIR Expression v4.0.1: Observation.interpretation + - GA4GH Phenopacket Schema v2.0 Element: PhenotypicFeature.modifiers + +Return to `Top <#top>`_. + +.. _cdm-instruments-overview: + +RareLink-CDM Instruments +------------------------ + +The RareLink-CDM instruments translate the `ontology-based Rare Disease Common Data Model (RD-CDM) `_ +into REDCap instruments, ensuring usability for registry implementation while +aligning with the `HL7 FHIR International Patient Summary (IPS) `_ +and the `GA4GH Phenopacket Schema `_. +Each instrument corresponds to a specific section of the RD-CDM and has been +adapted for REDCap's technical requirements. + +- `1. Formal Criteria <#formal-criteria>`_ +- `2. Personal Information <#personal-information>`_ +- `3. Patient Status <#patient-status>`_ +- `4. Care Pathway <#care-pathway>`_ +- `5. Disease <#disease>`_ +- `6.1 Genetic Findings <#genetic-findings>`_ +- `6.2 Phenotypic Features <#phenotypic-features>`_ +- `6.3 Measurements <#measurements>`_ +- `6.4 Family History <#family-history>`_ +- `7. Consent <#consent>`_ +- `8. Disability <#disability>`_ + +Return to `Top <#top>`_. + +.. note:: + to be implemented. + +.. _formal-criteria: + +(1) Formal Criteria +------------------- + +**Purpose**: Captures eligibility and registration information for individuals. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. 
_personal-information: + +(2) Personal Information +------------------------ + +**Purpose**: Records demographic and personal data. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _patient-status: + +(3) Patient Status +------------------ + +**Purpose**: Tracks changes in patient conditions over time. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _care-pathway: + +(4) Care Pathway +---------------- + +**Purpose**: Logs encounter-specific data. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _disease: + +(5) Disease +----------- + +**Purpose**: Details disease history and ontology mappings. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _genetic-findings: + +(6.1) Genetic Findings +----------------------- + +**Purpose**: Captures genetic variant information. 
+ +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _phenotypic-features: + +(6.2) Phenotypic Features +------------------------- + +**Purpose**: Encodes phenotypes and their modifiers. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- **Simplified Field Names**: Shortened and formatted for REDCap constraints: + - `snomed_439272007_704321009_363778006` -> `snomed_8116006_date` + - `ga4gh_phenotypicfeature_excluded` -> `ga4gh_pheno_excluded` +- **Modifiers Grouping**: Introduced consistent naming for modifiers: + - HPO Modifiers: + - `ga4gh_phenotypicfeature_modifier_hp_1` -> `ga4gh_pheno_mod_hp1` + - `ga4gh_phenotypicfeature_modifier_hp_2` -> `ga4gh_pheno_mod_hp2` + - `ga4gh_phenotypicfeature_modifier_hp_3` -> `ga4gh_pheno_mod_hp3` + - NCBITaxon Modifiers: + - `ga4gh_phenotypicfeature_modifier_ncbitaxon_1` -> `ga4gh_pheno_mod_ncbitax1` + - `ga4gh_phenotypicfeature_modifier_ncbitaxon_2` -> `ga4gh_pheno_mod_ncbitax2` + - `ga4gh_phenotypicfeature_modifier_ncbitaxon_3` -> `ga4gh_pheno_mod_ncbitax3` + - SNOMED Modifiers: + - `ga4gh_phenotypicfeature_modifier_snomed_1` -> `ga4gh_pheno_mod_snomed1` + - `ga4gh_phenotypicfeature_modifier_snomed_2` -> `ga4gh_pheno_mod_snomed2` + - `ga4gh_phenotypicfeature_modifier_snomed_3` -> `ga4gh_pheno_mod_snomed3` + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _measurements: + +(6.3) Measurements +------------------ + +**Purpose**: Records clinical and laboratory data. 
+ +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _family-history: + +(6.4) Family History +-------------------- + +**Purpose**: Details familial relationships and genetic predispositions. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _consent: + +(7) Consent +----------- + +**Purpose**: Documents patient consent details. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. + +.. _disability: + +(8) Disability +-------------- + +**Purpose**: Captures ICF-encoded functional and disability data. + +**Core Variables**: +- [Insert Core Variables Here] + +**Adjustments for REDCap**: +- [Insert Adjustments for REDCap Here] + +**Adjustments from the RD-CDM**: +- [Insert Adjustments from the RD-CDM Here] + +Return to `RareLink-CDM Instruments Overview <#cdm-instruments-overview>`_. \ No newline at end of file diff --git a/docs/2_rarelink_framework/2_2_rarelink_cdm_instruments.rst b/docs/2_rarelink_framework/2_2_rarelink_cdm_instruments.rst deleted file mode 100644 index da4ebfa9..00000000 --- a/docs/2_rarelink_framework/2_2_rarelink_cdm_instruments.rst +++ /dev/null @@ -1,230 +0,0 @@ -.. _2_2: - -RareLink-CDM -============================= - -.. warning:: - RareLink v2.0.0.dev0 is currently under development, and many things are - subject to change. 
Please reach out before implementing or using the - software to ensure you have the latest updates and guidance. - -In this section, we provide an overview of the instruments that are part of the -RareLink Common Data Model (CDM), which is based on the :ref:`1_5`. -We have implemented the :ref:`1_5`'s definitions, codes, and mappins -into the REDCap instruments by encoding the variables and value sets in the -REDCap data dictionary. Each section of the model corresponds to a distinct -instrument, ensuring consistency and comprehensive data capture. - -.. hint:: - - Read the :ref:`1_6` page to understand how REDCap instruments data dictionaries work. - - Read the :ref:`1_5` page for more information on the Rare Disease Common Data Model. - - -RareLink CDM Data Dictionary ----------------------------- - - -- :download:`Download RareLink CDM Data Dictionary v2.0.0.dev0 <../../res/rarelink_cdm_v2_0_0_dev0_datadictionary.csv>` -- :download:`Download all RareLink CDM instruments seperately as a .zip file <../../res/rarelink_cdm_v2_0_0_dev0_instruments.zip>` - - -RuleSet for Codes and Codesystems -__________________________________ -REDCap variables and choice codes have specific limitations and requirements: - -- REDCap recommends a maximum of 26 characters for variable names. We have shortened the variable names to adhere to this limitation. -- REDCap variables must be unique and must not contain spaces or special characters, i.e. only alphanumeric characters and underscores. -- REDCap choice codes must be unique and must not contain spaces or special characters, i.e. only alphanumeric characters and underscores. - -To address these, we have defined a set of rules for the REDCap variables and -choice codes in the RareLink CDM Data Dictionary. The rules are as follows: - -1) The REDCap variable names are based on the :ref:`1_5` codes and display names. -2) The REDCap choices are based on the :ref:`1_5` codes and display names. 
-3) The REDCap variable names are shortened to adhere to the 26-character limit. -4) The REDCap variable names are unique and do not contain spaces or special characters. -5) The REDCap choice codes are unique and do not contain spaces or special characters. -6) All codes begin with the official codesystem prefix (e.g. HP, SNOMED, etc.) as a lower case string followed by an underscore and the code. -7) All codes are defined in the Field Annotations of each data element. - -Download -________ - -The RareLink CDM Data Dictionary is available for download as a CSV file: - -:download:`Download RareLink CDM Data Dictionary v2.0.0.dev0 <../..//res/rarelink_v2_0_0_dev0_datadictionary.csv>` - -.. tip:: - Read :ref:`3_1` for more information on how to import the RareLink CDM Data Dictionary into your local REDCap project. - -Field Annotations -_________________ - -Witin the Field Annotation field of each REDCap element, we have defined each -element's metadata according to the :ref:`1_5` standard, including the following: - -- **Variable**: Corresponding to the data element code, codesystem, and display name. -- **Choices**: If applicable, the corresponding choices codesystem, and display name. -- **Version(s)**: If applicable, the corresponding codesystem versions used in the REDCap data element -- **Mapping**: If applicable, the corresponding mapping to the :ref:`1_4` or :ref:`1_3` standard. - -Example Field Annotation of 6.2.6 Temporal Pattern: - -.. 
code-block:: text - - Variable: - HP:0011008 | Temporal Pattern - Choices: - - HP:0011009 | Acute - - HP:0011010 | Chronic - - HP:0031914 | Fluctuating - - HP:0025297 | Prolonged - - HP:0031796 | Recurrent - - HP:0031915 | Stable - - HP:0011011 | Subactue - - HP:0025153 | Transient - Version(s): - - HPO Version 2024-08-13 - Mapping: - - HL7 FHIR Expression v4.0.1: Observation.interpretation - - GA4GH Phenopacket Schema v2.0 Element: PhenotypicFeature.modifiers - - -RareLink CDM Instruments ------------------------- - -The RareLink CDM instruments are based on the :ref:`1_5` and are designed to -capture the data elements of the RD-CDM. Each instrument corresponds to a -section of the model, ensuring consistency and comprehensive data capture. - -- `1. Formal Criteria <#formal-criteria>`_ -- `2. Personal Information <#personal-information>`_ -- `3. Patient Status <#patient-status>`_ -- `4. Care Pathway <#care-pathway>`_ -- `5. Disease <#disease>`_ -- `6.1 Genetic Findings <#genetic-findings>`_ -- `6.2 Phenotypic Features <#phenotypic-features>`_ -- `6.3 Measurements <#measurements>`_ -- `6.4 Family History <#family-history>`_ -- `7. Consent <#consent>`_ -- `8. Disability <#disability>`_ - -.. _formal-criteria: - -(1) Formal Criteria -____________________ -Content for Formal Criteria goes here. - -.. _go-back-top: - -Return to `Top <#top>`_. - -.. _personal-information: - -(2) Personal Information -__________________________ -Content for Personal Information goes here. - -Return to `Top <#top>`_. - -.. _patient-status: - -(3) Patient Status -___________________ -Content for Patient Status goes here. - -Return to `Top <#top>`_. - -.. _care-pathway: - -(4) Care Pathway -_________________ -Content for Care Pathway goes here. - -hl7fhir_encounter_period_start -> hl7fhir_enc_period_start -hl7fhir_encounter_period_end -> hl7fhir_enc_period_end - -Return to `Top <#top>`_. - - - -.. _disease: - -(5) Disease -____________ -Content for Disease goes here. 
- -Return to `Top <#top>`_. - -.. _genetic-findings: - -(6.1) Genetic Findings -________________________ -Content for Genetic Findings goes here. - - -ga4gh_interpretation_status -> ga4gh_interp_status -ga4gh_therapeutic_actionability -> ga4gh_therap_action - - -Return to `Top <#top>`_. - - -.. _phenotypic-features: - -(6.2) Phenotypic Features -__________________________ -Content for Phenotypic Features goes here. - -snomed_439272007_704321009_363778006 -> snomed_8116006_date -ga4gh_phenotypicfeature_excluded -> ga4gh_pheno_excluded -ga4gh_phenotypicfeature_modifier_hp_1 -> ga4gh_pheno_mod_hp1 -ga4gh_phenotypicfeature_modifier_hp_2 -> ga4gh_pheno_mod_hp2 -ga4gh_phenotypicfeature_modifier_hp_3 -> ga4gh_pheno_mod_hp3 -ga4gh_phenotypicfeature_modifier_ncbitaxon_1 -> ga4gh_pheno_mod_ncbitax1 -ga4gh_phenotypicfeature_modifier_ncbitaxon_2 -> ga4gh_pheno_mod_ncbitax2 -ga4gh_phenotypicfeature_modifier_ncbitaxon_3 -> ga4gh_pheno_mod_ncbitax3 -ga4gh_phenotypicfeature_modifier_snomed_1 -> ga4gh_pheno_mod_snomed1 -ga4gh_phenotypicfeature_modifier_snomed_2 -> ga4gh_pheno_mod_snomed2 -ga4gh_phenotypicfeature_modifier_snomed_3 -> ga4gh_pheno_mod_snomed3 - - -(6.3) Measurements -____________________ - - -Return to `Top <#top>`_. - -.. _family-history: - -(6.4) Family History -______________________ -Content for Family History goes here. - -hl7fhir_familymemberhistory_status -> hl7fhir_fmh_status - -Return to `Top <#top>`_. - - -.. _consent: - -(7) Consent -____________ -Content for Consent goes here. -customcode_consent_contact_research -> customcode_consent_contact -customcode_conset_data_reuse -> customcode_consent_data - -Return to `Top <#top>`_. - - -.. _disability: - -(8) Disability -______________ -Content for Disability goes here. - -Return to `Top <#top>`_. 
- - - - diff --git a/docs/3_installation/3_1_setup_rarelink_framework.rst b/docs/3_installation/3_1_setup_rarelink_framework.rst index 4694661b..81413589 100644 --- a/docs/3_installation/3_1_setup_rarelink_framework.rst +++ b/docs/3_installation/3_1_setup_rarelink_framework.rst @@ -8,24 +8,55 @@ Set up the RareLink Framework subject to change. Please reach out before implementing or using the software to ensure you have the latest updates and guidance. -To set up the RareLink framework, follow these steps: +Getting Started +--------------- -**Install the RareLink framework dependencies.** -------------------------------------------------- +Follow these steps to set up the project locally and run tests. -Clone the repository and install the dependencies: +1. Clone the repository: .. code-block:: bash - pip install . + git clone https://github.com/BIH-CEI/rarelink.git + cd rarelink -Via the RareLink CLI, type: +2. Create a virtual environment: .. code-block:: bash - pip install rarelink + python3 -m venv .venv + source .venv/bin/activate # On macOS/Linux + .venv\Scripts\activate # On Windows -This command installs all necessary RareLink framework dependencies and functionalities. +3. Install dependencies: + +.. code-block:: bash + + pip install . + +4. Configure the `.env` file: + Create a `.env` file in the project root directory. Add the following line: + +.. code-block:: ini + + BIOPORTAL_API_TOKEN=your_api_token_here + + Replace `your_api_token_here` with your actual BioPortal API token. + +5. Run tests: + Use `pytest` to run the test suite. + +.. code-block:: bash + + pytest + +.. 
note:: + You can create your free BioPortal account here: `BioPortal `_ + +_____________________________________________________________________________________ + +RareLink-CLI Framework Configuration +------------------------------------ To update the RareLink framework: @@ -52,6 +83,9 @@ To reset the framework to its initial state: This command clears all framework configurations and reverts it to its initial setup state. +_____________________________________________________________________________________ + + Import Mapper Configuration ___________________________ @@ -67,6 +101,8 @@ You will be prompted to enter: - Your REDCap project URL and API token. - Your location where to store the Import Mapper configurations. +_____________________________________________________________________________________ + Phenopacket Pipeline Configuration ___________________________________ @@ -80,6 +116,8 @@ This command guides you through setting up the Phenopacket pipeline for RareLink You will be prompted to enter: - Your location where to store the Phenopackets. 
+_____________________________________________________________________________________ + FHIR Pipeline Configuration ___________________________ diff --git a/docs/index.rst b/docs/index.rst index 6dab0f10..4631431c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -75,7 +75,7 @@ Sections - :doc:`2_rarelink_framework/2_0_rarelink_file` - :doc:`2_rarelink_framework/2_1_rarelink_overview` -- :doc:`2_rarelink_framework/2_2_rarelink_cdm_instruments` +- :doc:`2_rarelink_framework/2_2_rarelink_cdm` - :doc:`2_rarelink_framework/2_3_rarelink_core_redcap_project` - :doc:`2_rarelink_framework/2_4_rarelink_cli` @@ -136,7 +136,7 @@ Additional Information 2_rarelink_framework/2_0_rarelink_file 2_rarelink_framework/2_1_rarelink_overview - 2_rarelink_framework/2_2_rarelink_cdm_instruments + 2_rarelink_framework/2_2_rarelink_cdm 2_rarelink_framework/2_3_rarelink_core_redcap_project 2_rarelink_framework/2_4_rarelink_cli diff --git a/pyproject.toml b/pyproject.toml index b251670b..74cd0313 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,11 @@ dependencies = [ "tqdm", "node", "npm", - "linkml" + "linkml", + "schema-automator", + "oak", + "oaklib", + "python-dotenv" ] dynamic = ["version"] diff --git a/pytest.ini b/pytest.ini index 0ab89fe8..f51a4634 100644 --- a/pytest.ini +++ b/pytest.ini @@ -8,6 +8,10 @@ python_files = _test*.py test*.py addopts = --doctest-modules --doctest-glob *.rst --ignore=submodules/phenopacket_mapper/ --ignore=submodules/rd-cdm/ --ignore=rarelink/src/rarelink_cdm/v2_0_0_dev0/datamodel/rarelink_cdm.py --ignore=rarelink/src/rarelink_cdm/v2_0_0_dev0/datamodel/rarelink_repeated_elements.py filterwarnings = + ignore:pkg_resources is deprecated:DeprecationWarning + ignore:Deprecated call to `pkg_resources.declare_namespace.*`:DeprecationWarning ignore::DeprecationWarning:google.protobuf.internal.well_known_types + pythonpath = src + diff --git a/src/.DS_Store b/src/.DS_Store index 
7d4f8d210077d21852391c5a81138482b48d7f6f..b8efddc027838afe2dc6321140b194f1543b719e 100644 GIT binary patch delta 64 zcmZoMXfc@J&&a(oU^gQp_hufZt&B=M3`GoyK%B~u!;s04$B@ks&ydWJ!jPL%oSc)C SpP$3H`5?0j%Vu_tzx)6pX%TGz delta 32 ocmZoMXfc@J&&ahgU^gQp*Jd83t&E$$G7GUxY%tl(&heKY0IBr~W&i*H diff --git a/src/rarelink/.DS_Store b/src/rarelink/.DS_Store index 5a1746df1e566d3078a005550990b820739234ab..24d474e78561139707c3814a864d0c56420150d4 100644 GIT binary patch delta 22 ecmZoMXfc?OH1VM3WCIbFiT%Qi3>z0GiUR;%+6S=! delta 24 gcmZoMXfc?O#K<`DpvGha5tfPl!iAjHu~2NHo+1YW5HK<@2yDK{XvVhr0b>>8W_AvK4xj>{$am(+{342+ UKzW7)kiy9(Jj$D6L{=~Z06T*Zn*aa+ delta 264 zcmZoMXfc=|#>B`mF;Q%yo}wrV0|Nsi1A_nqLk>eeLpnnxLp(###Kh(GAPGUBP#!}G zkVKMXNM$HwC Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/hpo_api_request.py b/src/rarelink/utils/ontology_requests/hpo_api_request.py deleted file mode 100644 index d6596559..00000000 --- a/src/rarelink/utils/ontology_requests/hpo_api_request.py +++ /dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/iso3166_api_request.py b/src/rarelink/utils/ontology_requests/iso3166_api_request.py deleted file mode 100644 index 7afe8883..00000000 --- a/src/rarelink/utils/ontology_requests/iso3166_api_request.py +++ /dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the ISO 3166 API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/mondo_api_request.py b/src/rarelink/utils/ontology_requests/mondo_api_request.py deleted file mode 100644 index d6596559..00000000 --- 
a/src/rarelink/utils/ontology_requests/mondo_api_request.py +++ /dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/ncit_api_request.py b/src/rarelink/utils/ontology_requests/ncit_api_request.py deleted file mode 100644 index d6596559..00000000 --- a/src/rarelink/utils/ontology_requests/ncit_api_request.py +++ /dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/orpha_api_request.py b/src/rarelink/utils/ontology_requests/orpha_api_request.py deleted file mode 100644 index d6596559..00000000 --- a/src/rarelink/utils/ontology_requests/orpha_api_request.py +++ /dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/so_api_request.py b/src/rarelink/utils/ontology_requests/so_api_request.py deleted file mode 100644 index d6596559..00000000 --- a/src/rarelink/utils/ontology_requests/so_api_request.py +++ /dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/ontology_requests/uo_api_request.py b/src/rarelink/utils/ontology_requests/uo_api_request.py deleted file mode 100644 index d6596559..00000000 --- a/src/rarelink/utils/ontology_requests/uo_api_request.py +++ 
/dev/null @@ -1,6 +0,0 @@ -from phenopacket_mapper.data_standards import Coding - - -def mondo_api_request(code: str) -> Coding: - """Request a term from the Monarch Initiative API""" - raise NotImplementedError \ No newline at end of file diff --git a/src/rarelink/utils/preprocessing/__init__.py b/src/rarelink/utils/preprocessing/__init__.py index 2f660f67..92adbfa2 100644 --- a/src/rarelink/utils/preprocessing/__init__.py +++ b/src/rarelink/utils/preprocessing/__init__.py @@ -1,9 +1,15 @@ """This module implements utility functions to preprocess medical datasets in tabular formats.""" #from .preprocess_redcap_for_phenopackets import preprocess_redcap_for_phenopackets -from .preprocess_redcap_codes import parse_redcap_code +from .parse_redcap_codes import parse_redcap_code +from .fetch_displays import fetch_label_for_code, fetch_label_directly +from .add_prefixes import add_prefix_to_code, process_prefix __all__ = [ #"preprocess_redcap_for_phenopackets", "parse_redcap_code", + "fetch_label_for_code", + "fetch_label_directly", + "add_prefix_to_code", + "process_prefix" ] diff --git a/src/rarelink/utils/preprocessing/add_prefixes.py b/src/rarelink/utils/preprocessing/add_prefixes.py new file mode 100644 index 00000000..6ffca26c --- /dev/null +++ b/src/rarelink/utils/preprocessing/add_prefixes.py @@ -0,0 +1,40 @@ +def add_prefix_to_code(code: str, prefix: str = "") -> str: + """ + Adds a specific prefix to the REDCap code if not already present. + + Args: + code (str): The original code (e.g., "G46.4", "62374-4"). + prefix (str): The prefix to add (e.g., "ICD10CM"). + + Returns: + str: The code with the appropriate prefix (e.g., "ICD10CM:G46.4"). + """ + if not code: + return code + if prefix and not code.startswith(f"{prefix}:"): + return f"{prefix}:{code}" + return code + + +def process_prefix(code: str, prefix: str) -> str: + """ + Processes a code by ensuring it has the correct prefix format. + Specifically, replaces underscores with colons where applicable. 
+ + Args: + code (str): The code to process (e.g., "UO_1234"). + prefix (str): The expected prefix (e.g., "UO"). + + Returns: + str: The processed code with the correct prefix format (e.g., "UO:1234"). + """ + if not code or not prefix: + return code + + if code.startswith(f"{prefix}_"): + return code.replace("_", ":", 1) + + if code.startswith(f"{prefix}:"): + return code + + return code diff --git a/src/rarelink/utils/preprocessing/fetch_displays.py b/src/rarelink/utils/preprocessing/fetch_displays.py new file mode 100644 index 00000000..5805e195 --- /dev/null +++ b/src/rarelink/utils/preprocessing/fetch_displays.py @@ -0,0 +1,130 @@ +from urllib.parse import quote +from oaklib import get_adapter +from typing import List, Optional +import os +import requests +from dotenv import load_dotenv + +# Load the API token from the environment file +load_dotenv() +BIOPORTAL_API_TOKEN = os.getenv("BIOPORTAL_API_TOKEN") + +if not BIOPORTAL_API_TOKEN: + raise ValueError("BioPortal API token not found. Please set it in the .env file.") + +adapter = get_adapter(f"bioportal:{BIOPORTAL_API_TOKEN}") + +def fetch_label_directly(code): + """ + Fetch the label for a specific code directly using the BioPortal API. + + Args: + code (str): Ontology code (e.g., "Thesaurus:C3262", "NCBITAXON:1279"). + + Returns: + str: The label (preferred name) of the term if found, or None if not + resolvable. 
+ """ + base_url = "https://data.bioontology.org/ontologies" + + # Split ontology prefix and identifier + if ":" not in code: + print(f"Invalid code format: {code}") + return None + + ontology, identifier = code.split(":", 1) + + # Adjust ontology prefix and IRI for specific cases + if ontology == "ORPHA": + ontology = "ORDO" + iri = f"http://www.orpha.net/ORDO/Orphanet_{identifier}" + elif ontology == "HGNC": + ontology = "HGNC-NR" + iri = f"http://identifiers.org/hgnc/{identifier}" + elif ontology == "Thesaurus": # Map "Thesaurus" to "NCIT" + ontology = "NCIT" + iri = f"http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#{identifier}" + elif ontology == "NCBITAXON": + iri = f"http://purl.bioontology.org/ontology/NCBITAXON/{identifier}" + else: + print(f"Unsupported ontology: {ontology}") + return None + + # Correctly encode the IRI for URL construction + encoded_iri = quote(iri, safe="") # Encodes special characters + url = f"{base_url}/{ontology}/classes/{encoded_iri}?apikey={BIOPORTAL_API_TOKEN}" + + # Debugging: Print constructed URL + print(f"Constructed URL: {url}") + + # Make the API request + response = requests.get(url) + if response.status_code == 200: + data = response.json() + return data.get("prefLabel", None) + else: + print(f"Error fetching label for {code}. \ + Status code: {response.status_code}") + return None + +def list_ontologies(): + """ + Lists all available ontologies in BioPortal via Oaklib. + """ + try: + print("Fetching available ontologies from BioPortal...") + ontologies = adapter.ontologies() + print("Available Ontologies:") + for ontology in ontologies: + print(ontology) + except Exception as e: + print(f"Error listing ontologies: {e}") + + +def fetch_label_for_code(code: str) -> Optional[str]: + """ + Fetch the label for a single ontology code using Oaklib and BioPortal. + + Args: + code (str): The ontology code to resolve. + + Returns: + str: The label for the code if found, otherwise None. 
+ """ + try: + label = adapter.label(code) + if label: + print(f"Code: {code}, Label: {label}") + return label + else: + print(f"Code {code} could not be resolved.") + return None + except Exception as e: + print(f"Error fetching label for {code}: {e}") + return None + +def batch_fetch_labels(codes: List[str]) -> List[Optional[str]]: + """ + Fetch labels for a batch of ontology codes. + + Args: + codes (List[str]): List of ontology codes to resolve. + + Returns: + List[Optional[str]]: A list of labels corresponding to the + provided codes. + """ + labels = [] + for code in codes: + # Use fetch_label_directly for specific ontologies + if ( + code.startswith("ORPHA:") + or code.startswith("HGNC:") + or code.startswith("Thesaurus:") + or code.startswith("NCBITAXON:") + ): + label = fetch_label_directly(code) + else: + label = fetch_label_for_code(code) + labels.append(label) + return labels diff --git a/src/rarelink/utils/preprocessing/preprocess_redcap_codes.py b/src/rarelink/utils/preprocessing/parse_redcap_codes.py similarity index 100% rename from src/rarelink/utils/preprocessing/preprocess_redcap_codes.py rename to src/rarelink/utils/preprocessing/parse_redcap_codes.py diff --git a/src/rarelink/utils/preprocessing/preprocess_redcap_for_phenopackets.py b/src/rarelink/utils/preprocessing/preprocess_redcap_for_phenopackets.py index eff0f754..db18ab92 100644 --- a/src/rarelink/utils/preprocessing/preprocess_redcap_for_phenopackets.py +++ b/src/rarelink/utils/preprocessing/preprocess_redcap_for_phenopackets.py @@ -1,3 +1,8 @@ +""" +for now not necessary, all codes are processed to the LinkML schema automatically. 
+only mapping will be needed between linkml schema and phenopacket schema +""" + # from typing import List # import pandas as pd diff --git a/src/rarelink_cdm/v2_0_0_dev0/processing/__init__.py b/src/rarelink_cdm/v2_0_0_dev0/processing/__init__.py index 2c76f6d1..80cfdae6 100644 --- a/src/rarelink_cdm/v2_0_0_dev0/processing/__init__.py +++ b/src/rarelink_cdm/v2_0_0_dev0/processing/__init__.py @@ -14,8 +14,9 @@ """ # Importing functions from individual modules -from .preprocess_redcap_json import preprocess_flat_data, MAPPING_FUNCTIONS +from .preprocess_redcap_json import preprocess_flat_data from .transform_to_redcap_json import transform_to_redcap_json +from .map_to_linkml_schema import MAPPING_FUNCTIONS __all__ = [ "preprocess_flat_data", diff --git a/src/rarelink_cdm/v2_0_0_dev0/processing/map_to_linkml_schema.py b/src/rarelink_cdm/v2_0_0_dev0/processing/map_to_linkml_schema.py new file mode 100644 index 00000000..3a562a0d --- /dev/null +++ b/src/rarelink_cdm/v2_0_0_dev0/processing/map_to_linkml_schema.py @@ -0,0 +1,265 @@ +from rarelink.utils.preprocessing.add_prefixes import add_prefix_to_code, process_prefix + + +def map_formal_criteria(entry): + """ + Maps a flat REDCap entry to the FormalCriteria schema. + """ + return { + "snomed_422549004": entry.get("snomed_422549004", ""), + "snomed_399423000": entry.get("snomed_399423000", ""), + "rarelink_1_formal_criteria_complete": entry.get( + "rarelink_1_formal_criteria_complete", "") + } + + +def map_personal_information(entry): + """ + Maps a flat REDCap entry to the PersonalInformation schema. 
+ """ + return { + "snomed_184099003": entry.get("snomed_184099003", ""), + "snomed_281053000": entry.get("snomed_281053000", ""), + "snomed_1296886006": entry.get("snomed_1296886006", ""), + "snomed_263495000": entry.get("snomed_263495000", ""), + "snomed_370159000": entry.get("snomed_370159000", ""), + "rarelink_2_personal_information_complete": entry.get( + "rarelink_2_personal_information_complete", "") + } + + +def map_patient_status(entry): + """ + Maps a flat REDCap entry to the PatientStatus schema. + """ + def convert_to_boolean(value): + """ + Convert SNOMED codes for yes/no to Boolean values. + """ + yes_no_mapping = { + "snomed_373066001": True, # Yes + "snomed_373067005": False # No + } + return yes_no_mapping.get(value, None) + + return { + "patient_status_date": entry.get("patient_status_date", ""), + "snomed_278844005": entry.get("snomed_278844005", ""), + "snomed_398299004": entry.get("snomed_398299004", ""), + "snomed_184305005": add_prefix_to_code(entry.get( + "snomed_184305005", ""), "ICD10CM"), + "snomed_105727008": entry.get("snomed_105727008", ""), + "snomed_412726003": entry.get("snomed_412726003", ""), + "snomed_723663001": convert_to_boolean(entry.get( + "snomed_723663001", "")), + "rarelink_3_patient_status_complete": entry.get( + "rarelink_3_patient_status_complete", "") + } + + +def map_care_pathway(entry): + """ + Maps a flat REDCap entry to the CarePathway schema. + """ + return { + "hl7fhir_enc_period_start": entry.get("hl7fhir_enc_period_start", ""), + "hl7fhir_enc_period_end": entry.get("hl7fhir_enc_period_end", ""), + "snomed_305058001": entry.get("snomed_305058001", ""), + "hl7fhir_encounter_class": entry.get("hl7fhir_encounter_class", ""), + "rarelink_4_care_pathway_complete": entry.get( + "rarelink_4_care_pathway_complete", "") + } + +def map_disease(entry): + """ + Maps a flat REDCap entry to the Disease schema. 
+ """ + return { + "disease_coding": entry.get("disease_coding", ""), + "snomed_64572001_mondo": entry.get("snomed_64572001_mondo", ""), + "snomed_64572001_ordo": entry.get("snomed_64572001_ordo", ""), + "snomed_64572001_icd10cm": add_prefix_to_code(entry.get( + "snomed_64572001_icd10cm", ""), "ICD10CM"), + "snomed_64572001_icd11": add_prefix_to_code(entry.get( + "snomed_64572001_icd11", ""), "ICD11"), + "snomed_64572001_omim_p": add_prefix_to_code(entry.get( + "snomed_64572001_omim_p", ""), "OMIM"), + "loinc_99498_8": entry.get("loinc_99498_8", ""), + "snomed_424850005": entry.get("snomed_424850005", ""), + "snomed_298059007": entry.get("snomed_298059007", ""), + "snomed_423493009": entry.get("snomed_423493009", ""), + "snomed_432213005": entry.get("snomed_432213005", ""), + "snomed_363698007": add_prefix_to_code(entry.get("snomed_363698007", ""), + "SNOMEDCT"), + "snomed_263493007": entry.get("snomed_263493007", ""), + "snomed_246112005": entry.get("snomed_246112005", ""), + "rarelink_5_disease_complete": entry.get("rarelink_5_disease_complete", "") + } + +def map_genetic_findings(entry): + """ + Maps a flat REDCap entry to the GeneticFindings schema. + """ + + def convert_to_boolean(value): + """ + Convert SNOMED codes for yes/no to Boolean values. 
+ """ + yes_no_mapping = { + "yes": True, # Yes + "no": False # No + } + return yes_no_mapping.get(value, None) + + return { + "genetic_diagnosis_code": entry.get("genetic_diagnosis_code", ""), + "snomed_106221001_mondo": entry.get("snomed_106221001_mondo", ""), + "snomed_106221001_omim_p": add_prefix_to_code(entry.get( + "snomed_106221001_omim_p", ""), "OMIM"), + "ga4gh_progress_status": entry.get("ga4gh_progress_status", ""), + "ga4gh_interp_status": entry.get("ga4gh_interp_status", ""), + "loinc_81304_8": entry.get("loinc_81304_8", ""), + "loinc_62374_4": entry.get("loinc_62374_4", ""), + "loinc_lp7824_8": entry.get("loinc_lp7824_8", ""), + "variant_expression": entry.get("variant_expression", ""), + "loinc_81290_9": entry.get("loinc_81290_9", ""), + "loinc_48004_6": entry.get("loinc_48004_6", ""), + "loinc_48005_3": entry.get("loinc_48005_3", ""), + "variant_validation": convert_to_boolean(entry.get( + "variant_validation", "")), + "loinc_48018_6": entry.get("loinc_48018_6", ""), + "loinc_53034_5": entry.get("loinc_53034_5", ""), + "loinc_53034_5_other": add_prefix_to_code(entry.get( + "loinc_53034_5_other", ""), "LOINC"), + "loinc_48002_0": entry.get("loinc_48002_0", ""), + "loinc_48019_4": entry.get("loinc_48019_4", ""), + "loinc_48019_4_other": add_prefix_to_code(entry.get( + "loinc_48019_4_other", ""), "LOINC"), + "loinc_53037_8": entry.get("loinc_53037_8", ""), + "ga4gh_therap_action": entry.get("ga4gh_therap_action", ""), + "loinc_93044_6": entry.get("loinc_93044_6", ""), + "rarelink_6_1_genetic_findings_complete": entry.get( + "rarelink_6_1_genetic_findings_complete", "") + } + +def map_phenotypic_feature(entry): + """ + Maps a flat REDCap entry to the PhenotypicFeature schema. 
+ """ + return { + "snomed_8116006": entry.get("snomed_8116006", ""), + "snomed_363778006": entry.get("snomed_363778006", ""), + "snomed_8116006_onset": entry.get("snomed_8116006_onset", ""), + "snomed_8116006_resolution": entry.get("snomed_8116006_resolution", ""), + "hp_0003674": entry.get("hp_0003674", ""), + "hp_0011008": entry.get("hp_0011008", ""), + "hp_0012824": entry.get("hp_0012824", ""), + "hp_0012823_hp1": entry.get("hp_0012823_hp1", ""), + "hp_0012823_hp2": entry.get("hp_0012823_hp2", ""), + "hp_0012823_hp3": entry.get("hp_0012823_hp3", ""), + "hp_0012823_ncbitaxon": add_prefix_to_code( + entry.get("hp_0012823_ncbitaxon", ""), "NCBITAXON"), + "hp_0012823_snomed": add_prefix_to_code( + entry.get("hp_0012823_snomed", ""), "SNOMEDCT"), + "phenotypicfeature_evidence": entry.get("phenotypicfeature_evidence", ""), + "rarelink_6_2_phenotypic_feature_complete": entry.get( + "rarelink_6_2_phenotypic_feature_complete", "") + } + + +def map_measurements(entry): + """ + Maps a flat REDCap entry to the Measurement schema with appropriate prefixes + and additional fields. 
+ """ + return { + "measurement_category": entry.get("measurement_category", ""), + "measurement_status": entry.get("measurement_status", ""), + "ncit_c60819": add_prefix_to_code(entry.get("ncit_c60819", ""), "LOINC"), + "ln_85353_1": add_prefix_to_code(entry.get( + "ln_85353_1", ""), "LOINC"), + "ln_85353_1_other": entry.get( + "ln_85353_1_other", ""), + "ncit_c25712": float(entry.get( + "ncit_c25712", 0)) if entry.get("ncit_c25712") else None, + "ncit_c92571": process_prefix(entry.get("ncit_c92571", ""), "UO"), + "ncit_c41255": entry.get( "ncit_c41255", ""), + "ncit_c82577": entry.get("ncit_c82577", ""), + "snomed_122869004_ncit": entry.get( + "snomed_122869004_ncit", ""), + "snomed_122869004_snomed": add_prefix_to_code(entry.get( + "snomed_122869004_snomed", ""), "SNOMEDCT"), + "snomed_122869004": add_prefix_to_code(entry.get( + "snomed_122869004", ""), "SNOMEDCT"), + "snomed_122869004_bodysite": add_prefix_to_code(entry.get( + "snomed_122869004_bodysite", ""), "SNOMEDCT"), + "snomed_122869004_status": entry.get("snomed_122869004_status", ""), + "rarelink_6_3_measurements_complete": entry.get( + "rarelink_6_3_measurements_complete", ""), + } + +def map_family_history(entry): + """ + Maps a flat REDCap entry to the FamilyHistory schema. 
+ """ + return { + "family_history_pseudonym": entry.get("family_history_pseudonym", ""), + "propositus": entry.get("snomed_64245008", ""), + "relationship_to_index_case": entry.get("snomed_408732007", ""), + "consanguinity": entry.get("snomed_842009", ""), + "family_member_relationship": entry.get("snomed_444018008", ""), + "family_member_record_status": entry.get("hl7fhir_fmh_status", ""), + "family_member_sex": entry.get("loinc_54123_5", ""), + "family_member_age": int(entry.get("loinc_54141_7", 0)) if entry.get( + "loinc_54141_7") else None, + "family_member_dob": entry.get("loinc_54124_3", ""), + "family_member_deceased": entry.get("snomed_740604001", ""), + "family_member_cause_of_death": add_prefix_to_code(entry.get( + "loinc_54112_8", ""), "ICD10CM"), + "family_member_deceased_age": int(entry.get( + "loinc_92662_6", 0)) if entry.get("loinc_92662_6") else None, + "family_member_disease": entry.get("loinc_75315_2", ""), + "rarelink_6_4_family_history_complete": entry.get( + "rarelink_6_4_family_history_complete", "") + } + +def map_consent(entry): + """ + Maps a flat REDCap entry to the Consent schema. + """ + return { + "consent_status": entry.get("snomed_309370004", ""), + "consent_date": entry.get("hl7fhir_consent_datetime", ""), + "health_policy_monitoring": entry.get("snomed_386318002", ""), + "agreement_to_contact": entry.get("rarelink_consent_contact", ""), + "consent_to_reuse_data": entry.get("rarelink_consent_data", ""), + "biological_sample": entry.get("snomed_123038009", ""), + "biobank_link": entry.get("rarelink_biobank_link", ""), + "rarelink_7_consent_complete": entry.get( + "rarelink_7_consent_complete", "") + } + +def map_disability(entry): + """ + Maps a flat REDCap entry to the Disability schema. 
+ """ + return { + "icf_score": entry.get("rarelink_icf_score", ""), + "rarelink_8_disability_complete": entry.get( + "rarelink_8_disability_complete", "") + } + + +MAPPING_FUNCTIONS = { + "formal_criteria": map_formal_criteria, + "personal_information": map_personal_information, + "patient_status": map_patient_status, + "care_pathway": map_care_pathway, + "disease": map_disease, + "genetic_findings": map_genetic_findings, + "phenotypic_feature": map_phenotypic_feature, + "measurements": map_measurements, + "family_history": map_family_history, + "consent": map_consent, + "disability": map_disability +} \ No newline at end of file diff --git a/src/rarelink_cdm/v2_0_0_dev0/processing/preprocess_redcap_json.py b/src/rarelink_cdm/v2_0_0_dev0/processing/preprocess_redcap_json.py index 62f813b6..748e05d0 100644 --- a/src/rarelink_cdm/v2_0_0_dev0/processing/preprocess_redcap_json.py +++ b/src/rarelink_cdm/v2_0_0_dev0/processing/preprocess_redcap_json.py @@ -1,232 +1,6 @@ import json from collections import defaultdict - - -# Schema-specific mappers -def map_formal_criteria(entry): - """ - Maps a flat REDCap entry to the FormalCriteria schema. - """ - return { - "snomed_422549004": entry.get("snomed_422549004", ""), - "snomed_399423000": entry.get("snomed_399423000", ""), - "rarelink_1_formal_criteria_complete": entry.get("rarelink_1_formal_criteria_complete", "") - } - - -def map_personal_information(entry): - """ - Maps a flat REDCap entry to the PersonalInformation schema. 
- """ - return { - "snomed_184099003": entry.get("snomed_184099003", ""), - "snomed_281053000": entry.get("snomed_281053000", ""), - "snomed_1296886006": entry.get("snomed_1296886006", ""), - "snomed_263495000": entry.get("snomed_263495000", ""), - "snomed_370159000": entry.get("snomed_370159000", ""), - "rarelink_2_personal_information_complete": entry.get("rarelink_2_personal_information_complete", "") - } - - -def map_patient_status(entry): - """ - Maps a flat REDCap entry to the PatientStatus schema. - """ - def convert_to_boolean(value): - """ - Convert SNOMED codes for yes/no to Boolean values. - """ - yes_no_mapping = { - "snomed_373066001": True, # Yes - "snomed_373067005": False # No - } - return yes_no_mapping.get(value, None) - - return { - "patient_status_date": entry.get("patient_status_date", ""), - "snomed_278844005": entry.get("snomed_278844005", ""), - "snomed_398299004": entry.get("snomed_398299004", ""), - "snomed_184305005": entry.get("snomed_184305005", ""), - "snomed_105727008": entry.get("snomed_105727008", ""), - "snomed_412726003": entry.get("snomed_412726003", ""), - "snomed_723663001": convert_to_boolean(entry.get("snomed_723663001", "")), - "rarelink_3_patient_status_complete": entry.get("rarelink_3_patient_status_complete", "") - } - - -def map_care_pathway(entry): - """ - Maps a flat REDCap entry to the CarePathway schema. - """ - return { - "hl7fhir_enc_period_start": entry.get("hl7fhir_enc_period_start", ""), - "hl7fhir_enc_period_end": entry.get("hl7fhir_enc_period_end", ""), - "snomed_305058001": entry.get("snomed_305058001", ""), - "hl7fhir_encounter_class": entry.get("hl7fhir_encounter_class", ""), - "rarelink_4_care_pathway_complete": entry.get("rarelink_4_care_pathway_complete", "") - } - -def map_disease(entry): - """ - Maps a flat REDCap entry to the Disease schema. 
- """ - return { - "disease_coding": entry.get("disease_coding", ""), - "snomed_64572001_mondo": entry.get("snomed_64572001_mondo", ""), - "snomed_64572001_ordo": entry.get("snomed_64572001_ordo", ""), - "snomed_64572001_icd10cm": entry.get("snomed_64572001_icd10cm", ""), - "snomed_64572001_icd11": entry.get("snomed_64572001_icd11", ""), - "snomed_64572001_omim_p": entry.get("snomed_64572001_omim_p", ""), - "loinc_99498_8": entry.get("loinc_99498_8", ""), - "snomed_424850005": entry.get("snomed_424850005", ""), - "snomed_298059007": entry.get("snomed_298059007", ""), - "snomed_423493009": entry.get("snomed_423493009", ""), - "snomed_432213005": entry.get("snomed_432213005", ""), - "snomed_363698007": entry.get("snomed_363698007", ""), - "snomed_263493007": entry.get("snomed_263493007", ""), - "snomed_246112005": entry.get("snomed_246112005", ""), - "rarelink_5_disease_complete": entry.get("rarelink_5_disease_complete", "") - } - -def map_genetic_findings(entry): - """ - Maps a flat REDCap entry to the GeneticFindings schema. - """ - - def convert_to_boolean(value): - """ - Convert SNOMED codes for yes/no to Boolean values. 
- """ - yes_no_mapping = { - "yes": True, # Yes - "no": False # No - } - return yes_no_mapping.get(value, None) - - return { - "genetic_diagnosis_code": entry.get("genetic_diagnosis_code", ""), - "snomed_106221001_mondo": entry.get("snomed_106221001_mondo", ""), - "snomed_106221001_omim_p": entry.get("snomed_106221001_omim_p", ""), - "ga4gh_progress_status": entry.get("ga4gh_progress_status", ""), - "ga4gh_interp_status": entry.get("ga4gh_interp_status", ""), - "loinc_81304_8": entry.get("loinc_81304_8", ""), - "loinc_62374_4": entry.get("loinc_62374_4", ""), - "loinc_lp7824_8": entry.get("loinc_lp7824_8", ""), - "variant_expression": entry.get("variant_expression", ""), - "loinc_81290_9": entry.get("loinc_81290_9", ""), - "loinc_48004_6": entry.get("loinc_48004_6", ""), - "loinc_48005_3": entry.get("loinc_48005_3", ""), - "variant_validation": convert_to_boolean(entry.get("variant_validation", "")), - "loinc_48018_6": entry.get("loinc_48018_6", ""), - "loinc_48018_6_label": entry.get("loinc_48018_6_label", ""), - "loinc_53034_5": entry.get("loinc_53034_5", ""), - "loinc_53034_5_other": entry.get("loinc_53034_5_other", ""), - "loinc_48002_0": entry.get("loinc_48002_0", ""), - "loinc_48019_4": entry.get("loinc_48019_4", ""), - "loinc_48019_4_other": entry.get("loinc_48019_4_other", ""), - "loinc_53037_8": entry.get("loinc_53037_8", ""), - "ga4gh_therap_action": entry.get("ga4gh_therap_action", ""), - "loinc_93044_6": entry.get("loinc_93044_6", ""), - "rarelink_6_1_genetic_findings_complete": entry.get("rarelink_6_1_genetic_findings_complete", "") - } - -def map_phenotypic_feature(entry): - """ - Maps a flat REDCap entry to the PhenotypicFeature schema. 
- """ - return { - "snomed_8116006": entry.get("phenotypic_feature", ""), - "snomed_363778006": entry.get("status", ""), - "snomed_8116006_onset": entry.get("determination_date", ""), - "snomed_8116006_resolution": entry.get("resolution_date", ""), - "hp_0003674": entry.get("age_of_onset", ""), - "hp_0011008": entry.get("temporal_pattern", ""), - "hp_0012824": entry.get("severity", ""), - "hp_0012823_hp1": entry.get("clinical_modifier_hp1", ""), - "hp_0012823_hp2": entry.get("clinical_modifier_hp2", ""), - "hp_0012823_hp3": entry.get("clinical_modifier_hp3", ""), - "hp_0012823_ncbitaxon": entry.get("causing_organism", ""), - "hp_0012823_snomed": entry.get("primary_body_site", ""), - "phenotypicfeature_evidence": entry.get("evidence", ""), - "rarelink_6_2_phenotypic_feature_complete": entry.get("rarelink_6_2_phenotypic_feature_complete", "") - } - - -def map_measurement(entry): - """ - Maps a flat REDCap entry to the Measurement schema. - """ - return { - "assay": entry.get("ncit_c60819", ""), - "value": float(entry.get("ncit_c25712", 0)) if entry.get("ncit_c25712") else None, - "value_unit": entry.get("ncit_c92571", ""), - "interpretation": entry.get("ncit_c41255", ""), - "time_observed": entry.get("ncit_c82577", ""), - "procedure_ncit": entry.get("snomed_122869004_ncit", ""), - "procedure_snomed": entry.get("snomed_122869004_snomed", ""), - "rarelink_6_3_measurements_complete": entry.get("rarelink_6_3_measurements_complete", "") - } - -def map_family_history(entry): - """ - Maps a flat REDCap entry to the FamilyHistory schema. 
- """ - return { - "family_history_pseudonym": entry.get("family_history_pseudonym", ""), - "propositus": entry.get("snomed_64245008", ""), - "relationship_to_index_case": entry.get("snomed_408732007", ""), - "consanguinity": entry.get("snomed_842009", ""), - "family_member_relationship": entry.get("snomed_444018008", ""), - "family_member_record_status": entry.get("hl7fhir_fmh_status", ""), - "family_member_sex": entry.get("loinc_54123_5", ""), - "family_member_age": int(entry.get("loinc_54141_7", 0)) if entry.get("loinc_54141_7") else None, - "family_member_dob": entry.get("loinc_54124_3", ""), - "family_member_deceased": entry.get("snomed_740604001", ""), - "family_member_cause_of_death": entry.get("loinc_54112_8", ""), - "family_member_deceased_age": int(entry.get("loinc_92662_6", 0)) if entry.get("loinc_92662_6") else None, - "family_member_disease": entry.get("loinc_75315_2", ""), - "rarelink_6_4_family_history_complete": entry.get("rarelink_6_4_family_history_complete", "") - } - -def map_consent(entry): - """ - Maps a flat REDCap entry to the Consent schema. - """ - return { - "consent_status": entry.get("snomed_309370004", ""), - "consent_date": entry.get("hl7fhir_consent_datetime", ""), - "health_policy_monitoring": entry.get("snomed_386318002", ""), - "agreement_to_contact": entry.get("rarelink_consent_contact", ""), - "consent_to_reuse_data": entry.get("rarelink_consent_data", ""), - "biological_sample": entry.get("snomed_123038009", ""), - "biobank_link": entry.get("rarelink_biobank_link", ""), - "rarelink_7_consent_complete": entry.get("rarelink_7_consent_complete", "") - } - -def map_disability(entry): - """ - Maps a flat REDCap entry to the Disability schema. 
- """ - return { - "icf_score": entry.get("rarelink_icf_score", ""), - "rarelink_8_disability_complete": entry.get("rarelink_8_disability_complete", "") - } - - -# Modular mapping dispatcher -MAPPING_FUNCTIONS = { - "formal_criteria": map_formal_criteria, - "personal_information": map_personal_information, - "patient_status": map_patient_status, - "care_pathway": map_care_pathway, - "disease": map_disease, - "genetic_findings": map_genetic_findings, - "phenotypic_feature": map_phenotypic_feature, - "measurement": map_measurement, - "family_history": map_family_history, - "consent": map_consent, - "disability": map_disability -} +from .map_to_linkml_schema import MAPPING_FUNCTIONS def preprocess_flat_data(flat_data, mapping_functions): """ @@ -281,8 +55,8 @@ def preprocess_flat_data(flat_data, mapping_functions): repeated_element["genetic_findings"] = mapping_functions["genetic_findings"](entry) elif entry["redcap_repeat_instrument"] == "rarelink_6_2_phenotypic_feature": repeated_element["phenotypic_feature"] = mapping_functions["phenotypic_feature"](entry) - elif entry["redcap_repeat_instrument"] == "rarelink_6_3_measurement": - repeated_element["measurement"] = mapping_functions["measurement"](entry) + elif entry["redcap_repeat_instrument"] == "rarelink_6_3_measurements": + repeated_element["measurements"] = mapping_functions["measurements"](entry) elif entry["redcap_repeat_instrument"] == "rarelink_6_4_family_history": repeated_element["family_history"] = mapping_functions["family_history"](entry) record["repeated_elements"].append(repeated_element) diff --git a/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_1_genetic_findings.yaml b/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_1_genetic_findings.yaml index 8d646064..756b19f9 100644 --- a/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_1_genetic_findings.yaml +++ b/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_1_genetic_findings.yaml @@ -35,7 +35,6 @@ classes: - 
loinc_48005_3 - variant_validation - loinc_48018_6 - - loinc_48018_6_label - loinc_53034_5 - loinc_53034_5_other - loinc_48002_0 @@ -117,11 +116,6 @@ slots: range: string required: false - loinc_48018_6_label: - description: Gene label - range: string - required: false - loinc_53034_5: description: Zygosity of the genetic variant range: Zygosity diff --git a/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_3_measurements.yaml b/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_3_measurements.yaml index 49a0cc35..801bb68a 100644 --- a/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_3_measurements.yaml +++ b/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_6_3_measurements.yaml @@ -17,61 +17,114 @@ classes: Measurement: description: > The section Measurements (6.3) of the RareLink CDM. This section captures - assay-related measurements and their corresponding values, units, interpretations, and procedures. + assay-related measurements and their corresponding values, units, + interpretations, and procedures. slots: - - assay - - value - - value_unit - - interpretation - - time_observed - - procedure_ncit - - procedure_snomed + - measurement_category + - measurement_status + - ncit_c60819 + - ln_85353_1 + - ln_85353_1_other + - ncit_c25712 + - ncit_c92571 + - ncit_c41255 + - ncit_c82577 + - snomed_122869004_ncit + - snomed_122869004_snomed + - snomed_122869004 + - snomed_122869004_bodysite + - snomed_122869004_status - rarelink_6_3_measurements_complete + + slots: - assay: + measurement_category: + description: > + Category of the measurement (e.g., social history, vital signs, imaging). + Refer to the RareLink documentation for guidance. + range: string + required: true + + measurement_status: + description: > + The status of the measurement (e.g., registered, preliminary, final). + range: string + required: false + + ncit_c60819: description: > The assay used in the measurement, encoded with LOINC. 
range: string required: true - value: + ln_85353_1: + description: > + Specific vital sign measurement, encoded with LOINC. + range: string + required: false + + ln_85353_1_other: + description: > + Additional vital signs information if "Other" is selected. + range: string + required: false + + ncit_c25712: description: > The measurement value, entered as a two-decimal number. range: double required: true - value_unit: + ncit_c92571: description: > The unit of the measurement, encoded with Unit Ontology (UO). range: string required: true - interpretation: + ncit_c41255: description: > The interpretation of the measurement, encoded with NCIT. range: string required: false - time_observed: + ncit_c82577: description: > The time when the measurement was observed, in YYYY-MM-DD format. range: date required: false - procedure_ncit: + snomed_122869004_ncit: description: > The procedure related to the measurement, encoded with NCIT. range: string required: false - procedure_snomed: + snomed_122869004_snomed: + description: > + The procedure related to the measurement, encoded with SNOMEDCT. + range: string + required: false + + snomed_122869004: + description: > + The procedure related to the measurement, encoded with SNOMEDCT + range: string + required: false + + snomed_122869004_bodysite: + description: > + The anatomical site of the procedure, encoded with SNOMED. + range: string + required: false + + snomed_122869004_status: description: > - The procedure related to the measurement, encoded with SNOMED. + The status of the procedure (e.g., preparation, in-progress, completed). range: string required: false rarelink_6_3_measurements_complete: - description: Completion status of this section + description: Completion status of this section. 
range: string required: true diff --git a/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_repeated_elements.yaml b/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_repeated_elements.yaml index 311cc02f..5622a5e5 100644 --- a/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_repeated_elements.yaml +++ b/src/rarelink_cdm/v2_0_0_dev0/schema_definitions/rarelink_repeated_elements.yaml @@ -30,7 +30,7 @@ classes: - disease - genetic_findings - phenotypic_feature - - measruements + - measurements - family_history slots: @@ -74,7 +74,7 @@ slots: range: PhenotypicFeature required: false - measruements: + measurements: description: > Measurement data for the repeated element. range: Measurement diff --git a/tests/cli/redcap_setup/test_api.py b/tests/cli/redcap_setup/test_api.py index 43faeb8c..b96978be 100644 --- a/tests/cli/redcap_setup/test_api.py +++ b/tests/cli/redcap_setup/test_api.py @@ -47,25 +47,6 @@ def test_redcap_api_config_start_project_created(temp_config_file, monkeypatch): assert config["api_super_token"] == "" -def test_redcap_api_config_start_with_super_token( - temp_config_file, monkeypatch): - monkeypatch.setattr("rarelink.cli.redcap_setup.api_config.CONFIG_FILE", - temp_config_file) - - with patch("rarelink.cli.redcap_setup.api_config.masked_input", - side_effect=["mock_token", "mock_super_token"]): - result = runner.invoke( - redcap_api_config_app, - ["start"], - input="y\nhttp://example.com/redcap/api/\ny\n", - ) - assert result.exit_code == 0 - assert "✅ REDCap API configuration saved locally" in result.stdout - - config = json.loads(temp_config_file.read_text()) - assert config["api_super_token"] == "mock_super_token" - - def test_redcap_api_config_view_no_config(temp_config_file, monkeypatch): """ Test the `redcap-setup api-config view` command when no configuration exists. 
diff --git a/tests/conftest.py b/tests/conftest.py index 0e44de61..0a4270f3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,4 +10,23 @@ def pytest_ignore_collect(collection_path, config): "submodules/rd-cdm/docs" in str(collection_path) ): return True - return False \ No newline at end of file + return False + +def set_bioportal_api_key(): + """ + Ensures the BioPortal API key is available for tests by setting it + as an environment variable or creating the Oaklib configuration file. + """ + # Fetch the API key from an environment variable or a secret + api_key = os.getenv("BIOPORTAL_API_KEY") + if not api_key: + raise ValueError("BioPortal API key not found. Please set the BIOPORTAL_API_KEY environment variable.") + + # Option 1: Set the environment variable for Oaklib + os.environ["BIOPORTAL_API_KEY"] = api_key + + # Option 2: Create the configuration file Oaklib expects + config_dir = Path.home() / ".config" / "ontology-access-kit" + config_dir.mkdir(parents=True, exist_ok=True) + config_file = config_dir / "bioportal-apikey.txt" + config_file.write_text(api_key) \ No newline at end of file diff --git a/tests/utils/preprocessing/test_fetch_displays.py b/tests/utils/preprocessing/test_fetch_displays.py new file mode 100644 index 00000000..5b93d89d --- /dev/null +++ b/tests/utils/preprocessing/test_fetch_displays.py @@ -0,0 +1,95 @@ +import pytest +from rarelink.utils.preprocessing.fetch_displays import fetch_label_for_code, fetch_label_directly, batch_fetch_labels +import os +from dotenv import load_dotenv + +# Load the environment variables +load_dotenv() +bioportal_token = os.getenv("BIOPORTAL_API_TOKEN") + +if not bioportal_token: + raise ValueError("BioPortal API token not found. 
Please set it in the .env file.") + +@pytest.mark.parametrize("code, expected_label", [ + ("HP:0000118", "Phenotypic abnormality"), + ("ICD10CM:G46.4", "Cerebellar stroke syndrome"), # add prefix for processing + ("ORPHA:84", "Fanconi anemia"), + ("SNOMEDCT:106221001", "Genetic finding"), # add prefix for processing + ("MONDO:0019391", "Fanconi anemia"), + ("OMIM:601622", "TWIST FAMILY bHLH TRANSCRIPTION FACTOR 1"), # add prefix for processing + ("LOINC:62374-4", "Human reference sequence assembly release number:ID:Pt:Bld/Tiss:Nom:Molgen"), # add prefix for processing + ("Thesaurus:C3262", "Neoplasm"), + ("NCBITAXON:1279", "Staphylococcus"), # add prefix for processing + ("HGNC:4238", "GFI1B"), + ("ECO:0000180", "clinical study evidence"), + ("UO:0000276", "amount per container"), # convert prefix from UO_ to UO: for processing +]) + +def test_fetch_label_for_code(code, expected_label): + """ + Tests the `fetch_label_for_code` function by verifying that it returns + the correct label for given ontology codes. + + Args: + code (str): The ontology code to fetch the display label for. + expected_label (str): The expected label corresponding to the code. + + Raises: + AssertionError: If the fetched label does not match the expected label. 
+ """ + # Use fetch_label_directly for specific cases + if ( + code.startswith("ORPHA:") + or code.startswith("HGNC:") + or code.startswith("Thesaurus:") + or code.startswith("NCBITAXON:") + ): + label = fetch_label_directly(code) + else: + label = fetch_label_for_code(code) + + # Validate the fetched label + if expected_label is None: + assert label is None, f"Expected None for {code}, but got {label}" + else: + assert label == expected_label, f"Label for {code} was {label}, expected {expected_label}" + + + +@pytest.mark.parametrize("codes, expected_labels", [ + ([ + "HP:0000118", # Directly resolvable + "ICD10CM:G46.4", # Prefixed for processing + "ORPHA:84", # ORDO + "SNOMEDCT:106221001", # Prefixed for processing + "MONDO:0019391", # Directly resolvable + "OMIM:601622", # Prefixed for processing + "LOINC:62374-4", # Prefixed for processing + "Thesaurus:C3262", # NCIT mapped + "NCBITAXON:1279", # Prefixed for processing + "HGNC:4238", # HGNC-NR + "ECO:0000180", # Directly resolvable + "UO:0000276" # Prefix converted + ], [ + "Phenotypic abnormality", + "Cerebellar stroke syndrome", + "Fanconi anemia", + "Genetic finding", + "Fanconi anemia", + "TWIST FAMILY bHLH TRANSCRIPTION FACTOR 1", + "Human reference sequence assembly release number:ID:Pt:Bld/Tiss:Nom:Molgen", + "Neoplasm", + "Staphylococcus", + "GFI1B", + "clinical study evidence", + "amount per container" + ]) +]) +def test_batch_fetch_labels(codes, expected_labels): + """ + Tests the `batch_fetch_labels` function by verifying it returns correct + labels for multiple ontology codes in a single call. + """ + labels = batch_fetch_labels(codes) + assert labels == expected_labels, f"Labels were {labels}, expected {expected_labels}" +