diff --git a/.gitignore b/.gitignore index f466384b..9edacbc0 100755 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ py36 /netMHCIIpan-3.2.Linux.tar.gz /netMHCIIpan-4.0.Linux.tar.gz /netMHCpan-4.1b.Linux.tar.gz +neofox.log +*.swp +./test_* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6cb37d14..8be0d4ae 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,4 +68,3 @@ publish_package: - TWINE_PASSWORD=${CI_JOB_TOKEN} TWINE_USERNAME=gitlab-ci-token python -m twine upload --repository-url https://gitlab.rlp.net/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/* only: - develop - - master diff --git a/Dockerfile b/Dockerfile index ffaf4c61..8a156894 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,7 +50,7 @@ RUN tar -xvf netMHCIIpan-4.0.Linux.tar.gz RUN sed -i 's/\/net\/sund-nas.win.dtu.dk\/storage\/services\/www\/packages\/netMHCIIpan\/4.0\/netMHCIIpan-4.0/\/app\/netMHCIIpan-4.0/g' /app/netMHCIIpan-4.0/netMHCIIpan RUN sed -i 's/ \/tmp\//\/app\/netMHCIIpan-4.0\/tmp/g' /app/netMHCIIpan-4.0/netMHCIIpan RUN mkdir /app/netMHCIIpan-4.0/tmp -RUN wget http://www.cbs.dtu.dk/services/NetMHCIIpan-4.0/data.tar.gz -O /app/netMHCIIpan-4.0/data.tar.gz +RUN wget https://services.healthtech.dtu.dk/services/NetMHCIIpan-4.0/data.tar.gz -O /app/netMHCIIpan-4.0/data.tar.gz RUN tar -xvf /app/netMHCIIpan-4.0/data.tar.gz -C /app/netMHCIIpan-4.0 ENV NEOFOX_NETMHC2PAN /app/netMHCIIpan-4.0/netMHCIIpan RUN apt-get install tcsh @@ -71,7 +71,7 @@ ENV NEOFOX_MIXMHC2PRED /app/MixMHC2pred-1.2/MixMHC2pred_unix # install prime RUN wget https://github.com/GfellerLab/PRIME/archive/master.tar.gz RUN tar -xvf master.tar.gz -RUN sed -i 's/\/app\/PRIME/\/app\/PRIME-master/g' /app/PRIME-master/PRIME +RUN sed -i 's/PATH_TO_PRIME/\/app\/PRIME-master/g' /app/PRIME-master/PRIME ENV NEOFOX_PRIME /app/PRIME-master/PRIME # configure references diff --git a/README.md b/README.md index 957d1947..0d92c909 100755 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ 
[![DOI](https://zenodo.org/badge/294667387.svg)](https://zenodo.org/badge/latestdoi/294667387) [![PyPI version](https://badge.fury.io/py/neofox.svg)](https://badge.fury.io/py/neofox) +[![Anaconda-Server Badge](https://anaconda.org/bioconda/neofox/badges/version.svg)](https://anaconda.org/bioconda/neofox) [![Documentation Status](https://readthedocs.org/projects/neofox/badge/?version=latest)](https://neofox.readthedocs.io/en/latest/?badge=latest) @@ -51,7 +52,7 @@ NeoFox covers the following neoantigen features and prediction algorithms: NeoFox depends on the following tools: -- Python >=3.6, <=3.8 +- Python >=3.7, <=3.8 - R 3.6.0 - BLAST 2.10.1 - netMHCpan 4.1 @@ -60,13 +61,23 @@ NeoFox depends on the following tools: - MixMHC2pred 1.2 - PRIME 1.0 +Install from PyPI: +``` +pip install neofox +``` + +Or install from bioconda: +``` +conda install bioconda::neofox +``` + ## 3 Usage from the command line NeoFox can be used from the command line as shown below or programmatically (see [https://neofox.readthedocs.io](https://neofox.readthedocs.io/) for more information). 
````commandline -neofox --candidate-file/--json-file neoantigens_candidates.tab/neoantigens_candidates.json --patient-data/--patient-data-json patient_data.txt/patient_data.json --output-folder /path/to/out --output-prefix out_prefix [--patient-id] [--with-table] [--with-json] [--num_cpus] [--affinity-threshold] +neofox --candidate-file/--json-file neoantigens_candidates.tab/neoantigens_candidates.json --patient-data/--patient-data-json patient_data.txt/patient_data.json --output-folder /path/to/out --output-prefix out_prefix [--patient-id] [--with-table] [--with-json] [--num-cpus] [--affinity-threshold] ```` - `--candidate-file`: tab-separated values table with neoantigen candidates represented by long mutated peptide sequences as described [here](#41-neoantigen-candidates-in-tabular-format) - `--json-file`: JSON file neoantigens in NeoFox model format as described [here](#42-neoantigen-candidates-in-json-format) @@ -76,7 +87,7 @@ neofox --candidate-file/--json-file neoantigens_candidates.tab/neoantigens_candi - `--output-prefix`: prefix for the output files (*optional*) - `--with-table`: output file in tab-separated format (*default*) - `--with-json`: output file in JSON format (*optional*) -- `--num_cpus`: number of CPUs to use (*optional*) +- `--num-cpus`: number of CPUs to use (*optional*) - `--config`: a config file with the paths to dependencies as shown below (*optional*) - `--organism`: the organism to which the data corresponds. Possible values: [human, mouse]. 
Default value: human - `--affinity-threshold`: a affinity value (*optional*) neoantigen candidates with a best predicted affinity greater than or equal than this threshold will be not annotated with features that specifically model diff --git a/docs/resources/column_description.xlsx b/docs/resources/column_description.xlsx index 3747ff6d..5962041e 100755 Binary files a/docs/resources/column_description.xlsx and b/docs/resources/column_description.xlsx differ diff --git a/docs/source/01_overview.md b/docs/source/01_overview.md index 1490d10d..ec3cef1d 100644 --- a/docs/source/01_overview.md +++ b/docs/source/01_overview.md @@ -4,6 +4,7 @@ Welcome to the documentation of **NeoFox**! [![DOI](https://zenodo.org/badge/294667387.svg)](https://zenodo.org/badge/latestdoi/294667387) [![PyPI version](https://badge.fury.io/py/neofox.svg)](https://badge.fury.io/py/neofox) +[![Anaconda-Server Badge](https://anaconda.org/bioconda/neofox/badges/version.svg)](https://anaconda.org/bioconda/neofox) ## About NeoFox @@ -17,7 +18,7 @@ candidate to be a true neoantigen are required. Several neoantigen features that describe the ability of a neoantigen candidate to induce a T-cell response have been published in the last years. -**NeoFox** (**NEO**antigen **F**eature toolb**OX**) is a python package that annotates a given set of neoantigen candidate sequences with relevant neoantigen features. +**NeoFox** (**NEO**antigen **F**eature toolb**OX**) is a python package that annotates a given set of neoantigen candidate sequences with relevant neoantigen features. The annotation of neoepitope candidates is supported from NeoFox version 1.0.0. NeoFox supports annotation of neoantigen candidates derived from SNVs (single nucleotide variant) and alternative mutation classes such as INDELs or fusion genes. Furthermore, NeoFox supports both human and mouse derived neoantigen candidates. 
NeoFox covers neoepitope prediction by MHC binding and ligand prediction, similarity/foreignness of a neoepitope candidate sequence, combinatorial features and machine learning approaches. diff --git a/docs/source/02_installation.md b/docs/source/02_installation.md index 9d589a13..3414ca6a 100644 --- a/docs/source/02_installation.md +++ b/docs/source/02_installation.md @@ -22,7 +22,7 @@ the sites indicated below. Store these in the root folder of the repository, next to the `Dockerfile`. Do not rename the installer files. -Build the docker image: `docker build --tag neofox-docker .` +Build the docker image: `docker build --platform linux/amd64 --tag neofox-docker .` Run NeoFox: `docker run neofox-docker neofox --help` @@ -33,30 +33,54 @@ See the usage guide [here](03_03_usage.md) for further details. These installation instructions were tested on Ubuntu 18.04. -Python >=3.7, <=3.8 and R 3.6.0 should be preinstalled. +Python 3.7 or 3.8 should be preinstalled. -Set the environment variable pointing to `Rscript`. +The libz compression development library is required. This can be installed in Ubuntu as follows: ``` -export NEOFOX_RSCRIPT=`which Rscript` +apt-get install libz-dev ``` ### Install NeoFox +Install from PyPI: ``` pip install neofox ``` +or install from bioconda: +``` +conda install bioconda::neofox +``` + ### Install third-party dependencies + +#### Install R + +R 3.6.0 is required. + +Optionally set the environment variable pointing to `Rscript`, otherwise neofox will look for it in the path. +``` +export NEOFOX_RSCRIPT=`which Rscript` +``` + +**NOTE**: when installing from conda this dependency is already installed. + #### Install BLASTP The version of BLASTP that was tested is 2.10.1, other versions may work but that is untested. 
``` wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.10.1/ncbi-blast-2.10.1+-x64-linux.tar.gz tar -xvf ncbi-blast-2.10.1+-x64-linux.tar.gz -export NEOFOX_BLASTP=`pwd`/ncbi-blast-2.10.1+/bin/blastp ``` +Optionally set the environment variable pointing to `blastp`, otherwise neofox will look for it in the path. +``` +export NEOFOX_BLASTP=/path/to/ncbi-blast-2.10.1+/bin/blastp +``` + +**NOTE**: when installing from conda this dependency is already installed. + #### Install NetMHCpan-4.1 NetMHCpan-4.1 can be downloaded by academic users from https://services.healthtech.dtu.dk/service.php?NetMHCpan-4.1 @@ -66,8 +90,11 @@ tar -xvf netMHCpan-4.1b.Linux.tar.gz cd netMHCpan-4.1 wget https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/data.tar.gz tar -xvf data.tar.gz -cd .. -export NEOFOX_NETMHCPAN=`pwd`/netMHCpan-4.1/netMHCpan +``` + +Optionally set the environment variable pointing to `netMHCpan`, otherwise neofox will look for it in the path. +``` +export NEOFOX_NETMHCPAN=/path/to/netMHCpan-4.1/netMHCpan ``` Configure NetMHCpan as explained in the file `netMHCpan-4.1/netMHCpan-4.1.readme` @@ -83,12 +110,15 @@ cd netMHCIIpan-4.0 # download the data wget http://www.cbs.dtu.dk/services/NetMHCIIpan-4.0/data.Linux.tar.gz tar -xvf data.Linux.tar.gz -cd .. -export NEOFOX_NETMHC2PAN=`pwd`/netMHCIIpan-4.0/netMHCIIpan # install tcsh shell interpreter if not available yet sudo apt-get install tcsh ``` +Optionally set the environment variable pointing to `netMHCIIpan`, otherwise neofox will look for it in the path. 
+``` +export NEOFOX_NETMHC2PAN=/path/to/netMHCIIpan-4.0/netMHCIIpan +``` + Configure NetMHCIIpan-4.0 as explained in the file `netMHCIIpan-4.0/netMHCIIpan-4.0.readme` @@ -97,7 +127,12 @@ Configure NetMHCIIpan-4.0 as explained in the file `netMHCIIpan-4.0/netMHCIIpan- ``` wget https://github.com/GfellerLab/MixMHCpred/archive/v2.1.tar.gz tar -xvf v2.1.tar.gz -export NEOFOX_MIXMHCPRED=`pwd`/MixMHCpred-2.1/MixMHCpred +``` + +Set the environment variable pointing to `MixMHCpred`, there will be no search in the path as the installation folder +is also needed to determine the supported alleles. +``` +export NEOFOX_MIXMHCPRED=/path/to/MixMHCpred-2.1/MixMHCpred ``` Configure MixMHCpred-2.1 as explained in the file `MixMHCpred-2.1/README` @@ -107,6 +142,11 @@ Configure MixMHCpred-2.1 as explained in the file `MixMHCpred-2.1/README` ``` wget https://github.com/GfellerLab/MixMHC2pred/archive/v1.2.tar.gz tar -xvf v1.2.tar.gz +``` + +Set the environment variable pointing to `MixMHC2pred_unix`, there will be no search in the path as the installation +folder is also needed to determine the supported alleles. +``` export NEOFOX_MIXMHC2PRED=`pwd`/MixMHC2pred-1.2/MixMHC2pred_unix ``` @@ -115,6 +155,11 @@ export NEOFOX_MIXMHC2PRED=`pwd`/MixMHC2pred-1.2/MixMHC2pred_unix ``` wget https://github.com/GfellerLab/PRIME/archive/master.tar.gz tar -xvf master.tar.gz +``` + +Set the environment variable pointing to `PRIME`, there will be no search in the path as the installation folder +is also needed to determine the supported alleles. 
+``` export NEOFOX_PRIME==`pwd`/PRIME-master/PRIME ``` @@ -122,7 +167,8 @@ Configure PRIME as explained in the file `PRIME-master/README` ### Configuration of the reference folder -To configure the reference folder, set the environment variable for `makeblastdb`, NetMHCpan, NetMHCIIpan and Rscript: +To configure the reference folder, set the environment variables for `makeblastdb`, NetMHCpan, NetMHCIIpan and Rscript, + or alternatively rely on these being fetched from the path: ``` export NEOFOX_MAKEBLASTDB=`pwd`/ncbi-blast-2.10.1+/bin/makeblastdb @@ -143,6 +189,8 @@ Run the following to configure the NeoFox reference folder: neofox-configure --reference-folder /your/neofox/folder [--install-r-dependencies] ``` +**NOTE**: when installing from conda `--install-r-dependencies` is not needed. + The above command will install several resources and store in the annotations metadata their version, MD5 checksum and download timestamp. diff --git a/docs/source/03_01_input_data.md b/docs/source/03_01_input_data.md index d179acad..cdbd25c7 100644 --- a/docs/source/03_01_input_data.md +++ b/docs/source/03_01_input_data.md @@ -2,18 +2,23 @@ ## General information -NeoFox requires two input files: a file with neoantigen candidates and a file with patient data. +NeoFox requires two input files: a candidate file with neoantigen or neoepitope candidates and a file with patient data. The file with neoantigen candidates can be provided either in tabular format or in JSON format and this file may contain additional user-specific input that will be kept during the annotation process. The patient file requires a tabular format. -## File with neoantigen candidates +Alternatively, NeoFox may annotate a set of neoepitope candidates for which it will require a file with neoepitope +candidates and optionally a file with patient data. Both files are required in tabular format. 
-#### Tabular file format +## Candidate file + +### Tabular file format + +#### Neoantigen candidates This is an dummy example of a table with neoantigen candidates in tabular format: -| gene | mutation.wildTypeXmer | mutation.mutatedXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | dnaVariantAlleleFrequency | external_annotation_1 | external_annotation_2 | -|-------|-----------------------------|-----------------------------|-------------------|---------------|---------------------------|---------------------------|-----------------------|-----------------------| +| gene | wildTypeXmer | mutatedXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | dnaVariantAlleleFrequency | external_annotation_1 | external_annotation_2 | +|-------|-----------------------------|----------------------------|-------------------|---------------|---------------------------|---------------------------|-----------------------|-----------------------| | BRCA2 | AAAAAAAAAAAAALAAAAAAAAAAAAA | AAAAAAAAAAAAAFAAAAAAAAAAAAA | Ptx | 7.942 | 0.85 | 0.34 | some_value | some_value | | BRCA2 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 7.942 | 0.85 | 0.34 | some_value | some_value | | BRCA2 | AAAAAAAAAAAAAGAAAAAAAAAAAAA | AAAAAAAAAAAAAKAAAAAAAAAAAAA | Ptx | 7.942 | 0.85 | 0.34 | some_value | some_value | @@ -22,8 +27,8 @@ This is an dummy example of a table with neoantigen candidates in tabular format where: - `gene`: the HGNC gene symbol. (This field is not required for neoantigen candidates derived from other sources than SNVs) -- `mutation.mutatedXmer`: the neoantigen candidate sequence, i.e. the mutated amino acid sequence. In case of SNVs, the mutation should be located in the middle. 
We advise that the point mutation is flanked by 13 amino acid on both sites (IUPAC 1 respecting casing, eg: A) to cover both MHC I and MHC II neopeptides -- `mutation.wildTypeXmer`: the equivalent non-mutated amino acid sequence (IUPAC 1 respecting casing, eg: A). This field shall be empty, specially in the case of neoantigen candidates derived from other sources than SNVs. +- `mutatedXmer`: the neoantigen candidate sequence, i.e. the mutated amino acid sequence. In case of SNVs, the mutation should be located in the middle. We advise that the point mutation is flanked by 13 amino acid on both sites (IUPAC 1 respecting casing, eg: A) to cover both MHC I and MHC II neopeptides +- `wildTypeXmer`: the equivalent non-mutated amino acid sequence (IUPAC 1 respecting casing, eg: A). This field shall be empty, specially in the case of neoantigen candidates derived from other sources than SNVs. - `patientIdentifier`: the patient identifier - `rnaExpression`: RNA expression. (**optional**) (see *NOTE*) This value can be in any format chosen by the user (e.g. TPM, RPKM) but it is recommended to be consistent for data that should be compared. - `rnaVariantAlleleFrequency`: the variant allele frequency (VAF) calculated from the RNA (**optional**) @@ -35,21 +40,57 @@ where: - If `dnaVariantAlleleFrequency` is given while `rnaVariantAlleleFrequency` is not given, the VAF in RNA will be estimated by the VAF in DNA. This means that feature scores that rely on the VAF in RNA will be calulated with the VAF in DNA. 
+#### Neoepitope candidates + +This is a dummy example of a table with neoepitope candidates in tabular format: + +| gene | mutatedPeptide | wildTypePeptide | alleleMhcI | isoformMhcII | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | dnaVariantAlleleFrequency | +|-------|---------------------|-----------------------------|-------------|--------------|-------------------|---------------------------|---------------------------|---------------------------| +| BRCA2 | AAAALAAAAA | AAAAFAAAAA | HLA-A*01:01 | | Ptx | 7.942 | 0.85 | 0.34 | +| BRCA2 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | AAAAAAAAAAAAARAAAAAAAAAAAAA | | DRB1*01:01 | Ptx | 7.942 | 0.85 | 0.34 | +| BRCA2 | AAAAGAAAAA | AAAAKAAAAA | | | Ptx | 7.942 | 0.85 | 0.34 | +| BRCA2 | AAAAAAAAAAAAACAAAAAAAAAAAAA | AAAAAAAAAAAAAEAAAAAAAAAAAAA | | | Ptx | 7.942 | 0.85 | 0.34 | +| BRCA2 | AAAAAAAAAAAAAKAAAAAAAAAAAAA | AAAAAAAAAAAAACAAAAAAAAAAAAA | | | Ptx | 7.942 | 0.85 | 0.34 | + +where: +- `mutatedPeptide`: the neoepitope candidate sequence, i.e. the mutated amino acid sequence. MHC-I neoepitopes should have a length between 8 and 14 amino acids; MHC-II neoepitopes should have a length between 9 and 20000 amino acids. +- `wildTypePeptide`: the equivalent non-mutated amino acid sequence (IUPAC 1 respecting casing, eg: A). This field may be left empty, especially in the case of neoepitope candidates derived from other sources than SNVs. +- `alleleMhcI`: the MHC-I allele to which this neoepitope is linked (**optional**) +- `isoformMhcII`: the MHC-II isoform to which this neoepitope is linked (**optional**) +- `patientIdentifier`: the patient identifier (**only required if alleleMhcI and isoformMhcII are not provided**) +- `gene`: the HGNC gene symbol. (This field is optional) +- `rnaExpression`: RNA expression. (**optional**) (see *NOTE*) This value can be in any format chosen by the user (e.g. TPM, RPKM) but it is recommended to be consistent for data that should be compared. 
+- `rnaVariantAlleleFrequency`: the variant allele frequency (VAF) calculated from the RNA (**optional**) +- `dnaVariantAlleleFrequency`: the VAF calculated from the DNA. (**optional**) + +**NOTE:** + +- Neoepitopes with a value for `alleleMhcI` are considered MHC-I neoepitopes, likewise neoepitopes with a value for `isoformMhcII` are considered MHC-II neoepitopes. The two fields cannot both be provided for the same neoepitope. +- If neither `alleleMhcI` nor `isoformMhcII` is provided, then the `patientIdentifier` is required and one neoepitope sharing the same sequence will be annotated for each MHC-I allele and MHC-II isoform according to the patient HLA type. +- If `rnaExpression` is not provided and the tumor type is given in the patient data, expression will be estimated by gene expression in the TCGA cohort indicated in the `tumorType` in the patient data (see below). Please note that this does not work for mouse data. Here, expression imputation is currently not supported. +- If `dnaVariantAlleleFrequency` is given while `rnaVariantAlleleFrequency` is not given, the VAF in RNA will be estimated by the VAF in DNA. +This means that feature scores that rely on the VAF in RNA will be calculated with the VAF in DNA. + + ### JSON file format +#### Neoantigen candidates + Besides tabular format, neoantigen candidates can be provided as a list of neoantigen models in JSON format as shown below. To simplify, only one full neoantigen model is shown. The terminology follows the descriptions for the [tabular file format](#tabular-file-format). For a more detailed description of the models, please refer to [here](05_models.md): ```json [{ "patientIdentifier": "Ptx", "gene": "BRCA2", - "mutation": { - "wildTypeXmer": "AAAAAAAAAAAAALAAAAAAAAAAAAA", - "mutatedXmer": "AAAAAAAAAAAAAFAAAAAAAAAAAAA" - } + "wildTypeXmer": "AAAAAAAAAAAAALAAAAAAAAAAAAA", + "mutatedXmer": "AAAAAAAAAAAAAFAAAAAAAAAAAAA" }] ``` +#### Neoepitope candidates + +Not supported at the moment. 
+ ## File with patient data ### Human @@ -125,3 +166,6 @@ A given allele is represented by a last small case single letter (eg: d, k, p) w These are examples of H-2 alleles: H2Kd, H2Dd, H2Lp + + + diff --git a/docs/source/03_02_output_data.md b/docs/source/03_02_output_data.md old mode 100644 new mode 100755 index 9900a3f7..6d612cc2 --- a/docs/source/03_02_output_data.md +++ b/docs/source/03_02_output_data.md @@ -1,10 +1,10 @@ # Output data -## General information +## Neoantigens NeoFox returns the neoantigen candidates and their annotated features as output. -Two output formats are supported: [tabular](#tabular-format ) tabular format or [json](#json-format) format. -The user can choose one preferred format or get the neoantigen annotations in all formats. +Two output formats are supported: tabular format or JSON format. +The user gets the neoantigen annotations in all formats. Despite different structures, both formats provide the same content with the exception of the metadata on the annotations which is only present in the JSON format. @@ -12,130 +12,133 @@ The following table describes each of the annotations in the output: **TABLE 1** -| Column Name | Description | Feature group/ Paper | -|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------| -| identifier | unique neoantigen id given by NeoFox | - | -| dnaVariantAlleleFrequency | the variant allele frequency calculated from the DNA | - | -| mutation.mutatedXmer | the long mutated amino acid sequence | - | -| mutation.wildTypeXmer | the long non-mutated amino acid sequence. 
This field shal be empty for alternative neoantigen classes | - | -| patientIdentifier | the patient identifier | - | -| rnaExpression | the RNA expression. If expression was imputed, this will will be `imputedGeneExpression` | expression | -| imputedGeneExpression | median gene expression in the TCGA cohort of the tumor entity provided in the patient file. | expression | -| rnaVariantAlleleFrequency | the variant allele frequency calculated from the RNA | - | -| gene | the HGNC gene symbol | - | -| Expression_mutated_transcript | transcript expression normalized by the variant allele frequency of the mutation | expression | -| mutation_not_found_in_proteome | indicates if mutated amino acid sequence was not found in the WT proteome by exact search | Priority score | -| Best_rank_MHCI_score | minimal MHC I binding rank score over all neoepitope candidates (8-11mers) and MHC I alleles | MHC I binding with netMHCpan | -| Best_rank_MHCI_score_epitope | neoepitope candidate sequence with minimal MHC I binding rank score | MHC I binding with netMHCpan | -| Best_rank_MHCI_score_allele | the MHC I allele related to ` Best_rank_MHCI_score_epitope` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_score | minimal MHC I binding affinity over all neoepitope candidates (8-11mers) and MHC I alleles | MHC I binding with netMHCpan | -| Best_affinity_MHCI_epitope | neoepitope candidate sequence with minimal MHC I binding affinity | MHC I binding with netMHCpan | -| Best_affinity_MHCI_allele | the MHC I allele related to ` Best_affinity_MHCI_epitope` | MHC I binding with netMHCpan | -| Best_rank_MHCI_9mer_score | minimal MHC I binding rank score over all neoepitope candidates (9mers only) and MHC I alleles | MHC I binding with netMHCpan | -| Best_rank_MHCI_9mer_epitope | neoepitope candidate sequence (9mer) with minimal MHC I binding rank score | MHC I binding with netMHCpan | -| Best_rank_MHCI_9mer_allele | the MHC I allele related to ` Best_rank_MHCI_9mer_epitope` | MHC I 
binding with netMHCpan | -| Best_affinity_MHCI_9mer_score | minimal MHC I binding affinity over all neoepitope candidates (9mers) and MHC I alleles | MHC I binding with netMHCpan | -| Best_affinity_MHCI_9mer_allele | the MHC I allele related to ` Best_affinity_MHCI_9mer_epitope ` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_9mer_epitope | neoepitope candidate sequence (9mer) with minimal MHC I binding affinity | MHC I binding with netMHCpan | -| Best_affinity_MHCI_score_WT | MHC I binding affinity of `Best_affinity_MHCI_epitope_WT` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_epitope_WT | WT epitope that corresponds to ` Best_affinity_MHCI_epitope` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_allele_WT | the MHC I allele related to `Best_affinity_MHCI_epitope_WT` | MHC I binding with netMHCpan | -| Best_rank_MHCI_score_WT | MHC I binding rank score of `Best_rank_MHCI_score_epitope_WT` | MHC I binding with netMHCpan | -| Best_rank_MHCI_score_epitope_WT | WT epitope that corresponds to `Best_rank_MHCI_score_epitope` | MHC I binding with netMHCpan | -| Best_rank_MHCI_score_allele_WT | the MHC I allele related to `Best_rank_MHCI_score_epitope_WT` | MHC I binding with netMHCpan | -| Best_rank_MHCI_9mer_score_WT | MHC I binding rank score of `Best_rank_MHCI_9mer_epitope_WT ` | MHC I binding with netMHCpan | -| Best_rank_MHCI_9mer_epitope_WT | WT epitope that corresponds to `Best_rank_MHCI_9mer_epitope` | MHC I binding with netMHCpan | -| Best_rank_MHCI_9mer_allele_WT | the MHC I allele related to `Best_rank_MHCI_9mer_epitope_WT ` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_9mer_score_WT | MHC I binding affinity of ` Best_affinity_MHCI_9mer_allele_WT ` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_9mer_allele_WT | the MHC I allele related to ` Best_affinity_MHCI_9mer_epitope_WT` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_9mer_epitope_WT | WT epitope that corresponds to `Best_affinity_MHCI_9mer_epitope` | MHC I 
binding with netMHCpan | -| PHBR-I | harmonic mean of minimal MHC I binding rank scores of all MHC I alleles of a patient | PHBR-I | -| Best_affinity_MHCI_9mer_position_mutation | indicates position of the mutation in ` Best_affinity_MHCI_9mer_epitope` | MHC I binding with netMHCpan | -| Best_affinity_MHCI_9mer_anchor_mutated | mutation in ` Best_affinity_MHCI_9mer_epitope` in an anchor position (i.e. position 2 or 9) | anchor/non-anchor | -| Best_rank_MHCII_score | minimal MHC II binding rank score over all neoepitope candidates (15mers) and all MHC II alleles | MHC II binding with netMHCIIpan | -| Best_rank_MHCII_score_epitope | neoepitope candidate sequence with minimal MHC II binding rank score | MHC II binding with netMHCIIpan | -| Best_rank_MHCII_score_allele | the MHC II isoform related to ` Best_rank_MHCII_score_epitope` | MHC II binding with netMHCIIpan | -| Best_affinity_MHCII_score | minimal MHC II binding affinity over all neoepitope candidates (15mers) and all MHC II alleles | MHC II binding with netMHCIIpan | -| Best_affinity_MHCII_epitope | neoepitope candidate sequence with minimal MHC II binding affinity | MHC II binding with netMHCIIpan | -| Best_affinity_MHCII_allele | the MHC II isoform related to ` Best_affinity_MHCII_epitope ` | MHC II binding with netMHCIIpan | -| Best_rank_MHCII_score_WT | minimal MHC II binding rank of `Best_rank_MHCII_score_epitope_WT ` | MHC II binding with netMHCIIpan | -| Best_rank_MHCII_score_epitope_WT | WT epitope sequence (15mer) that corresponds to ` Best_rank_MHCII_score_epitope ` | MHC II binding with netMHCIIpan | -| Best_rank_MHCII_score_allele_WT | the MHC II isoform related to ` Best_rank_MHCII_score_epitope_WT` | MHC II binding with netMHCIIpan | -| Best_affinity_MHCII_score_WT | minimal MHC II binding rank of `Best_affinity_MHCII_epitope_WT` | MHC II binding with netMHCIIpan | -| Best_affinity_MHCII_epitope_WT | WT epitope sequence (15mer) that corresponds to ` Best_affinity_MHCII_epitope` | MHC II binding 
with netMHCIIpan | -| Best_affinity_MHCII_allele_WT | the MHC II isoform related to ` Best_affinity_MHCII_epitope_WT` | MHC II binding with netMHCIIpan | -| PHBR-II | harmonic mean of minimal MHC II binding rank scores of all MHC II alleles of a patient | PHBR-II | -| Amplitude_MHCI_affinity_9mer | ratio of `Best_affinity_MHCI_9mer_score_WT` and `Best_affinity_MHCI_9mer_score` | Recognition Potential | -| Amplitude_MHCI_affinity | ratio of `Best_affinity_MHCI_score_WT` and `Best_affinity_MHCI_score` | Generator rate | -| Amplitude_MHCII_rank | ratio of `Best_rank_MHCII_score_WT` and `Best_rank_MHCII_score` and | Generator rate | -| Pathogensimiliarity_MHCI_9mer | score representing the similarity of `Best_affinity_MHCI_9mer_epitope` to pathogen sequences in IEDB database | Recognition Potential | -| Pathogensimiliarity_MHCII | score representing the similarity of `Best_affinity_MHCII_epitope` to pathogen sequences in IEDB database | Recognition Potential | -| Recognition_Potential_MHCI_9mer | product of `Amplitude_MHCI_affinity_9mer` and `Pathogensimiliarity_MHCI_affinity_9mer` | Recognition Potential | -| DAI_MHCI_affinity | difference of `Best_affinity_MHCI_score_WT` and `Best_affinity_MHCI_score` | DAI | -| CDN_MHCI | `Best_affinity_MHCI_score` < 50 nM | Generator rate | -| ADN_MHCI | `Best_affinity_MHCI_score` < 5000 nM and `Amplitude_MHCI_affinity` > 10 | Generator rate | -| CDN_MHCII | `Best_rank_MHCII_score` < 1 | Generator rate | -| ADN_MHCII | `Best_rank_MHCII_score` < 4 and `Amplitude_MHCII_rank` < 2 | Generator rate | -| Generator_rate_CDN_MHCI | number of neoepitope candidates with MHC I binding affinity < 50 nM per neoantigen canidate | Generator rate | -| Generator_rate_ADN_MHCI | number of neoepitope candidates with MHC I binding affinity < 5000 nM per neoantigen canidate 10x better affinity in comparison to corresponding WT peptide | Generator rate | -| Generator_rate_MHCI | sum of `Generator_rate_CDN_MHCI` and `Generator_rate_ADN_MHCI` | Generator 
rate | -| Generator_rate_CDN_MHCII | number of neoepitope candidates with MHC II binding rank score < 1 per neoantigen canidate | Generator rate | -| Generator_rate_ADN_MHCII | number of neoepitope candidates with MHC II binding rank score < 4 per neoantigen candidate 4x better rank in comparison to corresponding WT peptide | Generator rate | -| Generator_rate_MHCII | sum of `Generator_rate_CDN_MHCII` and `Generator_rate_ADN_MHCII` | Generator rate | -| Tcell_predictor_score | output score of T cell predictor model | Tcell predictor | -| Improved_Binder_MHCI | ratio of `Best_rank_MHCI_score_WT` and `Best_rank_MHCI_score` > 1.2 | self-similarity | -| Selfsimilarity_MHCI_conserved_binder | score representing the similarity between `Best_rank_MHCI_score_epitope` and `Best_affinity_MHCI_epitope_WT` For conservered binder only | self-similarity | -| Selfsimilarity_MHCI | score representing the similarity between `Best_rank_MHCI_score_epitope` and `Best_affinity_MHCI_epitope_WT` | self-similarity | -| Selfsimilarity_MHCII | score representing the similarity between `Best_affinity_MHCII_epitope` and Best_affinity_MHCII_epitope_WT` | self-similarity | -| Number_of_mismatches_MCHI | number of amino acids that do no match between `Best_rank_MHCI_score_epitope` and `Best_rank_MHCI_score_epitope_WT` | Priority score | -| Priority_score | combinatorial score of several features such as MHC binding, expression and VAF | Priority score | -| Neoag_immunogenicity | output score of neoag model | neoag | -| IEDB_Immunogenicity_MHCI | IEDB Immunogenicity score for ` Best_affinity_MHCI_epitope ` | IEDB Immunogenicity | -| IEDB_Immunogenicity_MHCII | IEDB Immunogenicity score for ` Best_affinity_MHCII_epitope` | IEDB Immunogenicity | -| MixMHCpred_best_peptide | MHC class I neoepitope candidate sequence with maximum MixMHCpred score over all neoepitope canidates (8-11mers) and MHC I alleles | MixMHCpred | -| MixMHCpred_best_score | maximum MixMHCpred score over all neoepitope canidates 
(8-11mers) and MHC I alleles | MixMHCpred | -| MixMHCpred_best_rank | rank that corresponds to `MixMHCpred_best_score` | MixMHCpred | -| MixMHCpred_best_allele | the allele with maximum MixMHCpred score | MixMHCpred | -| MixMHC2pred_best_peptide | MHC class II neoepitope candidate sequence with minimal MixMHC2pred score over all neoepitope canidates (13-18mers) and MHC II alleles | MixMHC2pred | -| MixMHC2pred_best_rank | minimal MixMHC2pred score over all neoepitope canidates (13-18mers) and MHC II alleles | MixMHC2pred | -| MixMHC2pred_best_allele | the MHC II isoform with minimum MixMHC2pred rank score | MixMHC2pred | -| Dissimilarity_MHCI | score reflecting the dissimilarity of `Best_affinity_MHCI_epitope` to the self-proteome | dissimilarity | -| Dissimilarity_MHCII | score reflecting the dissimilarity of `Best_affinity_MHCII_epitope` to the self-proteome | dissimilarity | -| vaxrank_binding_score | total binding score of vaxrank | vaxrank | -| vaxrank_total_score | product of total binding score and expression score. Originally, the root of the number of reads supporting the mutation are used in the original implementation. To simplify, the expression normalised to VAF is used. | vaxrank | -| PRIME_best_allele | best predicted MHC allele by PRIME model | PRIME | -| PRIME_best_peptide | best predicted neoepitope candidate by PRIME model | PRIME | -| PRIME_best_rank | output rank score of PRIME model | PRIME | -| PRIME_best_score | output score of PRIME model | PRIME | -| Hex_alignment_score_MHCI | the alignment score by HEX for ` Best_affinity_MHCI_epitope ` | HEX | -| Hex_alignment_score_MHCII | the alignment score by HEX for ` Best_affinity_MHCII_epitope` | HEX | - -## Tabular format - -If the `--with-table` flag is enabled an output table with the suffix "*_neoantigen_candidates_annotated.tsv*" is created. 
This table contains the neoantigen candidates information, the neoantigen annotations and if some user-specific additional columns were provided in the input table, these external annotations. +| Column name | Description | Feature group/ Paper | +|----------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------| +| dnaVariantAlleleFrequency | the variant allele frequency calculated from the DNA | - | +| mutation.mutatedXmer | the long mutated amino acid sequence | - | +| mutation.wildTypeXmer | the long non-mutated amino acid sequence. This field shal be empty for alternative neoantigen classes | - | +| patientIdentifier | the patient identifier | - | +| rnaExpression | the RNA expression. If expression was imputed, this will will be `imputedGeneExpression` | expression | +| imputedGeneExpression | median gene expression in the TCGA cohort of the tumor entity provided in the patient file. 
| expression | +| rnaVariantAlleleFrequency | the variant allele frequency calculated from the RNA | - | +| gene | the HGNC gene symbol | - | +| Expression_mutated_transcript | transcript expression normalized by the variant allele frequency of the mutation | expression | +| mutation_not_found_in_proteome | indicates if mutated amino acid sequence was not found in the WT proteome by exact search | Priority score | +| NetMHCpan_MHCI_rank_bestRank | minimal MHC I binding rank score over all neoepitope candidates (8-11mers) and MHC I alleles | MHC I binding with netMHCpan | +| NetMHCpan_bestRank_peptide | neoepitope candidate sequence with minimal MHC I binding rank score | MHC I binding with netMHCpan | +| NetMHCpan_bestRank_allele | the MHC I allele related to ` NetMHCpan_bestRank_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity_affinity | minimal MHC I binding affinity over all neoepitope candidates (8-11mers) and MHC I alleles | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity_peptide | neoepitope candidate sequence with minimal MHC I binding affinity | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity_allele | the MHC I allele related to ` NetMHCpan_bestAffinity_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestRank9mer_rank | minimal MHC I binding rank score over all neoepitope candidates (9mers only) and MHC I alleles | MHC I binding with netMHCpan | +| NetMHCpan_bestRank9mer_peptide | neoepitope candidate sequence (9mer) with minimal MHC I binding rank score | MHC I binding with netMHCpan | +| NetMHCpan_bestRank9mer_allele | the MHC I allele related to `NetMHCpan_bestRank9mer_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity9mer_affinity | minimal MHC I binding affinity over all neoepitope candidates (9mers) and MHC I alleles | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity9mer_allele | the MHC I allele related to ` NetMHCpan_bestAffinity9mer_peptide ` | MHC I binding with netMHCpan | +| 
NetMHCpan_bestAffinity9mer_peptide | neoepitope candidate sequence (9mer) with minimal MHC I binding affinity | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity_affinityWT | MHC I binding affinity of `NetMHCpan_bestAffinity_peptideWT` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity_peptideWT | WT epitope that corresponds to ` NetMHCpan_bestAffinity_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestRank_rankWT | MHC I binding rank score of `NetMHCpan_bestRank_peptideWT` | MHC I binding with netMHCpan | +| NetMHCpan_bestRank_peptideWT | WT epitope that corresponds to `NetMHCpan_bestRank_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestRank9mer_rankWT | MHC I binding rank score of `NetMHCpan_bestRank9mer_peptideWT ` | MHC I binding with netMHCpan | +| NetMHCpan_bestRank9mer_peptideWT | WT epitope that corresponds to `NetMHCpan_bestRank9mer_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity9mer_affinityWT | MHC I binding affinity of ` NetMHCpan_bestAffinity9mer_peptideWT ` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity9mer_rankWT | MHC I binding rank score of `NetMHCpan_bestAffinity9mer_peptideWT` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity9mer_peptideWT | WT epitope that corresponds to `NetMHCpan_bestAffinity9mer_peptide` | | +| PHBR_I | harmonic mean of minimal MHC I binding rank scores of all MHC I alleles of a patient | PHBR-I | +| NetMHCpan_bestAffinity9mer_positionMutation | indicates position of the mutation in ` NetMHCpan_bestRank9mer_peptide` | MHC I binding with netMHCpan | +| NetMHCpan_bestAffinity9mer_anchorMutated | mutation in ` NetMHCpan_bestRank9mer_peptide` in an anchor position (i.e. 
position 2 or 9) | anchor/non-anchor | +| NetMHCIIpan_bestRank_rank | minimal MHC II binding rank score over all neoepitope candidates (15mers) and all MHC II alleles | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestRank_peptide | neoepitope candidate sequence with minimal MHC II binding rank score | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestRank_allele | the MHC II isoform related to `NetMHCIIpan_bestRank_peptide` | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestAffinity_affinity | minimal MHC II binding affinity over all neoepitope candidates (15mers) and all MHC II alleles | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestAffinity_peptide | neoepitope candidate sequence with minimal MHC II binding affinity | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestAffinity_allele | the MHC II isoform related to ` NetMHCIIpan_bestAffinity_peptide` | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestRank_rankWT | minimal MHC II binding rank of `NetMHCIIpan_bestRank_peptideWT` | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestRank_peptideWT | WT epitope sequence (15mer) that corresponds to ` NetMHCIIpan_bestRank_peptide` | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestAffinity_affinityWT | minimal MHC II binding rank of `NetMHCIIpan_bestAffinity_peptideWT` | MHC II binding with netMHCIIpan | +| NetMHCIIpan_bestAffinity_peptideWT | WT epitope sequence (15mer) that corresponds to `NetMHCIIpan_bestAffinity_peptide` | MHC II binding with netMHCIIpan | +| PHBR_II | harmonic mean of minimal MHC II binding rank scores of all MHC II alleles of a patient | PHBR-II | +| Amplitude_MHCI_bestAffinity9mer | ratio of `NetMHCpan_bestAffinity9mer_affinity` and `NetMHCpan_bestAffinity9mer_affinityWT` | Recognition Potential | +| Amplitude_MHCI_bestAffinity | ratio of `NetMHCpan_bestAffinity_affinityWT` and `NetMHCpan_bestAffinity_affinity` | Generator rate | +| Amplitude_MHCII_bestRank | ratio of `NetMHCIIpan_bestRank_rank` and 
`NetMHCIIpan_bestRank_rankWT` | Generator rate | +| Pathogensimiliarity_MHCI_bestAffinity9mer | score representing the similarity of `NetMHCpan_bestAffinity9mer_peptide` to pathogen sequences in IEDB database | Recognition Potential | +| Pathogensimiliarity_MHCII_bestAffinity | score representing the similarity of `NetMHCIIpan_bestRank_peptide` to pathogen sequences in IEDB database | Recognition Potential | +| RecognitionPotential_MHCI_bestAffinity9mer | product of `Amplitude_MHCI_bestAffinity9mer` and `Pathogensimiliarity_MHCI_bestAffinity9mer` | Recognition Potential | +| DAI_MHCI_bestAffinity | difference of `NetMHCpan_bestAffinity_affinityWT` and `NetMHCpan_bestAffinity_affinity` | DAI | +| Classically_defined_neopeptide_MHCI | `NetMHCpan_bestAffinity_affinity` < 50 nM | Generator rate | +| Alternatively_defined_neopeptide_MHCI | `NetMHCpan_bestAffinity_affinity` < 5000 nM and `Amplitude_MHCI_bestAffinity` > 10 | Generator rate | +| Classically_defined_neopeptide_MHCII | `NetMHCIIpan_bestRank_rank` < 1 | Generator rate | +| Alternatively_defined_neopeptide_MHCII | `NetMHCIIpan_bestRank_rank` < 4 and `Amplitude_MHCII_bestRank` < 2 | Generator rate | +| GeneratorRate_CDN_MHCI | number of neoepitope candidates with MHC I binding affinity < 50 nM per neoantigen candidate | Generator rate | +| GeneratorRate_ADN_MHCI | number of neoepitope candidates with MHC I binding affinity < 5000 nM per neoantigen candidate 10x better affinity in comparison to corresponding WT peptide | Generator rate | +| GeneratorRate_MHCI | sum of `GeneratorRate_CDN_MHCI` and `GeneratorRate_ADN_MHCI` | Generator rate | +| GeneratorRate_CDN_MHCII | number of neoepitope candidates with MHC II binding rank score < 1 per neoantigen candidate | Generator rate | +| GeneratorRate_ADN_MHCII | number of neoepitope candidates with MHC II binding rank score < 4 per neoantigen candidate 4x better rank in comparison to corresponding WT peptide | Generator rate | +| GeneratorRate_MHCII | sum of `GeneratorRate_CDN_MHCII`
and `GeneratorRate_ADN_MHCII` | Generator rate | +| Tcell_predictor | output score of T cell predictor model | Tcell predictor | +| ImprovedBinder_MHCI | ratio of `NetMHCpan_MHCI_rank_bestRankWT` and `NetMHCpan_MHCI_rank_bestRank` > 1.2 | self-similarity | +| Selfsimilarity_MHCI_conserved_binder | score representing the similarity between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT`; for conserved binders only | self-similarity | +| Selfsimilarity_MHCI | score representing the similarity between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT` | self-similarity | +| Selfsimilarity_MHCII | score representing the similarity between `NetMHCIIpan_bestAffinity_peptide` and `NetMHCIIpan_bestAffinity_peptideWT` | self-similarity | +| Number_of_mismatches_MCHI | number of amino acids that do not match between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT` | Priority score | +| Priority_score | combinatorial score of several features such as MHC binding, expression and VAF | Priority score | +| Neoag_immunogenicity | output score of neoag model | neoag | +| IEDB_Immunogenicity_MHCI | IEDB Immunogenicity score for ` NetMHCpan_bestAffinity_peptide` | IEDB Immunogenicity | +| IEDB_Immunogenicity_MHCII | IEDB Immunogenicity score for `NetMHCIIpan_bestAffinity_peptide` | IEDB Immunogenicity | +| MixMHCpred_bestScore_peptide | MHC class I neoepitope candidate sequence with maximum MixMHCpred score over all neoepitope candidates (8-11mers) and MHC I alleles | MixMHCpred | +| MixMHCpred_bestScore_score | maximum MixMHCpred score over all neoepitope candidates (8-11mers) and MHC I alleles | MixMHCpred | +| MixMHCpred_bestScore_rank | rank that corresponds to `MixMHCpred_bestScore_score` | MixMHCpred | +| MixMHCpred_bestScore_allele | the allele with maximum MixMHCpred score | MixMHCpred | +| MixMHC2pred_bestRank_peptide | MHC class II neoepitope candidate sequence with minimal MixMHC2pred score over all neoepitope candidates (13-18mers) and
MHC II alleles | MixMHC2pred | +| MixMHC2pred_bestRank_rank | minimal MixMHC2pred score over all neoepitope candidates (13-18mers) and MHC II alleles | MixMHC2pred | +| MixMHC2pred_bestRank_allele | the MHC II isoform with minimum MixMHC2pred rank score | MixMHC2pred | +| Dissimilarity_MHCI | score reflecting the dissimilarity of `NetMHCpan_bestAffinity_peptide` to the self-proteome | dissimilarity | +| Dissimilarity_MHCII | score reflecting the dissimilarity of `NetMHCIIpan_bestAffinity_peptide` to the self-proteome | dissimilarity | +| Vaxrank_bindingScore | total binding score of vaxrank | vaxrank | +| Vaxrank_totalScore | product of total binding score and expression score. Originally, the root of the number of reads supporting the mutation are used in the original implementation. To simplify, the expression normalised to VAF is used. | vaxrank | +| PRIME_bestScore_allele | best predicted MHC allele by PRIME model | PRIME | +| PRIME_bestScore_peptide | best predicted neoepitope candidate by PRIME model | PRIME | +| PRIME_bestScore_rank | output rank score of PRIME model | PRIME | +| PRIME_bestScore_score | output score of PRIME model | PRIME | +| HexAlignmentScore_MHCI | the alignment score by HEX for `NetMHCpan_bestAffinity_peptide` | HEX | +| HexAlignmentScore_MHCII | the alignment score by HEX for ` NetMHCIIpan_bestAffinity_peptide` | HEX | + + +In addition, all logging output is appended to a log file with the suffix +"*/.log*", where the folder is set by `--output-folder` and the +prefix can be set with `--output-prefix`. + +### Tabular format + +An output table with the suffix "*_neoantigen_candidates_annotated.tsv*" is created. +This table contains the neoantigen candidates information, the neoantigen annotations and if some user-specific additional columns +were provided in the input table, these external annotations.
This is a dummy example: -| dnaVariantAlleleFrequency | gene |imputedGeneExpression | mutation.mutatedXmer | mutation.position | mutation.wildTypeXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | ADN_MHCI | ADN_MHCII | Amplitude_MHCII_rank | Amplitude_MHCI_affinity | Amplitude_MHCI_affinity_9mer | Best_affinity_MHCII_allele | Best_affinity_MHCII_allele_WT | Best_affinity_MHCII_epitope | Best_affinity_MHCII_epitope_WT | Best_affinity_MHCII_score | Best_affinity_MHCII_score_WT | Best_affinity_MHCI_9mer_allele | Best_affinity_MHCI_9mer_allele_WT | Best_affinity_MHCI_9mer_anchor_mutated | Best_affinity_MHCI_9mer_epitope | Best_affinity_MHCI_9mer_epitope_WT | Best_affinity_MHCI_9mer_position_mutation | Best_affinity_MHCI_9mer_score | Best_affinity_MHCI_9mer_score_WT | Best_affinity_MHCI_allele | Best_affinity_MHCI_allele_WT | Best_affinity_MHCI_epitope | Best_affinity_MHCI_epitope_WT | Best_affinity_MHCI_score | Best_affinity_MHCI_score_WT | Best_rank_MHCII_score | Best_rank_MHCII_score_WT | Best_rank_MHCII_score_allele | Best_rank_MHCII_score_allele_WT | Best_rank_MHCII_score_epitope | Best_rank_MHCII_score_epitope_WT | Best_rank_MHCI_9mer_allele | Best_rank_MHCI_9mer_allele_WT | Best_rank_MHCI_9mer_epitope | Best_rank_MHCI_9mer_epitope_WT | Best_rank_MHCI_9mer_score | Best_rank_MHCI_9mer_score_WT | Best_rank_MHCI_score | Best_rank_MHCI_score_WT | Best_rank_MHCI_score_allele | Best_rank_MHCI_score_allele_WT | Best_rank_MHCI_score_epitope | Best_rank_MHCI_score_epitope_WT | CDN_MHCI | CDN_MHCII | DAI_MHCI_affinity_cutoff500nM | Dissimilarity_MHCI_cutoff500nM | Expression_mutated_transcript | Generator_rate | IEDB_Immunogenicity_MHCI_cutoff500nM | Improved_Binder_MHCI | MixMHC2pred_best_allele | MixMHC2pred_best_peptide | MixMHC2pred_best_rank | MixMHCpred_best_allele | MixMHCpred_best_peptide | MixMHCpred_best_rank | MixMHCpred_best_score | Neoag_immunogenicity | Number_of_mismatches_MCHI | PHBR-I | PHBR-II | 
Pathogensimiliarity_MHCI_affinity_9mer | Priority_score | Recognition_Potential_MHCI_affinity_9mer | Selfsimilarity_MHCI_conserved_binder | Tcell_predictor_score_cutoff500nM | VAF_in_RNA | VAF_in_tumor | [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | mutation_not_found_in_proteome | patient | substitution | transcript_expression | vaxrank_binding_score | vaxrank_total_score | -|---------------------------|-------|-----------------------------|-----------------------------|-------------------|-----------------------------|-------------------|---------------|---------------------------|----------------------------------------|----------|-----------|----------------------|-------------------------|------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|--------------------------------|-----------------------------------|----------------------------------------|---------------------------------|------------------------------------|-------------------------------------------|-------------------------------|----------------------------------|---------------------------|------------------------------|----------------------------|-------------------------------|--------------------------|-----------------------------|-----------------------|--------------------------|------------------------------|---------------------------------|-------------------------------|----------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|----------------------|-------------------------|-----------------------------|--------------------------------|------------------------------|---------------------------------|----------|-----------|-------------------------------|----------------
----------------|-------------------------------|----------------|--------------------------------------|----------------------|-------------------------|--------------------------|-----------------------|------------------------|-------------------------|----------------------|-----------------------|----------------------|---------------------------|---------|---------|----------------------------------------|----------------|------------------------------------------|--------------------------------------|-----------------------------------|------------|--------------|---------------------------------------------|--------------------------------|---------|--------------|-----------------------|-----------------------|---------------------| - | 0.294 | BRCA2 | 0.5| AAAAAAAAAAAAAFAAAAAAAAAAAAA| 14 | AAAAAAAAAAAAALAAAAAAAAAAAAA | Ptx | 0.51950689 | 0.857 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 0 | 1 | 28 | 0.88723 | 0.88723 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | 251.77 | 513.02 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAF | AAAAAAAAL | 9 | 24.3 | 21.7 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 24.3 | 21.7 | 0.05 | 1.4 | HLA-DQA10301-DQB10402 | HLA-DQA10301-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 0.0592 | 0.0493 | 0.0592 | 0.0493 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 1 | 1 | -2.6 | 1 | 0.44522 | 1 | 0.18288 | 0 | DPA1_01_03__DPB1_04_01 | AAAAFAAAAAAAAAAA | 0.997 | B0702 | AAAAAAAAF | 0.1 | 0.50487 | 13.16998 | 1 | 0.31193 | 0.21892 | 0 | 0.07017 | 0 | 0.99178271 | 0.40327581 | 0.857 | 0.294 | AAAAAAAAAAAAALAAAAAAAAAAAAA | 1 | Ptx | I547T | 0.51950689 | 3.7689 | 1.678 | -| 0.173 | BRCA2 | 0.5| AAAAAAAAAAAAAMAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 0.71575659 | 0.556 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 1 | 1 | 10 | 90.685 | 90.685 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAAAAAAMAAAAA | AAAAAAAAARAAAAA | 421.53 | 554.92 | 
HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAM | AAAAAAAAR | 9 | 24.1 | 6346.9 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 24.1 | 6346.9 | 0.25 | 2.5 | HLA-DQA10401-DQB10302 | HLA-DQA10401-DQB10302 | AAAAAAAAAAMAAAA | AAAAAAAAAARAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 0.0587 | 8.9317 | 0.0587 | 8.9317 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 1 | 1 | 6322.8 | 1 | 0.39796 | 1 | 0.18288 | 1 | DPA1_01_03__DPB1_04_01 | AAAAMAAAAAAAAAAA | 2.44 | B0702 | AAAAAAAAM | 0.07 | 0.5444 | 39.51379 | 1 | 0.29303 | 1.5594 | 0 | 0.10626 | 0 | NA | 0.46452844 | 0.556 | 0.173 | AAAAAAAAAAAAARAAAAAAAAAAAAA | 1 | Ptx | E135S | 0.71575659 | 3.8741 | 1.5417 | +| dnaVariantAlleleFrequency | gene | imputedGeneExpression | mutatedXmer | position | wildTypeXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | ADN_MHCI | ADN_MHCII | Amplitude_MHCII_rank | Amplitude_MHCI_affinity | Amplitude_MHCI_affinity_9mer | Best_affinity_MHCII_allele | Best_affinity_MHCII_allele_WT | Best_affinity_MHCII_epitope | Best_affinity_MHCII_epitope_WT | Best_affinity_MHCII_score | Best_affinity_MHCII_score_WT | Best_affinity_MHCI_9mer_allele | Best_affinity_MHCI_9mer_allele_WT | Best_affinity_MHCI_9mer_anchor_mutated | Best_affinity_MHCI_9mer_epitope | Best_affinity_MHCI_9mer_epitope_WT | Best_affinity_MHCI_9mer_position_mutation | Best_affinity_MHCI_9mer_score | Best_affinity_MHCI_9mer_score_WT | Best_affinity_MHCI_allele | Best_affinity_MHCI_allele_WT | Best_affinity_MHCI_epitope | Best_affinity_MHCI_epitope_WT | Best_affinity_MHCI_score | Best_affinity_MHCI_score_WT | Best_rank_MHCII_score | Best_rank_MHCII_score_WT | Best_rank_MHCII_score_allele | Best_rank_MHCII_score_allele_WT | Best_rank_MHCII_score_epitope | Best_rank_MHCII_score_epitope_WT | Best_rank_MHCI_9mer_allele | Best_rank_MHCI_9mer_allele_WT | Best_rank_MHCI_9mer_epitope | Best_rank_MHCI_9mer_epitope_WT | Best_rank_MHCI_9mer_score | 
Best_rank_MHCI_9mer_score_WT | Best_rank_MHCI_score | Best_rank_MHCI_score_WT | Best_rank_MHCI_score_allele | Best_rank_MHCI_score_allele_WT | Best_rank_MHCI_score_epitope | Best_rank_MHCI_score_epitope_WT | CDN_MHCI | CDN_MHCII | DAI_MHCI_affinity_cutoff500nM | Dissimilarity_MHCI_cutoff500nM | Expression_mutated_transcript | Generator_rate | IEDB_Immunogenicity_MHCI_cutoff500nM | Improved_Binder_MHCI | MixMHC2pred_best_allele | MixMHC2pred_best_peptide | MixMHC2pred_best_rank | MixMHCpred_best_allele | MixMHCpred_best_peptide | MixMHCpred_best_rank | MixMHCpred_best_score | Neoag_immunogenicity | Number_of_mismatches_MCHI | PHBR-I | PHBR-II | Pathogensimiliarity_MHCI_affinity_9mer | Priority_score | Recognition_Potential_MHCI_affinity_9mer | Selfsimilarity_MHCI_conserved_binder | Tcell_predictor_score_cutoff500nM | VAF_in_RNA | VAF_in_tumor | [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | mutation_not_found_in_proteome | patient | substitution | transcript_expression | vaxrank_binding_score | vaxrank_total_score | 
+|---------------------------|-------|------------------------|-----------------------------|-----------|-----------------------------|-------------------|---------------|---------------------------|----------------------------------------|----------|-----------|----------------------|-------------------------|------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|--------------------------------|-----------------------------------|----------------------------------------|---------------------------------|------------------------------------|-------------------------------------------|-------------------------------|----------------------------------|---------------------------|------------------------------|----------------------------|-------------------------------|--------------------------|-----------------------------|-----------------------|--------------------------|------------------------------|---------------------------------|-------------------------------|----------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|----------------------|-------------------------|-----------------------------|--------------------------------|------------------------------|---------------------------------|----------|-----------|-------------------------------|--------------------------------|-------------------------------|----------------|--------------------------------------|----------------------|-------------------------|--------------------------|-----------------------|------------------------|-------------------------|----------------------|-----------------------|----------------------|---------------------------|---------|---------|-----------------
-----------------------|----------------|------------------------------------------|--------------------------------------|-----------------------------------|------------|--------------|---------------------------------------------|--------------------------------|---------|--------------|-----------------------|-----------------------|---------------------| + | 0.294 | BRCA2 | 0.5 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAALAAAAAAAAAAAAA | Ptx | 0.51950689 | 0.857 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 0 | 1 | 28 | 0.88723 | 0.88723 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | 251.77 | 513.02 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAF | AAAAAAAAL | 9 | 24.3 | 21.7 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 24.3 | 21.7 | 0.05 | 1.4 | HLA-DQA10301-DQB10402 | HLA-DQA10301-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 0.0592 | 0.0493 | 0.0592 | 0.0493 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 1 | 1 | -2.6 | 1 | 0.44522 | 1 | 0.18288 | 0 | DPA1_01_03__DPB1_04_01 | AAAAFAAAAAAAAAAA | 0.997 | B0702 | AAAAAAAAF | 0.1 | 0.50487 | 13.16998 | 1 | 0.31193 | 0.21892 | 0 | 0.07017 | 0 | 0.99178271 | 0.40327581 | 0.857 | 0.294 | AAAAAAAAAAAAALAAAAAAAAAAAAA | 1 | Ptx | I547T | 0.51950689 | 3.7689 | 1.678 | +| 0.173 | BRCA2 | 0.5 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 0.71575659 | 0.556 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 1 | 1 | 10 | 90.685 | 90.685 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAAAAAAMAAAAA | AAAAAAAAARAAAAA | 421.53 | 554.92 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAM | AAAAAAAAR | 9 | 24.1 | 6346.9 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 24.1 | 6346.9 | 0.25 | 2.5 | HLA-DQA10401-DQB10302 | HLA-DQA10401-DQB10302 | AAAAAAAAAAMAAAA | AAAAAAAAAARAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 0.0587 | 8.9317 | 0.0587 | 8.9317 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 1 | 1 | 
6322.8 | 1 | 0.39796 | 1 | 0.18288 | 1 | DPA1_01_03__DPB1_04_01 | AAAAMAAAAAAAAAAA | 2.44 | B0702 | AAAAAAAAM | 0.07 | 0.5444 | 39.51379 | 1 | 0.29303 | 1.5594 | 0 | 0.10626 | 0 | NA | 0.46452844 | 0.556 | 0.173 | AAAAAAAAAAAAARAAAAAAAAAAAAA | 1 | Ptx | E135S | 0.71575659 | 3.8741 | 1.5417 | -## JSON format +### JSON format + +An output file with the suffix "*_neoantigen_candidates_annotated.json*" is created. +This file contains neoantigen candidates information in JSON format. +The names within the models are described in **TABLE 1**. -If the `--with-json` flag is enabled an output file with the suffix "*_neoantigen_candidates_annotated.json*" is created. This file contains neoantigen candidates information in JSON format. Furthermore, a second file with the suffix *"_neoantigen_features.json"* is created. This file contains the annotated neoantigen features in JSON format. The names within the models are described in **TABLE 1**. -\ -This is a dummy example of a "*_neoantigen_candidates.json*" file. This file contains a list of neoantigen candidate models (for further information, please see [here](05_models.md). To simplify, only one full neoantigen candidate model is shown: +This is a dummy example of a "*_neoantigen_candidates.json*" file. +This file contains a list of neoantigen candidate models (for further information, please see [here](05_models.md)). +To simplify, only one full neoantigen candidate model is shown: ```json [{ "patient_identifier": "Ptx", "gene": "BRCA2", - "mutation": { - "position": [14], - "wild_type_xmer": "AAAAAAAAAAAAALAAAAAAAAAAAAA", - "mutated_xmer": "AAAAAAAAAAAAAFAAAAAAAAAAAAA" - }, + "position": [14], + "wild_type_xmer": "AAAAAAAAAAAAALAAAAAAAAAAAAA", + "mutated_xmer": "AAAAAAAAAAAAAFAAAAAAAAAAAAA", "rna_expression": 0.5195068939999999, "imputed_gene_expression": 0.5, "dna_variant_allele_frequency": 0.294, @@ -145,11 +148,9 @@ This is a dummy example of a "*_neoantigen_candidates.json*" file.
This file con }, { "patient_identifier": "Ptx", "gene": "BRCA2", - "mutation": { - "position": [14], - "wild_type_xmer": "AAAAAAAAAAAAARAAAAAAAAAAAAA", - "mutated_xmer": "AAAAAAAAAAAAAMAAAAAAAAAAAAA" - }, + "position": [14], + "wild_type_xmer": "AAAAAAAAAAAAARAAAAAAAAAAAAA", + "mutated_xmer": "AAAAAAAAAAAAAMAAAAAAAAAAAAA", "rna_expression": 0.715756594, "imputed_gene_expression": 0.5, "dna_variant_allele_frequency": 0.17300000000000001, @@ -167,242 +168,14 @@ This is a dummy example of the field `neofox_annotations`. ```json { "annotations": [{ - "name": "Best_rank_MHCI_score", - "value": "0.0592" - }, { - "name": "Best_rank_MHCI_score_epitope", - "value": "AAAAAAAAF" - }, { - "name": "Best_rank_MHCI_score_allele", - "value": "HLA-C*16:01" - }, { - "name": "Best_affinity_MHCI_score", - "value": "24.3" - }, { - "name": "Best_affinity_MHCI_epitope", - "value": "AAAAAAAAF" - }, { - "name": "Best_affinity_MHCI_allele", - "value": "HLA-C*16:01" - }, { - "name": "Best_rank_MHCI_9mer_score", + "name": "NetMHCpan_MHCI_rank_bestRank", "value": "0.0592" }, { - "name": "Best_rank_MHCI_9mer_epitope", + "name": "NetMHCpan_MHCI_rank_peptide", "value": "AAAAAAAAF" - }, { - "name": "Best_rank_MHCI_9mer_allele", - "value": "HLA-C*16:01" - }, { - "name": "Best_affinity_MHCI_9mer_score", - "value": "24.3" - }, { - "name": "Best_affinity_MHCI_9mer_allele", - "value": "HLA-C*16:01" - }, { - "name": "Best_affinity_MHCI_9mer_epitope", - "value": "AAAAAAAAF" - }, { - "name": "Best_affinity_MHCI_score_WT", - "value": "21.7" - }, { - "name": "Best_affinity_MHCI_epitope_WT", - "value": "AAAAAAAAL" - }, { - "name": "Best_affinity_MHCI_allele_WT", - "value": "HLA-C*16:01" - }, { - "name": "Best_rank_MHCI_score_WT", - "value": "0.0493" - }, { - "name": "Best_rank_MHCI_score_epitope_WT", - "value": "AAAAAAAAL" - }, { - "name": "Best_rank_MHCI_score_allele_WT", - "value": "HLA-C*16:01" - }, { - "name": "Best_rank_MHCI_9mer_score_WT", - "value": "0.0493" - }, { - "name": 
"Best_rank_MHCI_9mer_epitope_WT", - "value": "AAAAAAAAL" - }, { - "name": "Best_rank_MHCI_9mer_allele_WT", - "value": "HLA-C*16:01" - }, { - "name": "Best_affinity_MHCI_9mer_score_WT", - "value": "21.7" - }, { - "name": "Best_affinity_MHCI_9mer_allele_WT", - "value": "HLA-C*16:01" - }, { - "name": "Best_affinity_MHCI_9mer_epitope_WT", - "value": "AAAAAAAAL" - }, { - "name": "Generator_rate", - "value": "1" - }, { - "name": "PHBR-I", - "value": "0.31193" - }, { - "name": "Best_affinity_MHCI_9mer_position_mutation", - "value": "9" - }, { - "name": "Best_affinity_MHCI_9mer_anchor_mutated", - "value": "1" - }, { - "name": "Best_rank_MHCII_score", - "value": "0.05" - }, { - "name": "Best_rank_MHCII_score_epitope", - "value": "AAAAFAAAAAAAAAA" - }, { - "name": "Best_rank_MHCII_score_allele", - "value": "HLA-DQA10301-DQB10402" - }, { - "name": "Best_affinity_MHCII_score", - "value": "251.77" - }, { - "name": "Best_affinity_MHCII_epitope", - "value": "AAAAFAAAAAAAAAA" - }, { - "name": "Best_affinity_MHCII_allele", - "value": "HLA-DQA10401-DQB10402" - }, { - "name": "Best_rank_MHCII_score_WT", - "value": "1.4" - }, { - "name": "Best_rank_MHCII_score_epitope_WT", - "value": "AAAALAAAAAAAAAA" - }, { - "name": "Best_rank_MHCII_score_allele_WT", - "value": "HLA-DQA10301-DQB10402" - }, { - "name": "Best_affinity_MHCII_score_WT", - "value": "513.02" - }, { - "name": "Best_affinity_MHCII_epitope_WT", - "value": "AAAALAAAAAAAAAA" - }, { - "name": "Best_affinity_MHCII_allele_WT", - "value": "HLA-DQA10401-DQB10402" - }, { - "name": "PHBR-II", - "value": "0.21892" - }, { - "name": "MixMHCpred_best_peptide", - "value": "AAAAAAAAF" - }, { - "name": "MixMHCpred_best_score", - "value": "0.50487" - }, { - "name": "MixMHCpred_best_rank", - "value": "0.1" - }, { - "name": "MixMHCpred_best_allele", - "value": "B0702" - }, { - "name": "MixMHC2pred_best_peptide", - "value": "AAAAFAAAAAAAAAAA" - }, { - "name": "MixMHC2pred_best_rank", - "value": "0.997" - }, { - "name": 
"MixMHC2pred_best_allele", - "value": "DPA1_01_03__DPB1_04_01" - }, { - "name": "Expression_mutated_transcript", - "value": "0.44522" - }, { - "name": "mutation_not_found_in_proteome", - "value": "1" - }, { - "name": "Amplitude_MHCI_affinity_9mer", - "value": "0.88723" - }, { - "name": "Amplitude_MHCI_affinity", - "value": "0.88723" - }, { - "name": "Amplitude_MHCII_rank", - "value": "28" - }, { - "name": "Pathogensimiliarity_MHCI_affinity_9mer", - "value": "0" - }, { - "name": "Recognition_Potential_MHCI_affinity_9mer", - "value": "0" - }, { - "name": "DAI_MHCI_affinity_cutoff500nM", - "value": "-2.6" - }, { - "name": "CDN_MHCI", - "value": "1" - }, { - "name": "ADN_MHCI", - "value": "0" - }, { - "name": "CDN_MHCII", - "value": "1" - }, { - "name": "ADN_MHCII", - "value": "1" - }, { - "name": "Tcell_predictor_score_cutoff500nM", - "value": "0.4032758100297953" - }, { - "name": "Improved_Binder_MHCI", - "value": "0" - }, { - "name": "Selfsimilarity_MHCI_conserved_binder", - "value": "0.9917827053614943" - }, { - "name": "Number_of_mismatches_MCHI", - "value": "1" - }, { - "name": "Priority_score", - "value": "0.07017" - }, { - "name": "Neoag_immunogenicity", - "value": "13.16998" - }, { - "name": "IEDB_Immunogenicity_MHCI_cutoff500nM", - "value": "0.18288" - }, { - "name": "Dissimilarity_MHCI_cutoff500nM", - "value": "1" - }, { - "name": "vaxrank_binding_score", - "value": "3.7689" - }, { - "name": "vaxrank_total_score", - "value": "1.678" - }, { - "name": "patient", - "value": "Ptx" - }, { - "name": "substitution", - "value": "I547T" - }, { - "name": "transcript_expression", - "value": "0.5195068939999999" - }, { - "name": "+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)", - "value": "AAAAAAAAAAAAAFAAAAAAAAAAAAA" - }, { - "name": "[WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)", - "value": "AAAAAAAAAAAAALAAAAAAAAAAAAA" - }, { - "name": "VAF_in_tumor", - "value": "0.294" - }, { - "name": "VAF_in_RNA", - "value": "0.857" - }, { - "name": "Unnamed: 8", - "value": null }], 
"annotator": "Neofox", - "annotator_version": "0.4.0", + "annotator_version": "1.0.0", "timestamp": "20201211115212061465" } ``` @@ -472,4 +245,119 @@ The metadata on the annotations will look as follows: } ] } -``` \ No newline at end of file +``` + + +## Neoepitopes + +NeoFox returns the neoepitopes candidates and their annotated features as output when using +the flag `--with-all-neoepitopes` or when explicitly annotating neoepitopes with the command `neofox-epitope`. +Two output formats are supported: tabular format or JSON format. +The user gets the neoepitope annotations in all formats. +Despite different structures, both formats provide the same content with the exception of the metadata on +the annotations which is only present in the JSON format. +The tabular format is split into two tables: +a first one for the MHC-I neoepitope candidates and a second one for +the MHC-II neoepitope candidates. + +### Tabular format + +Two output files with the suffix "*_mhcI_epitope_candidates_annotated.tsv" and ""*_mhcII_epitope_candidates_annotated.tsv"" are created. + +The following table describes each of the annotations in the output: + +**TABLE 2** + +| Column Name | Description | Feature group/ Paper | +|---------------------------------|------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------| +| dnaVariantAlleleFrequency | the variant allele frequency calculated from the DNA | - | +| mutatedSequence | the mutated amino acid sequence | - | +| wildTypeSequence | the non-mutated amino acid sequence (when not provided in the input this will contain the Blastp closest sequence in the proteome) | - | +| alleleMhcI / isoformMhcII | either the MHC-I allele for MHC-I neoepitopes or the MHC-II isoform for MHC-II neoepitopes | - | +| patientIdentifier | the patient identifier (optional) | - | +| rnaExpression | the RNA expression. 
If expression was imputed, this will be `imputedGeneExpression` | expression | +| imputedGeneExpression | median gene expression in the TCGA cohort of the tumor entity provided in the patient file. | expression | +| rnaVariantAlleleFrequency | the variant allele frequency calculated from the RNA | - | +| gene | the HGNC gene symbol | - | +| affinityMutated | NetMHCpan / NetMHCIIpan affinity score of the mutated peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan | +| affinityWildType | NetMHCpan / NetMHCIIpan affinity score of the wild type peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan | +| rankMutated | NetMHCpan / NetMHCIIpan rank of the mutated peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan | +| rankWildType | NetMHCpan / NetMHCIIpan rank of the wild type peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan | +| MixMHCpred_score | MixMHCpred score of the mutated peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred | +| MixMHCpred_rank | MixMHCpred rank of the mutated peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred | +| MixMHCpred_WT_score | MixMHCpred score of the wild type peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred | +| MixMHCpred_WT_rank | MixMHCpred rank of the wild type peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred | +| MixMHC2pred_score | MixMHC2pred score of the mutated peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred | +| MixMHC2pred_rank | MixMHC2pred rank of the mutated peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred | +| MixMHC2pred_WT_score | MixMHC2pred score of the wild type peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred | +| MixMHC2pred_WT_rank | MixMHC2pred rank of the wild type peptide for MHC-II neoepitopes 
| MHC-II binding with mixMHC2pred | +| PRIME_score | PRIME score of the mutated peptide for MHC-I neoepitopes | MHC-I binding with PRIME | +| PRIME_rank | PRIME rank of the mutated peptide for MHC-I neoepitopes | MHC-I binding with PRIME | +| PRIME_WT_score | PRIME score of the wild type peptide for MHC-I neoepitopes | MHC-I binding with PRIME | +| PRIME_WT_rank | PRIME rank of the wild type peptide for MHC-I neoepitopes | MHC-I binding with PRIME | +| DAI | difference of `affinityWildType` and `affinityMutated` | DAI (only available for MHC-I) | +| IEDB_Immunogenicity | IEDB Immunogenicity score for `affinityMutated` | IEDB immunogenicity | +| Improved_Binder_MHCI | ratio of `affinityWildType` and `affinityMutated` > 1.2 | self-similarity (only available for MHC-I) | +| Priority_score | combinatorial score of several features such as MHC binding, expression and VAF | Priority score | +| mutation_not_found_in_proteome | indicates if mutated amino acid sequence was not found in the WT proteome by exact search | Priority score | +| Selfsimilarity | score representing the similarity between `rankMutated` and `rankWildType` | self-similarity | +| Selfsimilarity_conserved_binder | score representing the similarity between `rankMutated` and `rankWildType` for conserved binder only | self-similarity (only available for MHC-I) | +| dissimilarity_score | score reflecting the dissimilarity of `affinityMutated` to the self-proteome | dissimilarity | +| Tcell_predictor_score | output score of T cell predictor model | Tcell predictor (only available for MHC-I) | +| amplitude | ratio of `affinityWildType` and `affinityMutated` for MHC-I and `rankWildType` and `rankMutated` for MHC-II | Generator rate | +| anchor_mutated | flag indicating if a mutation lies in an anchor position (i.e. 
position 2 or 9) | anchor/non-anchor (only available for MHC-I) | +| hex_alignment_score | the alignment score by HEX for `mutatedSequence` | HEX | +| number_of_mismatches | number of amino acids that do not match between `mutatedSequence` and `wildTypeSequence` | Priority score (only available for MHC-I) | +| pathogen_similarity | score representing the similarity of `mutatedSequence` to pathogen sequences in IEDB database | Recognition potential | +| recognition_potential | product of `amplitude` and `pathogen_similarity` | Recognition potential (only available for MHC-I) | +| position_mutation | indicates position of the mutation in `mutatedSequence` | MHC I binding with netMHCpan (only available for MHC-I) | + + +### JSON format + +An output file with the suffix "*_neoepitope_candidates_annotated.json*" is created only when using the command `neofox-epitope`. +This file contains the neoepitope candidate information in JSON format. +The names within the models are described in **TABLE 2**. + +This is a dummy example of a "*_neoepitope_candidates_annotated.json*" file. +This file contains a list of neoepitope candidate models (for further information, please see [here](05_models.md)). +To simplify, only two neoepitope candidate models (one for MHC-I and one for MHC-II) are shown: +```json +[{ + "patient_identifier": "Ptx", + "gene": "BRCA2", + "mutated_peptide": "AAAALAAAA", + "wild_type_peptide": "AAAAFAAAA", + "allele_mhc_i": "HLA-A*01:01", + "rna_expression": 0.519, + "imputed_gene_expression": 0.5, + "dna_variant_allele_frequency": 0.294, + "rna_variant_allele_frequency": 0.857, + "affinity_mutated": 2.567, + "rank_mutated": 0.898, + "affinity_wild_type": 1.023, + "rank_wild_type": 2.398, + "neofox_annotations": [...], + "external_annotations": [...] 
+}, { + "patient_identifier": "Ptx", + "gene": "BRCA2", + "mutated_peptide": "AAAAAAAAAAAAARAAAAAAAAAAAAA", + "wild_type_peptide": "AAAAAAAAAAAAAMAAAAAAAAAAAAA", + "isoform_mhc_i_i": "DRB1*01:01", + "rna_expression": 0.715, + "imputed_gene_expression": 0.5, + "dna_variant_allele_frequency": 0.173, + "rna_variant_allele_frequency": 0.556, + "affinity_mutated": 2.567, + "rank_mutated": 0.898, + "affinity_wild_type": 1.023, + "rank_wild_type": 2.398, + "neofox_annotations": [ ... ], + "external_annotations": [ ... ] +}] +``` + +Note that, for simplicity, the contents of the fields `neofox_annotations` and `external_annotations` are not shown above. +For further information, please see [here](05_models.md). +For an example of the NeoFox annotations section, see the previous section. diff --git a/docs/source/03_03_usage.md b/docs/source/03_03_usage.md old mode 100644 new mode 100755 index b04b15a2..6755997e --- a/docs/source/03_03_usage.md +++ b/docs/source/03_03_usage.md @@ -1,44 +1,50 @@ # Usage -There are two ways to use NeoFox for annotation of neoantigen candidates with neoantigen features: directly from the [command line](#command-line) or [programmatically](#api). +There are three ways to use NeoFox for annotation of neoantigen candidates with neoantigen features: directly from the [command line](#command-line), from [docker](#running-from-docker) or [programmatically](#api). ## Command line +### Neoantigen-Mode + To call NeoFox from the command line, use the following command. 
Make sure that the requirements have been added to PATH as described [here](02_installation.md) or add a config file as described below: ````commandline -neofox --candidate-file/--json-file neoantigens_candidates.tab/neoantigens_candidates.json \ +neofox --input-file neoantigens_candidates.tsv \ --patient-data patient_data.txt \ --output-folder /path/to/out \ - --output-prefix out_prefix \ - [--with-table] [--with-json] [--num_cpus] [--affinity-threshold] [--config] [--patient-id] + [--output-prefix out_prefix] \ + [--organism human|mouse] \ + [--rank-mhci-threshold 2.0] \ + [--rank-mhcii-threshold 4.0] \ + [--num-cpus] \ + [--config] \ + [--patient-id] \ + [--with-all-neoepitopes] ```` where: -- `--candidate-file`: tab-separated values table with neoantigen candidates represented by long mutated peptide sequences as described [here](03_01_input_data.md#tabular-file-format) -- `--json-file`: JSON file neoantigens in NeoFox model format as described [here](03_01_input_data.md#json-file-format) +- `--input-file`: tab-separated values table with neoantigen candidates represented by long mutated peptide sequences + as described [here](03_01_input_data.md#tabular-file-format) (extensions .txt and .tsv) or JSON file neoantigens in + NeoFox model format as described [here](03_01_input_data.md#json-file-format) (extension .json) - `--patient-data`: a table of tab separated values containing metadata on the patient as described [here](03_01_input_data.md#file-with-patient-information) - `--output-folder`: path to the folder to which the output files should be written - `--output-prefix`: prefix for the output files (*optional*) -- `--with-table`: output file in [tabular](03_02_output_data.md#tabular-format) format (*default*, *optional*) -- `--with-json`: output file in [JSON](03_02_output_data.md#json-format) format (*optional*) +- `--with-all-neoepitopes`: output annotations for all MHC-I and MHC-II neoepitopes on all HLA alleles (*optional*) +- `--rank-mhci-threshold`: 
MHC-I epitopes with a netMHCpan predicted rank greater than or equal to this threshold will be filtered out (*optional*) +- `--rank-mhcii-threshold`: MHC-II epitopes with a netMHCIIpan predicted rank greater than or equal to this threshold will be filtered out (*optional*) - `--organism`: the organism to which the data corresponds. Possible values: [human, mouse]. Default value: human -- `--num_cpus`: number of CPUs to use (*optional*) +- `--num-cpus`: number of CPUs to use (*optional*) - `--config`: a config file with the paths to dependencies as shown below (*optional*) -- `--affinity-threshold`: an affinity value (*optional*) neoantigen candidates with a best predicted affinity greater - than or equal than this threshold will be not annotated with features that specifically model neoepitope recognition. A threshold that is commonly used is 500 nM. - `--patient-id`: patient identifier (*optional*, this is only relevant if the column `patientIdentifier` is missing in the candidate input file) -**PLEASE NOTE THE FOLLOWING HINTS**: -- provide the neoantigen candidate file either as `--candidate-file` or `--json-file` -- if no specific output format is selected, the output will be written in [tabular](03_02_output_data.md#tabular-format) format +**PLEASE NOTE THE FOLLOWING HINTS**: - if all expression values related to a patient are NA or `rnaExpression` is not given in the input file but the tumor type has been provided in the patient file, imputated expression will be used for the relevant features **EXAMPLE** This is an example to call NeoFox with a candidate-file and obtaining the annotated neoantigen candidates in [tabular](03_02_output_data.md#tabular-format) format: ````commandline -neofox --candidate-file neoantigens_candidates.tab \ +neofox --input-file neoantigens_candidates.tsv \ --patient-data patient_data.tab \ --output-folder /path/to/out \ --output-prefix test @@ -57,7 +63,31 @@ NEOFOX_MAKEBLASTDB=path/to/ncbi-blast-2.8.1+/bin/makeblastdb 
NEOFOX_PRIME=/path/to/PRIME/PRIME ```` -### Running from docker +### Neoepitope-Mode + +To call NeoFox on a list of neoepitope candidates from the command line, use the following command. The configuration process is the same as described above: + +````commandline +neofox-epitope --input-file neoepitope_candidates.tsv \ + --output-folder /path/to/out \ + [--patient-data patient_data.txt] \ + [--output-prefix out_prefix] \ + [--organism human|mouse] \ + [--num-cpus] \ + [--config] +```` + +where: +- `--input-file`: tab-separated values table with neoepitope candidates represented by mutated peptide sequences + as described [here](03_01_input_data.md#file-with-neoepitope-candidates) (extensions .txt and .tsv) +- `--patient-data`: a table of tab separated values containing metadata on the patient as described [here](03_01_input_data.md#file-with-patient-information) +- `--output-folder`: path to the folder to which the output files should be written +- `--output-prefix`: prefix for the output files (*optional*) +- `--organism`: the organism to which the data corresponds. Possible values: [human, mouse]. Default value: human +- `--num-cpus`: number of CPUs to use (*optional*) +- `--config`: a config file with the paths to dependencies as shown above (*optional*) + +## Running from docker In order to run the command line in a docker image, all of the above applies but some additional steps are required. @@ -95,7 +125,7 @@ Now, NeoFox can be run as following by mounting the volume as indicated. Note that the output folder needs to be specified within the volume, if the output from NeoFox should be recovered. 
``` docker run -v neofox-volume:/app/data neofox-docker \ -neofox --candidate-file /app/data/test_model_file.txt \ +neofox --input-file /app/data/test_model_file.txt \ --patient-data /app/data/test_patient_info.txt \ --output-folder /app/data/output ``` @@ -165,6 +195,32 @@ patient = PatientFactory.build_patient( **WARNING**: alleles in homozygous state need to be provided twice, otherwise they are considered as hemizygous. For instance `["HLA-A*01:01"]` would be interpreted as hemizygous and `["HLA-A*01:01", "HLA-A*01:01"]` as homozygous. +### Create a neoepitope object + +Create a neoepitope candidate as indicated below. +The data will be internally validated. +Additional annotations with custom names are supported. + +```python +from neofox.model.factories import NeoepitopeFactory +from neofox.references.references import ReferenceFolder + + +hla_database = ReferenceFolder(organism='human').get_mhc_database() + +# create a neoepitope candidate using the factory +neoepitope = NeoepitopeFactory.build_neoepitope( + mutated_peptide="AAAARAAAA", + wild_type_peptide="AAAAMAAAA", + allele_mhc_i="HLA-A*01:01", + rna_expression=0.52, + rna_variant_allele_frequency=0.88, + dna_variant_allele_frequency=0.29, + my_custom_annotation="add any custom annotation as additional fields with any name", + organism='human', + mhc_database=hla_database +) +``` ### Run NeoFox @@ -185,6 +241,25 @@ where: **HINT**: process multiple neoantigens by passing a list of neoantigens and a list of patients to `NeoFox().get_annotations()`. +### Run NeoFox for neoepitopes + +Run NeoFox by passing the neoepitope and patient objects to get the neoepitope features. 
+The output is a list of type `PredictedEpitope`: + +```python +from neofox.neofox_epitope import NeoFoxEpitope + +annotated_neoepitopes = NeoFoxEpitope(neoepitopes=[neoepitope], patients=[patient], num_cpus=2).get_annotations() +``` + +where: + - `neoepitopes`: a list of neoepitope objects + - `patients`: a list of patient objects + - `num_cpus`: number of CPUs to use (*optional*) + + +**HINT**: process multiple neoepitopes by passing a list of neoepitopes and a list of patients to `NeoFoxEpitope().get_annotations()`. + ### Data transformation @@ -195,7 +270,7 @@ a Pandas data frame or into JSON format, as described [here](03_02_output_data.m from neofox.model.conversion import ModelConverter # Pandas data frame -annotations_table = ModelConverter.annotations2table(neoantigens=annotated_neoantigens) +annotations_table = ModelConverter.annotations2neoantigens_table(neoantigens=annotated_neoantigens) # JSON neoantigen_json = ModelConverter.objects2json(model_objects=annotated_neoantigens) diff --git a/docs/source/03_04_examples.md b/docs/source/03_04_examples.md index ed8a7bf0..ab2d4e2d 100644 --- a/docs/source/03_04_examples.md +++ b/docs/source/03_04_examples.md @@ -2,5 +2,17 @@ ## Demo dataset -- :download:`Neoantigens input file ` -- :download:`Patients input file ` \ No newline at end of file +Sample input: +- :download:`Neoantigens input file ` +- :download:`Patients input file ` + +Command: +``` +neofox --input-file test_data.tsv --patient-data test_patients.tsv --output-folder your_folder --output-prefix test --with-all-neoepitopes --organism human +``` + +Sample output: +- :download:`Annotated neoantigens in tabular format ` +- :download:`Annotated neoantigens and neoepitopes in JSON format ` +- :download:`Annotated MHC-I neoepitopes in tabular format ` +- :download:`Annotated MHC-II neoepitopes in tabular format ` \ No newline at end of file diff --git a/docs/source/05_models.md b/docs/source/05_models.md index f1dc245c..6f257bc6 100644 --- 
a/docs/source/05_models.md +++ b/docs/source/05_models.md @@ -9,15 +9,15 @@ Protocol Buffers is employed to model Neofox's input and output data: neoantigen - [neoantigen.proto](#neoantigen.proto) - [Annotation](#neoantigen.Annotation) + - [Annotations](#neoantigen.Annotations) - [Mhc1](#neoantigen.Mhc1) - [Mhc2](#neoantigen.Mhc2) - [Mhc2Gene](#neoantigen.Mhc2Gene) - [Mhc2Isoform](#neoantigen.Mhc2Isoform) - [MhcAllele](#neoantigen.MhcAllele) - - [Mutation](#neoantigen.Mutation) - [Neoantigen](#neoantigen.Neoantigen) - - [NeoantigenAnnotations](#neoantigen.NeoantigenAnnotations) - [Patient](#neoantigen.Patient) + - [PredictedEpitope](#neoantigen.PredictedEpitope) - [Resource](#neoantigen.Resource) - [Mhc1Name](#neoantigen.Mhc1Name) @@ -52,6 +52,25 @@ This is a generic class to hold annotations from Neofox + + +### Annotations +A set of annotations for a neoantigen candidate + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| annotations | [Annotation](#neoantigen.Annotation) | repeated | List of annotations | +| annotator | [string](#string) | | The annotator | +| annotatorVersion | [string](#string) | | The version of the annotator | +| timestamp | [string](#string) | | A timestamp determined when the annotation was created | +| resources | [Resource](#neoantigen.Resource) | repeated | List of resources | + + + + + + ### Mhc1 @@ -140,23 +159,6 @@ or changes in expression. See http://hla.alleles.org/nomenclature/naming.html fo - - -### Mutation - - - -| Field | Type | Label | Description | -| ----- | ---- | ----- | ----------- | -| position | [int32](#int32) | repeated | The amino acid position within the neoantigen candidate sequence. 
1-based, starting in the N-terminus | -| wildTypeXmer | [string](#string) | | Amino acid sequence of the WT corresponding to the neoantigen candidate sequence (IUPAC 1 letter codes) | -| mutatedXmer | [string](#string) | | Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) | - - - - - - ### Neoantigen @@ -167,51 +169,66 @@ A neoantigen minimal definition | ----- | ---- | ----- | ----------- | | patientIdentifier | [string](#string) | | Patient identifier | | gene | [string](#string) | | The HGNC gene symbol or gene identifier | -| mutation | [Mutation](#neoantigen.Mutation) | | The mutation | +| position | [int32](#int32) | repeated | The amino acid position within the neoantigen candidate sequence. 1-based, starting in the N-terminus | +| wildTypeXmer | [string](#string) | | Amino acid sequence of the WT corresponding to the neoantigen candidate sequence (IUPAC 1 letter codes) | +| mutatedXmer | [string](#string) | | Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) | | rnaExpression | [float](#float) | | Expression value of the transcript from RNA data. Range [0, +inf]. | | imputedGeneExpression | [float](#float) | | Expression value of the transcript from TCGA data. Range [0, +inf]. | | dnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the DNA. Range [0.0, 1.0] | | rnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the RNA. 
Range [0.0, 1.0] | -| neofoxAnnotations | [NeoantigenAnnotations](#neoantigen.NeoantigenAnnotations) | | The NeoFox neoantigen annotations | +| neofoxAnnotations | [Annotations](#neoantigen.Annotations) | | The NeoFox neoantigen annotations | | externalAnnotations | [Annotation](#neoantigen.Annotation) | repeated | List of external annotations | +| neoepitopesMhcI | [PredictedEpitope](#neoantigen.PredictedEpitope) | repeated | List of predicted neoepitopes for MHC-I with feature annotation (optional) | +| neoepitopesMhcII | [PredictedEpitope](#neoantigen.PredictedEpitope) | repeated | List of predicted neoepitopes for MHC-II with feature annotation (optional) | - + -### NeoantigenAnnotations -A set of annotations for a neoantigen +### Patient +The metadata required for analysis for a given patient + its patient identifier | Field | Type | Label | Description | | ----- | ---- | ----- | ----------- | -| annotations | [Annotation](#neoantigen.Annotation) | repeated | List of annotations | -| annotator | [string](#string) | | The annotator | -| annotatorVersion | [string](#string) | | The version of the annotator | -| timestamp | [string](#string) | | A timestamp determined when the annotation was created | -| resources | [Resource](#neoantigen.Resource) | repeated | List of resources | +| identifier | [string](#string) | | Patient identifier | +| isRnaAvailable | [bool](#bool) | | Is RNA expression available? 
| +| tumorType | [string](#string) | | Tumor entity in TCGA study abbrevation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations | +| mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules | +| mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules | - + + +### PredictedEpitope -### Patient -The metadata required for analysis for a given patient + its patient identifier | Field | Type | Label | Description | | ----- | ---- | ----- | ----------- | -| identifier | [string](#string) | | Patient identifier | -| isRnaAvailable | [bool](#bool) | | Is RNA expression available? | -| tumorType | [string](#string) | | Tumor entity in TCGA study abbrevation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations | -| mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules | -| mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules | +| position | [int32](#int32) | | Not sure that we need this... this is in the old PredictedEpitope model | +| mutatedPeptide | [string](#string) | | The mutated peptide | +| wildTypePeptide | [string](#string) | | Closest wild type peptide | +| alleleMhcI | [MhcAllele](#neoantigen.MhcAllele) | | MHC I allele | +| isoformMhcII | [Mhc2Isoform](#neoantigen.Mhc2Isoform) | | MHC II isoform | +| affinityMutated | [float](#float) | | MHC binding affinity for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides | +| rankMutated | [float](#float) | | MHC binding rank for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides | +| affinityWildType | [float](#float) | | MHC binding affinity for the wild type peptide. 
This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides | +| rankWildType | [float](#float) | | MHC binding rank for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides | +| neofoxAnnotations | [Annotations](#neoantigen.Annotations) | | The NeoFox neoantigen annotations | +| patientIdentifier | [string](#string) | | Patient identifier | +| gene | [string](#string) | | The HGNC gene symbol or gene identifier | +| rnaExpression | [float](#float) | | Expression value of the transcript from RNA data. Range [0, +inf]. | +| imputedGeneExpression | [float](#float) | | Expression value of the transcript from TCGA data. Range [0, +inf]. | +| dnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the DNA. Range [0.0, 1.0] | +| rnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the RNA. Range [0.0, 1.0] | diff --git a/docs/source/_static/README b/docs/source/_static/README new file mode 100644 index 00000000..bc67afcf --- /dev/null +++ b/docs/source/_static/README @@ -0,0 +1,2 @@ +This is the output of running: +neofox --input-file docs/source/_static/test_data.tsv --patient-data docs/source/_static/test_patients.tsv --output-folder docs/source/_static/ --output-prefix test --with-all-neoepitopes --organism human \ No newline at end of file diff --git a/docs/source/_static/test_data.tsv b/docs/source/_static/test_data.tsv index 97ceaef3..600a49af 100755 --- a/docs/source/_static/test_data.tsv +++ b/docs/source/_static/test_data.tsv @@ -1,3 +1,3 @@ -patientIdentifier gene rnaExpression mutation.wildTypeXmer mutation.mutatedXmer rnaVariantAlleleFrequency dnaVariantAlleleFrequency +patientIdentifier gene rnaExpression wildTypeXmer mutatedXmer rnaVariantAlleleFrequency dnaVariantAlleleFrequency Ptx BRCA2 0.519506894 AAAAAAAAAAAAAFAAAAAAAAAAAAA AAAAAAAAAAAAALAAAAAAAAAAAAA 0.294 0.857 Ptx BRCA2 0.715756594 
AAAAAAAAAAAAAMAAAAAAAAAAAAA AAAAAAAAAAAAARAAAAAAAAAAAAA 0.173 0.556 diff --git a/docs/source/_static/test_mhcII_epitope_candidates_annotated.tsv b/docs/source/_static/test_mhcII_epitope_candidates_annotated.tsv new file mode 100644 index 00000000..ccbf5551 --- /dev/null +++ b/docs/source/_static/test_mhcII_epitope_candidates_annotated.tsv @@ -0,0 +1,105 @@ +position mutatedPeptide wildTypePeptide affinityMutated rankMutated affinityWildType rankWildType patientIdentifier gene rnaExpression imputedGeneExpression dnaVariantAlleleFrequency rnaVariantAlleleFrequency isoformMhcII.name isoformMhcII.alphaChain.fullName isoformMhcII.alphaChain.name isoformMhcII.alphaChain.gene isoformMhcII.alphaChain.group isoformMhcII.alphaChain.protein isoformMhcII.betaChain.fullName isoformMhcII.betaChain.name isoformMhcII.betaChain.gene isoformMhcII.betaChain.group isoformMhcII.betaChain.protein patient_identifier IEDB_Immunogenicity MixMHC2pred_rank Selfsimilarity amplitude dissimilarity_score hex_alignment_score mutation_not_found_in_proteome pathogen_similarity +5 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA 775.74 0.4 778.08 0.8 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.783 0.9822502911203502 2 1e-05 246 1 0 +6 AAAAAAAALAAAAAA AAAAAAAAFAAAAAA 812.22 0.39 812.51 1.61 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.532 0.981263768075705 4.1282 3e-05 389 1 0 +7 AAAAAAALAAAAAAA AAAAAAAFAAAAAAA 817.84 0.34 831.29 0.97 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.73 0.980933318805297 2.8529 0 322 1 0 +8 AAAAAALAAAAAAAA AAAAAAFAAAAAAAA 769.27 0.79 617.85 0.33 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.633 0.981263768075705 0.41772 
1e-05 281 1 0 +9 AAAAALAAAAAAAAA AAAAAFAAAAAAAAA 707.77 0.37 543.14 0.05 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.555 0.98225029112035 0.13514 0 369 1 0 +10 AAAALAAAAAAAAAA AAAAFAAAAAAAAAA 667.61 0.1 505.19 0.01 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.349 0.9838711693573455 0.1 0 346 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 612.72 0.04 465.8 0.0 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36095 0.575 0.9860888907728267 0 0 335 1 0 +12 AALAAAAAAAAAAAA AAFAAAAAAAAAAAA 703.69 0.18 560.78 0.04 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.39518 0.79 0.9888559259233813 0.22222 0 336 1 0 +13 ALAAAAAAAAAAAAA AFAAAAAAAAAAAAA 824.45 0.56 678.63 0.28 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.41148 1.24 0.9921226615333941 0.5 0 341 1 0 +5 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA 633.73 7.18 491.05 6.89 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.783 0.9822502911203502 0.95961 1e-05 246 1 0 +6 AAAAAAAALAAAAAA AAAAAAAAFAAAAAA 653.53 5.99 547.35 6.37 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.532 0.981263768075705 1.0634 3e-05 389 1 0 +7 AAAAAAALAAAAAAA AAAAAAAFAAAAAAA 694.34 5.77 574.09 4.43 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.73 0.980933318805297 0.76776 0 322 1 0 +9 AAAAALAAAAAAAAA AAAAAFAAAAAAAAA 584.81 3.23 467.56 1.3 0.0 0.0 0.0 0.0 
HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.555 0.98225029112035 0.40248 0 369 1 0 +10 AAAALAAAAAAAAAA AAAAFAAAAAAAAAA 524.6 1.23 443.41 0.67 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.349 0.9838711693573455 0.54472 0 346 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 495.4 0.78 452.64 0.47 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36095 0.575 0.9860888907728267 0.60256 0 335 1 0 +12 AALAAAAAAAAAAAA AAFAAAAAAAAAAAA 593.48 2.6 553.29 1.57 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.39518 0.79 0.9888559259233813 0.60385 0 336 1 0 +13 ALAAAAAAAAAAAAA AFAAAAAAAAAAAAA 756.49 7.57 713.32 5.31 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.41148 1.24 0.9921226615333941 0.70145 0 341 1 0 +9 AAAAALAAAAAAAAA AAAAAFAAAAAAAAA 278.25 8.94 259.76 11.67 0.0 0.0 0.0 0.0 HLA-DRB1*04:04 HLA-DRB1*04:04 HLA-DRB1*04:04 DRB1 04 04 Ptx 0.36258 0.555 0.98225029112035 1.3054 0 369 1 0 +10 AAAALAAAAAAAAAA AAAAFAAAAAAAAAA 179.85 4.6 199.63 7.28 0.0 0.0 0.0 0.0 HLA-DRB1*04:04 HLA-DRB1*04:04 HLA-DRB1*04:04 DRB1 04 04 Ptx 0.36258 0.349 0.9838711693573455 1.5826 0 346 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 142.15 3.57 169.07 5.85 0.0 0.0 0.0 0.0 HLA-DRB1*04:04 HLA-DRB1*04:04 HLA-DRB1*04:04 DRB1 04 04 Ptx 0.36095 0.575 0.9860888907728267 1.6387 0 335 1 0 +12 AALAAAAAAAAAAAA AAFAAAAAAAAAAAA 193.07 9.05 213.1 13.85 0.0 0.0 0.0 0.0 HLA-DRB1*04:04 HLA-DRB1*04:04 HLA-DRB1*04:04 DRB1 04 04 Ptx 0.39518 0.79 0.9888559259233813 1.5304 0 336 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 584.1 7.6 535.79 9.79 0.0 0.0 0.0 0.0 HLA-DRB1*15:01 HLA-DRB1*15:01 HLA-DRB1*15:01 DRB1 15 01 Ptx 0.36095 0.575 
0.9860888907728267 1.2882 0 335 1 0 +5 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA 15.77 0.03 20.72 0.27 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.783 0.9822502911203502 9 1e-05 246 1 0 +6 AAAAAAAALAAAAAA AAAAAAAAFAAAAAA 15.65 0.04 22.47 0.41 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.532 0.981263768075705 10.25 3e-05 389 1 0 +7 AAAAAAALAAAAAAA AAAAAAAFAAAAAAA 17.14 0.07 23.89 0.35 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.73 0.980933318805297 5 0 322 1 0 +8 AAAAAALAAAAAAAA AAAAAAFAAAAAAAA 21.13 0.39 27.61 0.4 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.633 0.981263768075705 1.0256 1e-05 281 1 0 +9 AAAAALAAAAAAAAA AAAAAFAAAAAAAAA 22.5 0.33 29.02 0.29 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.555 0.98225029112035 0.87879 0 369 1 0 +10 AAAALAAAAAAAAAA AAAAFAAAAAAAAAA 22.35 0.13 27.86 0.22 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36258 0.349 0.9838711693573455 1.6923 0 346 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 21.21 0.07 26.86 0.11 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.36095 0.575 0.9860888907728267 1.5714 0 335 1 0 +12 AALAAAAAAAAAAAA AAFAAAAAAAAAAAA 23.24 0.08 27.69 0.08 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.39518 0.79 0.9888559259233813 1 0 336 1 0 +13 ALAAAAAAAAAAAAA AFAAAAAAAAAAAAA 28.07 0.1 29.39 0.09 0.0 0.0 0.0 0.0 
HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.41148 1.24 0.9921226615333941 0.9 0 341 1 0 +5 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA 3415.63 2.93 2585.77 2.12 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36258 0.783 0.9822502911203502 0.72355 1e-05 246 1 0 +6 AAAAAAAALAAAAAA AAAAAAAAFAAAAAA 2958.8 1.55 2154.78 0.99 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36258 0.532 0.981263768075705 0.63871 3e-05 389 1 0 +7 AAAAAAALAAAAAAA AAAAAAAFAAAAAAA 2710.11 1.53 2144.94 1.06 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36258 0.73 0.980933318805297 0.69281 0 322 1 0 +8 AAAAAALAAAAAAAA AAAAAAFAAAAAAAA 2459.74 3.18 1299.57 0.73 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36258 0.633 0.981263768075705 0.22956 1e-05 281 1 0 +9 AAAAALAAAAAAAAA AAAAAFAAAAAAAAA 1990.01 1.81 940.81 0.32 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36258 0.555 0.98225029112035 0.1768 0 369 1 0 +10 AAAALAAAAAAAAAA AAAAFAAAAAAAAAA 1561.25 0.83 698.42 0.16 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36258 0.349 0.9838711693573455 0.19277 0 346 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 1308.41 0.62 542.44 0.12 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.36095 0.575 0.9860888907728267 0.19355 0 335 1 0 +12 AALAAAAAAAAAAAA AAFAAAAAAAAAAAA 1572.88 1.72 671.31 0.5 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 
HLA-DPB1*06:01 DPB1 06 01 Ptx 0.39518 0.79 0.9888559259233813 0.2907 0 336 1 0 +13 ALAAAAAAAAAAAAA AFAAAAAAAAAAAAA 2211.13 4.46 929.37 2.68 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.41148 1.24 0.9921226615333941 0.6009 0 341 1 0 +5 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA 988.63 4.52 658.16 3.67 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.783 0.9822502911203502 0.81195 1e-05 246 1 0 +6 AAAAAAAALAAAAAA AAAAAAAAFAAAAAA 1040.53 4.39 702.02 2.95 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.532 0.981263768075705 0.67198 3e-05 389 1 0 +7 AAAAAAALAAAAAAA AAAAAAAFAAAAAAA 998.08 3.75 717.94 2.67 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.73 0.980933318805297 0.712 0 322 1 0 +8 AAAAAALAAAAAAAA AAAAAAFAAAAAAAA 1075.48 5.69 695.28 1.57 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.633 0.981263768075705 0.27592 1e-05 281 1 0 +9 AAAAALAAAAAAAAA AAAAAFAAAAAAAAA 1116.09 5.4 744.22 1.03 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.555 0.98225029112035 0.19074 0 369 1 0 +10 AAAALAAAAAAAAAA AAAAFAAAAAAAAAA 1119.5 4.0 768.85 0.61 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36258 0.349 0.9838711693573455 0.1525 0 346 1 0 +11 AAALAAAAAAAAAAA AAAFAAAAAAAAAAA 1106.38 2.76 834.87 0.37 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.36095 0.575 0.9860888907728267 0.13406 0 335 1 0 +12 
AALAAAAAAAAAAAA AAFAAAAAAAAAAAA 1248.22 4.25 1005.79 1.58 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.39518 0.79 0.9888559259233813 0.37176 0 336 1 0 +13 ALAAAAAAAAAAAAA AFAAAAAAAAAAAAA 1435.11 5.84 1159.82 3.96 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.41148 1.24 0.9921226615333941 0.67808 0 341 1 0 +1 AAAAAAAAAAAAARA AAAAAAAAAAAAAMA 1511.84 2.77 1323.48 4.81 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.41886 1.52 0.9886461245535874 1.7365 0 349 1 0 +2 AAAAAAAAAAAARAA AAAAAAAAAAAAMAA 1355.29 2.24 1185.64 4.1 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42214 2.28 0.9840173345571813 1.8304 0 369 1 0 +3 AAAAAAAAAAARAAA AAAAAAAAAAAMAAA 1458.83 2.75 1103.83 4.27 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42337 1.65 0.9801341662894537 1.5527 0 339 1 0 +4 AAAAAAAAAARAAAA AAAAAAAAAAMAAAA 1244.72 2.32 970.05 4.64 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 2.26 0.9770469333485702 2 0 336 1 0 +5 AAAAAAAAARAAAAA AAAAAAAAAMAAAAA 1313.38 2.71 947.08 4.03 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 2.32 0.9748046960972773 1.4871 0.00382 289 1 0 +6 AAAAAAAARAAAAAA AAAAAAAAMAAAAAA 1483.05 3.03 917.88 4.47 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 1.85 0.9734459201423025 1.4752 0.99994 239 1 0 +7 AAAAAAARAAAAAAA AAAAAAAMAAAAAAA 1461.69 2.32 915.62 5.39 0.0 0.0 0.0 0.0 
HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 2.14 0.9729918667892427 2.3233 1e-05 409 1 0 +8 AAAAAARAAAAAAAA AAAAAAMAAAAAAAA 1508.36 3.04 950.08 4.72 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 0.15 0.9734459201423027 1.5526 1e-05 226 1 0 +9 AAAAARAAAAAAAAA AAAAAMAAAAAAAAA 1466.44 2.71 907.21 3.1 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 0.0473 0.9748046960972773 1.1439 1e-05 284 1 0 +10 AAAARAAAAAAAAAA AAAAMAAAAAAAAAA 1395.27 2.26 897.36 1.96 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 0.0176 0.9770469333485704 0.86726 0.99239 346 1 0 +11 AAARAAAAAAAAAAA AAAMAAAAAAAAAAA 1464.32 2.77 995.3 1.78 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*03:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42419 0.048 0.9801341662894535 0.6426 0 343 1 0 +1 AAAAAAAAAAAAARA AAAAAAAAAAAAAMA 5840.63 2.39 5798.69 6.28 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.41886 1.52 0.9886461245535874 2.6276 0 349 1 0 +2 AAAAAAAAAAAARAA AAAAAAAAAAAAMAA 5167.9 1.5 4819.91 4.45 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42214 2.28 0.9840173345571813 2.9667 0 369 1 0 +3 AAAAAAAAAAARAAA AAAAAAAAAAAMAAA 4422.07 1.26 3998.71 3.26 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42337 1.65 0.9801341662894537 2.5873 0 339 1 0 +4 AAAAAAAAAARAAAA AAAAAAAAAAMAAAA 4015.1 1.59 3412.01 3.32 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 
HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 2.26 0.9770469333485702 2.088 0 336 1 0 +5 AAAAAAAAARAAAAA AAAAAAAAAMAAAAA 4553.8 2.74 3508.29 4.28 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 2.32 0.9748046960972773 1.562 0.00382 289 1 0 +6 AAAAAAAARAAAAAA AAAAAAAAMAAAAAA 4749.3 1.95 3495.48 3.68 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 1.85 0.9734459201423025 1.8872 0.99994 239 1 0 +7 AAAAAAARAAAAAAA AAAAAAAMAAAAAAA 3586.24 0.44 3424.69 2.93 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 2.14 0.9729918667892427 6.6591 1e-05 409 1 0 +8 AAAAAARAAAAAAAA AAAAAAMAAAAAAAA 668.32 0.0 3016.89 3.7 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 0.15 0.9734459201423027 NA 1e-05 226 1 0 +9 AAAAARAAAAAAAAA AAAAAMAAAAAAAAA 394.13 0.0 2471.8 2.33 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 0.0473 0.9748046960972773 NA 1e-05 284 1 0 +10 AAAARAAAAAAAAAA AAAAMAAAAAAAAAA 296.13 0.0 2096.25 1.31 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42378 0.0176 0.9770469333485704 NA 0.99239 346 1 0 +11 AAARAAAAAAAAAAA AAAMAAAAAAAAAAA 264.65 0.0 1835.64 1.03 0.0 0.0 0.0 0.0 HLA-DPA1*01:03-DPB1*06:01 HLA-DPA1*01:03 HLA-DPA1*01:03 DPA1 01 03 HLA-DPB1*06:01 HLA-DPB1*06:01 DPB1 06 01 Ptx 0.42419 0.048 0.9801341662894535 NA 0 343 1 0 +1 AAAAAAAAAAAAARA AAAAAAAAAAAAAMA 34.83 0.03 33.35 0.09 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.41886 1.52 0.9886461245535874 3 0 349 1 0 +2 
AAAAAAAAAAAARAA AAAAAAAAAAAAMAA 30.04 0.03 26.8 0.09 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42214 2.28 0.9840173345571813 3 0 369 1 0 +3 AAAAAAAAAAARAAA AAAAAAAAAAAMAAA 28.35 0.03 19.73 0.15 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42337 1.65 0.9801341662894537 5 0 339 1 0 +4 AAAAAAAAAARAAAA AAAAAAAAAAMAAAA 23.91 0.01 15.95 0.06 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 2.26 0.9770469333485702 6 0 336 1 0 +5 AAAAAAAAARAAAAA AAAAAAAAAMAAAAA 21.67 0.01 14.36 0.05 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 2.32 0.9748046960972773 5 0.00382 289 1 0 +6 AAAAAAAARAAAAAA AAAAAAAAMAAAAAA 22.36 0.02 14.69 0.05 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 1.85 0.9734459201423025 2.5 0.99994 239 1 0 +7 AAAAAAARAAAAAAA AAAAAAAMAAAAAAA 22.74 0.03 16.47 0.07 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 2.14 0.9729918667892427 2.3333 1e-05 409 1 0 +8 AAAAAARAAAAAAAA AAAAAAMAAAAAAAA 27.93 0.06 17.78 0.27 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 0.15 0.9734459201423027 4.5 1e-05 226 1 0 +9 AAAAARAAAAAAAAA AAAAAMAAAAAAAAA 30.43 0.07 18.01 0.13 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 0.0473 0.9748046960972773 1.8571 1e-05 284 1 0 +10 AAAARAAAAAAAAAA AAAAMAAAAAAAAAA 32.5 0.05 17.62 0.06 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 
DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 0.0176 0.9770469333485704 1.2 0.99239 346 1 0 +11 AAARAAAAAAAAAAA AAAMAAAAAAAAAAA 34.6 0.05 17.16 0.04 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*06:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42419 0.048 0.9801341662894535 0.8 0 343 1 0 +1 AAAAAAAAAAAAARA AAAAAAAAAAAAAMA 851.97 0.14 1053.73 0.64 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.41886 1.52 0.9886461245535874 4.5714 0 349 1 0 +2 AAAAAAAAAAAARAA AAAAAAAAAAAAMAA 677.62 0.13 898.47 0.58 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42214 2.28 0.9840173345571813 4.4615 0 369 1 0 +3 AAAAAAAAAAARAAA AAAAAAAAAAAMAAA 622.85 0.14 752.33 0.82 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42337 1.65 0.9801341662894537 5.8571 0 339 1 0 +4 AAAAAAAAAARAAAA AAAAAAAAAAMAAAA 516.75 0.08 691.14 0.46 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 2.26 0.9770469333485702 5.75 0 336 1 0 +5 AAAAAAAAARAAAAA AAAAAAAAAMAAAAA 482.08 0.07 689.4 0.35 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 2.32 0.9748046960972773 5 0.00382 289 1 0 +6 AAAAAAAARAAAAAA AAAAAAAAMAAAAAA 507.17 0.08 721.2 0.45 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 1.85 0.9734459201423025 5.625 0.99994 239 1 0 +7 AAAAAAARAAAAAAA AAAAAAAMAAAAAAA 497.47 0.07 710.44 0.48 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 2.14 0.9729918667892427 6.8571 1e-05 409 1 
0 +8 AAAAAARAAAAAAAA AAAAAAMAAAAAAAA 528.6 0.13 628.26 0.83 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 0.15 0.9734459201423027 6.3846 1e-05 226 1 0 +9 AAAAARAAAAAAAAA AAAAAMAAAAAAAAA 540.55 0.06 555.02 0.19 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 0.0473 0.9748046960972773 3.1667 1e-05 284 1 0 +10 AAAARAAAAAAAAAA AAAAMAAAAAAAAAA 543.22 0.05 551.31 0.05 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42378 0.0176 0.9770469333485704 1 0.99239 346 1 0 +11 AAARAAAAAAAAAAA AAAMAAAAAAAAAAA 631.3 0.11 527.33 0.03 0.0 0.0 0.0 0.0 HLA-DQA1*03:01-DQB1*06:02 HLA-DQA1*03:01 HLA-DQA1*03:01 DQA1 03 01 HLA-DQB1*06:02 HLA-DQB1*06:02 DQB1 06 02 Ptx 0.42419 0.048 0.9801341662894535 0.27273 0 343 1 0 +1 AAAAAAAAAAAAARA AAAAAAAAAAAAAMA 1037.44 3.94 860.3 5.49 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.41886 1.52 0.9886461245535874 1.3934 0 349 1 0 +2 AAAAAAAAAAAARAA AAAAAAAAAAAAMAA 923.33 3.37 703.91 4.81 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42214 2.28 0.9840173345571813 1.4273 0 369 1 0 +3 AAAAAAAAAAARAAA AAAAAAAAAAAMAAA 933.47 3.57 621.56 4.73 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42337 1.65 0.9801341662894537 1.3249 0 339 1 0 +4 AAAAAAAAAARAAAA AAAAAAAAAAMAAAA 784.97 2.47 591.88 5.99 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 2.26 0.9770469333485702 2.4251 0 336 1 0 +5 AAAAAAAAARAAAAA AAAAAAAAAMAAAAA 791.86 3.32 626.93 9.48 0.0 0.0 0.0 0.0 
HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 2.32 0.9748046960972773 2.8554 0.00382 289 1 0 +6 AAAAAAAARAAAAAA AAAAAAAAMAAAAAA 826.67 3.38 674.0 8.54 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 1.85 0.9734459201423025 2.5266 0.99994 239 1 0 +7 AAAAAAARAAAAAAA AAAAAAAMAAAAAAA 793.58 2.06 661.79 5.5 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 2.14 0.9729918667892427 2.6699 1e-05 409 1 0 +8 AAAAAARAAAAAAAA AAAAAAMAAAAAAAA 759.9 2.5 592.84 6.39 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 0.15 0.9734459201423027 2.556 1e-05 226 1 0 +9 AAAAARAAAAAAAAA AAAAAMAAAAAAAAA 738.78 1.21 533.46 2.07 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 0.0473 0.9748046960972773 1.7107 1e-05 284 1 0 +10 AAAARAAAAAAAAAA AAAAMAAAAAAAAAA 713.14 0.69 491.92 0.84 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42378 0.0176 0.9770469333485704 1.2174 0.99239 346 1 0 +11 AAARAAAAAAAAAAA AAAMAAAAAAAAAAA 774.9 1.37 494.5 0.56 0.0 0.0 0.0 0.0 HLA-DQA1*01:02-DQB1*03:02 HLA-DQA1*01:02 HLA-DQA1*01:02 DQA1 01 02 HLA-DQB1*03:02 HLA-DQB1*03:02 DQB1 03 02 Ptx 0.42419 0.048 0.9801341662894535 0.40876 0 343 1 0 diff --git a/docs/source/_static/test_mhcI_epitope_candidates_annotated.tsv b/docs/source/_static/test_mhcI_epitope_candidates_annotated.tsv new file mode 100644 index 00000000..3f718ba5 --- /dev/null +++ b/docs/source/_static/test_mhcI_epitope_candidates_annotated.tsv @@ -0,0 +1,2 @@ +patient_identifier position mutatedPeptide wildTypePeptide affinityMutated rankMutated affinityWildType 
rankWildType patientIdentifier gene rnaExpression imputedGeneExpression dnaVariantAlleleFrequency rnaVariantAlleleFrequency alleleMhcI.fullName alleleMhcI.name alleleMhcI.gene alleleMhcI.group alleleMhcI.protein DAI IEDB_Immunogenicity Improved_Binder_MHCI MixMHCpred_affinity_score MixMHCpred_rank PRIME_affinity_score PRIME_rank Priority_score Selfsimilarity Selfsimilarity_conserved_binder Tcell_predictor_score amplitude anchor_mutated dissimilarity_score hex_alignment_score mutation_not_found_in_proteome number_of_mismatches pathogen_similarity position_mutation recognition_potential +Ptx 9.0 AAAAARAAA AAAAAMAAA 199.38 1.346 1697.4 4.347 0.0 0.0 0.0 0.0 HLA-B*07:02 HLA-B*07:02 B 07 02 1498 0.19477 1 0.51774 0.08 0.18732 0.06 0.32903 0.966307602001344 NA 0.5173265191848659 5.6409 0 1 132 1 1 0 6 0 diff --git a/docs/source/_static/test_neoantigen_candidates_annotated.json b/docs/source/_static/test_neoantigen_candidates_annotated.json index ffaf8f8f..f7d7506f 100644 --- a/docs/source/_static/test_neoantigen_candidates_annotated.json +++ b/docs/source/_static/test_neoantigen_candidates_annotated.json @@ -1 +1 @@ 
-[{"patient_identifier":"Ptx","gene":"BRCA2","mutation":{"position":[14],"wild_type_xmer":"AAAAAAAAAAAAAFAAAAAAAAAAAAA","mutated_xmer":"AAAAAAAAAAAAALAAAAAAAAAAAAA"},"rna_expression":0.5195068939999999,"imputed_gene_expression":0.5365996317,"dna_variant_allele_frequency":0.857,"rna_variant_allele_frequency":0.294,"neofox_annotations":{"annotations":[{"name":"Best_rank_MHCI_score","value":"4.389"},{"name":"Best_rank_MHCI_score_epitope","value":"AAALAAAAA"},{"name":"Best_rank_MHCI_score_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_score","value":"1018.2"},{"name":"Best_affinity_MHCI_epitope","value":"AALAAAAAAA"},{"name":"Best_affinity_MHCI_allele","value":"HLA-A*02:01"},{"name":"Best_rank_MHCI_9mer_score","value":"4.389"},{"name":"Best_rank_MHCI_9mer_epitope","value":"AAALAAAAA"},{"name":"Best_rank_MHCI_9mer_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_score","value":"1534.8"},{"name":"Best_affinity_MHCI_9mer_allele","value":"HLA-C*03:04"},{"name":"Best_affinity_MHCI_9mer_epitope","value":"AAALAAAAA"},{"name":"Best_affinity_MHCI_score_WT","value":"8184.1"},{"name":"Best_affinity_MHCI_epitope_WT","value":"AAFAAAAAAA"},{"name":"Best_affinity_MHCI_allele_WT","value":"HLA-A*02:01"},{"name":"Best_rank_MHCI_score_WT","value":"5.068"},{"name":"Best_rank_MHCI_score_epitope_WT","value":"AAAFAAAAA"},{"name":"Best_rank_MHCI_score_allele_WT","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_9mer_score_WT","value":"5.068"},{"name":"Best_rank_MHCI_9mer_epitope_WT","value":"AAAFAAAAA"},{"name":"Best_rank_MHCI_9mer_allele_WT","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_score_WT","value":"1180.8"},{"name":"Best_affinity_MHCI_9mer_allele_WT","value":"HLA-C*03:04"},{"name":"Best_affinity_MHCI_9mer_epitope_WT","value":"AAAFAAAAA"},{"name":"Generator_rate_MHCI","value":"0"},{"name":"Generator_rate_CDN_MHCI","value":"0"},{"name":"Generator_rate_ADN_MHCI","value":"0"},{"name":"PHBR_I","value":"8.5743"},{"name":"Best_affinity_MHCI_9mer_position_muta
tion","value":"4"},{"name":"Best_affinity_MHCI_9mer_anchor_mutated","value":"0"},{"name":"Best_rank_MHCII_score","value":"0.03"},{"name":"Best_rank_MHCII_score_epitope","value":"AAAAAAAAALAAAAA"},{"name":"Best_rank_MHCII_score_allele","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_affinity_MHCII_score","value":"15.65"},{"name":"Best_affinity_MHCII_epitope","value":"AAAAAAAALAAAAAA"},{"name":"Best_affinity_MHCII_allele","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_rank_MHCII_score_WT","value":"0.27"},{"name":"Best_rank_MHCII_score_epitope_WT","value":"AAAAAAAAAFAAAAA"},{"name":"Best_rank_MHCII_score_allele_WT","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_affinity_MHCII_score_WT","value":"22.47"},{"name":"Best_affinity_MHCII_epitope_WT","value":"AAAAAAAAFAAAAAA"},{"name":"Best_affinity_MHCII_allele_WT","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"PHBR_II","value":"NA"},{"name":"Generator_rate_MHCII","value":"25"},{"name":"Generator_rate_CDN_MHCII","value":"21"},{"name":"Generator_rate_ADN_MHCII","value":"4"},{"name":"MixMHCpred_best_peptide","value":"AAALAAAAA"},{"name":"MixMHCpred_best_score","value":"0.3862"},{"name":"MixMHCpred_best_rank","value":"0.3"},{"name":"MixMHCpred_best_allele","value":"HLA-B*07:02"},{"name":"PRIME_best_peptide","value":"AAALAAAAA"},{"name":"PRIME_best_score","value":"0.18933"},{"name":"PRIME_best_rank","value":"0.04"},{"name":"PRIME_best_allele","value":"HLA-B*07:02"},{"name":"MixMHC2pred_best_peptide","value":"AAAALAAAAAAAAAAA"},{"name":"MixMHC2pred_best_rank","value":"0.296"},{"name":"MixMHC2pred_best_allele","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Expression_mutated_transcript","value":"0.15274"},{"name":"mutation_not_found_in_proteome","value":"1"},{"name":"Amplitude_MHCI_affinity_9mer","value":"0.56811"},{"name":"Amplitude_MHCI_affinity","value":"2.3262"},{"name":"Amplitude_MHCII_rank","value":"9"},{"name":"Pathogensimiliarity_MHCI_9mer","value":"0"},{"name":"Recognition_Potential_MHCI_9mer","value":"0"},{"name"
:"Pathogensimiliarity_MHCII","value":"0"},{"name":"DAI_MHCI_affinity","value":"7165.9"},{"name":"CDN_MHCI","value":"0"},{"name":"ADN_MHCI","value":"0"},{"name":"CDN_MHCII","value":"1"},{"name":"ADN_MHCII","value":"1"},{"name":"Tcell_predictor_score","value":"0.39121828442992074"},{"name":"Improved_Binder_MHCI","value":"0"},{"name":"Selfsimilarity_MHCII","value":"0.981263768075705"},{"name":"Selfsimilarity_MHCI","value":"0.9763465205057597"},{"name":"Selfsimilarity_MHCI_conserved_binder","value":"0.9763465205057597"},{"name":"Number_of_mismatches_MCHI","value":"1"},{"name":"Priority_score","value":"0"},{"name":"Neoag_immunogenicity","value":"13.16998"},{"name":"IEDB_Immunogenicity_MHCI","value":"0.18182"},{"name":"IEDB_Immunogenicity_MHCII","value":"0.36258"},{"name":"Dissimilarity_MHCI","value":"1"},{"name":"Dissimilarity_MHCII","value":"6e-05"},{"name":"vaxrank_binding_score","value":"0.01301"},{"name":"vaxrank_total_score","value":"0.00199"},{"name":"Hex_alignment_score_MHCI","value":"177"},{"name":"Hex_alignment_score_MHCII","value":"389"}],"annotator":"NeoFox","annotator_version":"0.5.4.dev5","timestamp":"20211104154037935536","resources":[{"name":"netMHCpan","version":"4.1"},{"name":"netMHCIIpan","version":"4.0"},{"name":"mixMHCpred","version":"2.1"},{"name":"mixMHC2pred","version":"1.2"},{"name":"IEDB","url":"http://www.iedb.org/downloader.php?file_name=doc/tcell_full_v3.zip","hash":"d225ab671ef375400d387354a5f450ff","download_timestamp":"20211103221051"},{"name":"Human Ensembl proteome","version":"2021_03","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000005640/UP000005640_9606.fasta.gz","hash":"a41303fd38380ca0321cf8a3d9beb4bc","download_timestamp":"20211103221051"},{"name":"Mouse Ensembl 
proteome","version":"2021_03","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000000589/UP000000589_10090.fasta.gz","hash":"27a5de8c1eca42eebaf56400945cf7cb","download_timestamp":"20211103221051"},{"name":"IMGT/HLA database","version":"3.46.0","url":"https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/Allelelist.txt","hash":"5a7618819498b88d0790bf4d58975d13","download_timestamp":"20211103221051"}]}},{"patient_identifier":"Ptx","gene":"BRCA2","mutation":{"position":[14],"wild_type_xmer":"AAAAAAAAAAAAAMAAAAAAAAAAAAA","mutated_xmer":"AAAAAAAAAAAAARAAAAAAAAAAAAA"},"rna_expression":0.715756594,"imputed_gene_expression":0.5365996317,"dna_variant_allele_frequency":0.556,"rna_variant_allele_frequency":0.17300000000000001,"neofox_annotations":{"annotations":[{"name":"Best_rank_MHCI_score","value":"1.346"},{"name":"Best_rank_MHCI_score_epitope","value":"AAAAARAAA"},{"name":"Best_rank_MHCI_score_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_score","value":"199.38"},{"name":"Best_affinity_MHCI_epitope","value":"AAAAARAAA"},{"name":"Best_affinity_MHCI_allele","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_9mer_score","value":"1.346"},{"name":"Best_rank_MHCI_9mer_epitope","value":"AAAAARAAA"},{"name":"Best_rank_MHCI_9mer_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_score","value":"199.38"},{"name":"Best_affinity_MHCI_9mer_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_epitope","value":"AAAAARAAA"},{"name":"Best_affinity_MHCI_score_WT","value":"1697.4"},{"name":"Best_affinity_MHCI_epitope_WT","value":"AAAAAMAAA"},{"name":"Best_affinity_MHCI_allele_WT","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_score_WT","value":"4.347"},{"name":"Best_rank_MHCI_score_epitope_WT","value":"AAAAAMAAA"},{"name":"Best_rank_MHCI_score_allele_WT","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_9mer_score_WT","value":"4.347"},{"name":"Best_rank_MHCI_9mer_epitope_WT","value":"AAAAAMAAA"}
,{"name":"Best_rank_MHCI_9mer_allele_WT","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_score_WT","value":"1697.4"},{"name":"Best_affinity_MHCI_9mer_allele_WT","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_epitope_WT","value":"AAAAAMAAA"},{"name":"Generator_rate_MHCI","value":"0"},{"name":"Generator_rate_CDN_MHCI","value":"0"},{"name":"Generator_rate_ADN_MHCI","value":"0"},{"name":"PHBR_I","value":"4.6879"},{"name":"Best_affinity_MHCI_9mer_position_mutation","value":"6"},{"name":"Best_affinity_MHCI_9mer_anchor_mutated","value":"0"},{"name":"Best_rank_MHCII_score","value":"0"},{"name":"Best_rank_MHCII_score_epitope","value":"AAAAAARAAAAAAAA"},{"name":"Best_rank_MHCII_score_allele","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Best_affinity_MHCII_score","value":"21.67"},{"name":"Best_affinity_MHCII_epitope","value":"AAAAAAAAARAAAAA"},{"name":"Best_affinity_MHCII_allele","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_rank_MHCII_score_WT","value":"3.7"},{"name":"Best_rank_MHCII_score_epitope_WT","value":"AAAAAAMAAAAAAAA"},{"name":"Best_rank_MHCII_score_allele_WT","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Best_affinity_MHCII_score_WT","value":"14.36"},{"name":"Best_affinity_MHCII_epitope_WT","value":"AAAAAAAAAMAAAAA"},{"name":"Best_affinity_MHCII_allele_WT","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"PHBR_II","value":"NA"},{"name":"Generator_rate_MHCII","value":"45"},{"name":"Generator_rate_CDN_MHCII","value":"28"},{"name":"Generator_rate_ADN_MHCII","value":"17"},{"name":"MixMHCpred_best_peptide","value":"AAAAAARAA"},{"name":"MixMHCpred_best_score","value":"0.51814"},{"name":"MixMHCpred_best_rank","value":"0.08"},{"name":"MixMHCpred_best_allele","value":"HLA-B*07:02"},{"name":"PRIME_best_peptide","value":"AAAAAARAA"},{"name":"PRIME_best_score","value":"0.18732"},{"name":"PRIME_best_rank","value":"0.06"},{"name":"PRIME_best_allele","value":"HLA-B*07:02"},{"name":"MixMHC2pred_best_peptide","value":"AAAARAAAAAAAAAA"},{"name":"MixMHC2pred_best_ra
nk","value":"0.0176"},{"name":"MixMHC2pred_best_allele","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Expression_mutated_transcript","value":"0.12383"},{"name":"mutation_not_found_in_proteome","value":"1"},{"name":"Amplitude_MHCI_affinity_9mer","value":"5.6409"},{"name":"Amplitude_MHCI_affinity","value":"5.6409"},{"name":"Amplitude_MHCII_rank","value":"NA"},{"name":"Pathogensimiliarity_MHCI_9mer","value":"0"},{"name":"Recognition_Potential_MHCI_9mer","value":"0"},{"name":"Pathogensimiliarity_MHCII","value":"0"},{"name":"DAI_MHCI_affinity","value":"1498"},{"name":"CDN_MHCI","value":"0"},{"name":"ADN_MHCI","value":"0"},{"name":"CDN_MHCII","value":"1"},{"name":"ADN_MHCII","value":"NA"},{"name":"Tcell_predictor_score","value":"0.5173265191848659"},{"name":"Improved_Binder_MHCI","value":"1"},{"name":"Selfsimilarity_MHCII","value":"0.9748046960972773"},{"name":"Selfsimilarity_MHCI","value":"0.966307602001344"},{"name":"Selfsimilarity_MHCI_conserved_binder","value":"NA"},{"name":"Number_of_mismatches_MCHI","value":"1"},{"name":"Priority_score","value":"0.32903"},{"name":"Neoag_immunogenicity","value":"13.16998"},{"name":"IEDB_Immunogenicity_MHCI","value":"0.19477"},{"name":"IEDB_Immunogenicity_MHCII","value":"0.42378"},{"name":"Dissimilarity_MHCI","value":"1"},{"name":"Dissimilarity_MHCII","value":"0.4943"},{"name":"vaxrank_binding_score","value":"0.80336"},{"name":"vaxrank_total_score","value":"0.09948"},{"name":"Hex_alignment_score_MHCI","value":"132"},{"name":"Hex_alignment_score_MHCII","value":"289"}],"annotator":"NeoFox","annotator_version":"0.5.4.dev5","timestamp":"20211104154016918216","resources":[{"name":"netMHCpan","version":"4.1"},{"name":"netMHCIIpan","version":"4.0"},{"name":"mixMHCpred","version":"2.1"},{"name":"mixMHC2pred","version":"1.2"},{"name":"IEDB","url":"http://www.iedb.org/downloader.php?file_name=doc/tcell_full_v3.zip","hash":"d225ab671ef375400d387354a5f450ff","download_timestamp":"20211103221051"},{"name":"Human Ensembl 
proteome","version":"2021_03","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000005640/UP000005640_9606.fasta.gz","hash":"a41303fd38380ca0321cf8a3d9beb4bc","download_timestamp":"20211103221051"},{"name":"Mouse Ensembl proteome","version":"2021_03","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000000589/UP000000589_10090.fasta.gz","hash":"27a5de8c1eca42eebaf56400945cf7cb","download_timestamp":"20211103221051"},{"name":"IMGT/HLA database","version":"3.46.0","url":"https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/Allelelist.txt","hash":"5a7618819498b88d0790bf4d58975d13","download_timestamp":"20211103221051"}]}}] \ No newline at end of file +[{"patient_identifier":"Ptx","gene":"BRCA2","position":[14],"wild_type_xmer":"AAAAAAAAAAAAAFAAAAAAAAAAAAA","mutated_xmer":"AAAAAAAAAAAAALAAAAAAAAAAAAA","rna_expression":0.5195068939999999,"imputed_gene_expression":0.5365996317,"dna_variant_allele_frequency":0.857,"rna_variant_allele_frequency":null,"neofox_annotations":{"annotations":[{"name":"Best_rank_MHCI_score","value":"4.389"},{"name":"Best_rank_MHCI_score_epitope","value":"AAALAAAAA"},{"name":"Best_rank_MHCI_score_allele","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_score_WT","value":"5.068"},{"name":"Best_rank_MHCI_score_epitope_WT","value":"AAAFAAAAA"},{"name":"Best_affinity_MHCI_score","value":"1018.2"},{"name":"Best_affinity_MHCI_epitope","value":"AALAAAAAAA"},{"name":"Best_affinity_MHCI_allele","value":"HLA-A*02:01"},{"name":"Best_affinity_MHCI_score_WT","value":"8184.1"},{"name":"Best_affinity_MHCI_epitope_WT","value":"AAFAAAAAAA"},{"name":"Best_rank_MHCI_9mer_score","value":"4.389"},{"name":"Best_rank_MHCI_9mer_epitope","value":"AAALAAAAA"},{"name":"Best_rank_MHCI_9mer_allele","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_9mer_score_WT","value":"5.068"},{"name":"Best_rank_MHCI_9mer_epitope_WT","value":"AAAFAAAAA"},{"name":"Bes
t_affinity_MHCI_9mer_score","value":"1534.8"},{"name":"Best_affinity_MHCI_9mer_allele","value":"HLA-C*03:04"},{"name":"Best_affinity_MHCI_9mer_epitope","value":"AAALAAAAA"},{"name":"Best_affinity_MHCI_9mer_score_WT","value":"1180.8"},{"name":"Best_affinity_MHCI_9mer_epitope_WT","value":"AAAFAAAAA"},{"name":"PHBR_I","value":"8.5743"},{"name":"Generator_rate_MHCI","value":"0"},{"name":"Generator_rate_CDN_MHCI","value":"0"},{"name":"Generator_rate_ADN_MHCI","value":"0"},{"name":"Best_affinity_MHCI_9mer_position_mutation","value":"4"},{"name":"Best_affinity_MHCI_9mer_anchor_mutated","value":"0"},{"name":"Best_rank_MHCII_score","value":"0.03"},{"name":"Best_rank_MHCII_score_epitope","value":"AAAAAAAAALAAAAA"},{"name":"Best_rank_MHCII_score_allele","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_rank_MHCII_score_WT","value":"0.27"},{"name":"Best_rank_MHCII_score_epitope_WT","value":"AAAAAAAAAFAAAAA"},{"name":"Best_affinity_MHCII_score","value":"15.65"},{"name":"Best_affinity_MHCII_epitope","value":"AAAAAAAALAAAAAA"},{"name":"Best_affinity_MHCII_allele","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_affinity_MHCII_score_WT","value":"22.47"},{"name":"Best_affinity_MHCII_epitope_WT","value":"AAAAAAAAFAAAAAA"},{"name":"PHBR_II","value":"NA"},{"name":"Generator_rate_MHCII","value":"25"},{"name":"Generator_rate_CDN_MHCII","value":"21"},{"name":"Generator_rate_ADN_MHCII","value":"4"},{"name":"MixMHCpred_best_peptide","value":"AAALAAAAA"},{"name":"MixMHCpred_best_score","value":"0.3862"},{"name":"MixMHCpred_best_rank","value":"0.3"},{"name":"MixMHCpred_best_allele","value":"HLA-B*07:02"},{"name":"PRIME_best_peptide","value":"AAALAAAAA"},{"name":"PRIME_best_score","value":"0.18933"},{"name":"PRIME_best_rank","value":"0.04"},{"name":"PRIME_best_allele","value":"HLA-B*07:02"},{"name":"MixMHC2pred_best_peptide","value":"AAAALAAAAAAAAAAA"},{"name":"MixMHC2pred_best_rank","value":"0.296"},{"name":"MixMHC2pred_best_allele","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Expression_
mutated_transcript","value":"0.44522"},{"name":"mutation_not_found_in_proteome","value":"1"},{"name":"Amplitude_MHCI_affinity_9mer","value":"0.56811"},{"name":"Amplitude_MHCI_affinity","value":"2.3262"},{"name":"Amplitude_MHCII_rank","value":"9"},{"name":"Pathogensimiliarity_MHCI_9mer","value":"0"},{"name":"Recognition_Potential_MHCI_9mer","value":"0"},{"name":"Pathogensimiliarity_MHCII","value":"0"},{"name":"DAI_MHCI_affinity","value":"7165.9"},{"name":"CDN_MHCI","value":"0"},{"name":"ADN_MHCI","value":"0"},{"name":"CDN_MHCII","value":"1"},{"name":"ADN_MHCII","value":"1"},{"name":"Tcell_predictor_score","value":"0.39121828442992074"},{"name":"Improved_Binder_MHCI","value":"0"},{"name":"Selfsimilarity_MHCII","value":"0.981263768075705"},{"name":"Selfsimilarity_MHCI","value":"0.9763465205057597"},{"name":"Selfsimilarity_MHCI_conserved_binder","value":"0.9763465205057597"},{"name":"Number_of_mismatches_MCHI","value":"1"},{"name":"Priority_score","value":"0"},{"name":"Neoag_immunogenicity","value":"13.16998"},{"name":"IEDB_Immunogenicity_MHCI","value":"0.18182"},{"name":"IEDB_Immunogenicity_MHCII","value":"0.36258"},{"name":"Dissimilarity_MHCI","value":"1"},{"name":"Dissimilarity_MHCII","value":"3e-05"},{"name":"vaxrank_binding_score","value":"0.01301"},{"name":"vaxrank_total_score","value":"0.00579"},{"name":"Hex_alignment_score_MHCI","value":"177"},{"name":"Hex_alignment_score_MHCII","value":"389"}],"annotator":"NeoFox","annotator_version":"0.7.0dev13","timestamp":"20220803154540091490","resources":[{"name":"netMHCpan","version":"4.1"},{"name":"netMHCIIpan","version":"4.0"},{"name":"mixMHCpred","version":"2.1"},{"name":"mixMHC2pred","version":"1.2"},{"name":"IEDB","url":"http://www.iedb.org/downloader.php?file_name=doc/tcell_full_v3.zip","hash":"09f325faadbe664b52d2bf0c1dd2b043","download_timestamp":"20220404152052"},{"name":"IMGT/HLA 
database","version":"3.47.0","url":"https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/Allelelist.txt","hash":"c3f7f4d9b72d96efc3d384bd6942f84a","download_timestamp":"20220404152052"},{"name":"Human Uniprot proteome","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000005640/UP000005640_9606.fasta.gz","hash":"c11e17f38507b1f1de4137498a37e6bc","download_timestamp":"20220404152052"},{"name":"Human Uniprot proteome isoforms","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000005640/UP000005640_9606_additional.fasta.gz","hash":"9394e800212b6fbe45a183ae4b5a582c","download_timestamp":"20220404152052"},{"name":"Mouse Uniprot proteome","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000000589/UP000000589_10090.fasta.gz","hash":"cc88e5c5c01bd586702b8db79a7da738","download_timestamp":"20220404152052"},{"name":"Mouse Uniprot proteome 
isoforms","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000000589/UP000000589_10090_additional.fasta.gz","hash":"390d3b7302c50fdc06df34ced194aa34","download_timestamp":"20220404152052"}]},"neoepitopes_mhc_i_i":[{"position":5,"mutated_peptide":"AAAAAAAAALAAAAA","wild_type_peptide":"AAAAAAAAAFAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":775.74,"rank_mutated":0.4,"affinity_wild_type":778.08,"rank_wild_type":0.8},{"position":6,"mutated_peptide":"AAAAAAAALAAAAAA","wild_type_peptide":"AAAAAAAAFAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":812.22,"rank_mutated":0.39,"affinity_wild_type":812.51,"rank_wild_type":1.61},{"position":7,"mutated_peptide":"AAAAAAALAAAAAAA","wild_type_peptide":"AAAAAAAFAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":817.84,"rank_mutated":0.34,"affinity_wild_type":831.29,"rank_wild_type":0.97},{"position":8,"mutated_peptide":"AAAAAALAAAAAAAA","wild_type_peptide":"AAAAAAFAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02"
,"gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":769.27,"rank_mutated":0.79,"affinity_wild_type":617.85,"rank_wild_type":0.33},{"position":9,"mutated_peptide":"AAAAALAAAAAAAAA","wild_type_peptide":"AAAAAFAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":707.77,"rank_mutated":0.37,"affinity_wild_type":543.14,"rank_wild_type":0.05},{"position":10,"mutated_peptide":"AAAALAAAAAAAAAA","wild_type_peptide":"AAAAFAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":667.61,"rank_mutated":0.1,"affinity_wild_type":505.19,"rank_wild_type":0.01},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":612.72,"rank_mutated":0.04,"affinity_wild_type":465.8},{"position":12,"mutated_peptide":"AALAAAAAAAAAAAA","wild_type_peptide":"AAFAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":703.69,"rank_mutated":0.18,"affinity_wild_type":560.78,"rank_wild_type":0.04},{"position":13,"mutated_peptide":"A
LAAAAAAAAAAAAA","wild_type_peptide":"AFAAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":824.45,"rank_mutated":0.56,"affinity_wild_type":678.63,"rank_wild_type":0.28},{"position":5,"mutated_peptide":"AAAAAAAAALAAAAA","wild_type_peptide":"AAAAAAAAAFAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":633.73,"rank_mutated":7.18,"affinity_wild_type":491.05,"rank_wild_type":6.89},{"position":6,"mutated_peptide":"AAAAAAAALAAAAAA","wild_type_peptide":"AAAAAAAAFAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":653.53,"rank_mutated":5.99,"affinity_wild_type":547.35,"rank_wild_type":6.37},{"position":7,"mutated_peptide":"AAAAAAALAAAAAAA","wild_type_peptide":"AAAAAAAFAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":694.34,"rank_mutated":5.77,"affinity_wild_type":574.09,"rank_wild_type":4.43},{"position":9,"mutated_peptide":"AAAAALAAAAAAAAA","wild_type_peptide":"AAAAAFAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","nam
e":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":584.81,"rank_mutated":3.23,"affinity_wild_type":467.56,"rank_wild_type":1.3},{"position":10,"mutated_peptide":"AAAALAAAAAAAAAA","wild_type_peptide":"AAAAFAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":524.6,"rank_mutated":1.23,"affinity_wild_type":443.41,"rank_wild_type":0.67},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":495.4,"rank_mutated":0.78,"affinity_wild_type":452.64,"rank_wild_type":0.47},{"position":12,"mutated_peptide":"AALAAAAAAAAAAAA","wild_type_peptide":"AAFAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":593.48,"rank_mutated":2.6,"affinity_wild_type":553.29,"rank_wild_type":1.57},{"position":13,"mutated_peptide":"ALAAAAAAAAAAAAA","wild_type_peptide":"AFAAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03"
,"protein":"02"}},"affinity_mutated":756.49,"rank_mutated":7.57,"affinity_wild_type":713.32,"rank_wild_type":5.31},{"position":9,"mutated_peptide":"AAAAALAAAAAAAAA","wild_type_peptide":"AAAAAFAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DRB1*04:04","beta_chain":{"full_name":"HLA-DRB1*04:04","name":"HLA-DRB1*04:04","gene":"DRB1","group":"04","protein":"04"}},"affinity_mutated":278.25,"rank_mutated":8.94,"affinity_wild_type":259.76,"rank_wild_type":11.67},{"position":10,"mutated_peptide":"AAAALAAAAAAAAAA","wild_type_peptide":"AAAAFAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DRB1*04:04","beta_chain":{"full_name":"HLA-DRB1*04:04","name":"HLA-DRB1*04:04","gene":"DRB1","group":"04","protein":"04"}},"affinity_mutated":179.85,"rank_mutated":4.6,"affinity_wild_type":199.63,"rank_wild_type":7.28},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DRB1*04:04","beta_chain":{"full_name":"HLA-DRB1*04:04","name":"HLA-DRB1*04:04","gene":"DRB1","group":"04","protein":"04"}},"affinity_mutated":142.15,"rank_mutated":3.57,"affinity_wild_type":169.07,"rank_wild_type":5.85},{"position":12,"mutated_peptide":"AALAAAAAAAAAAAA","wild_type_peptide":"AAFAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DRB1*04:04","beta_chain":{"full_name":"HLA-DRB1*04:04","name":"HLA-DRB1*04:04","gene":"DRB1","group":"04","protein":"04"}},"affinity_mutated":193.07,"rank_mutated":9.05,"affinity_wild_type":213.1,"rank_wild_type":13.85},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DRB1*15:01","beta_chain":{"full_name":"HLA-DRB1*15:01","name":"HLA-DRB1*15:01","gene":"DRB1","group":"15","protein":"01"}},"affinity_mutated":584.1,"rank_mutated":7.6,"affinity_wild_type":535.79,"rank_wild_type":9.79},{"position":5,"mutated_peptide":"AAAAAAAAALAAAAA","wild_type_peptide":"AAAAAAAAAFAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","n
ame":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":15.77,"rank_mutated":0.03,"affinity_wild_type":20.72,"rank_wild_type":0.27},{"position":6,"mutated_peptide":"AAAAAAAALAAAAAA","wild_type_peptide":"AAAAAAAAFAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":15.65,"rank_mutated":0.04,"affinity_wild_type":22.47,"rank_wild_type":0.41},{"position":7,"mutated_peptide":"AAAAAAALAAAAAAA","wild_type_peptide":"AAAAAAAFAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":17.14,"rank_mutated":0.07,"affinity_wild_type":23.89,"rank_wild_type":0.35},{"position":8,"mutated_peptide":"AAAAAALAAAAAAAA","wild_type_peptide":"AAAAAAFAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":21.13,"rank_mutated":0.39,"affinity_wild_type":27.61,"rank_wild_type":0.4},{"position":9,"mutated_peptide":"AAAAALAAAAAAAAA","wild_type_peptide":"AAAAAFAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","prote
in":"02"}},"affinity_mutated":22.5,"rank_mutated":0.33,"affinity_wild_type":29.02,"rank_wild_type":0.29},{"position":10,"mutated_peptide":"AAAALAAAAAAAAAA","wild_type_peptide":"AAAAFAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":22.35,"rank_mutated":0.13,"affinity_wild_type":27.86,"rank_wild_type":0.22},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":21.21,"rank_mutated":0.07,"affinity_wild_type":26.86,"rank_wild_type":0.11},{"position":12,"mutated_peptide":"AALAAAAAAAAAAAA","wild_type_peptide":"AAFAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":23.24,"rank_mutated":0.08,"affinity_wild_type":27.69,"rank_wild_type":0.08},{"position":13,"mutated_peptide":"ALAAAAAAAAAAAAA","wild_type_peptide":"AFAAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":28.07,"rank_mutated":0.1,"affinity_wild_type":29.39,"rank_wild_type":0.09},{"position":5,"mutated_peptide":"AAAAAAAAALAAAAA","wild_
type_peptide":"AAAAAAAAAFAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":3415.63,"rank_mutated":2.93,"affinity_wild_type":2585.77,"rank_wild_type":2.12},{"position":6,"mutated_peptide":"AAAAAAAALAAAAAA","wild_type_peptide":"AAAAAAAAFAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":2958.8,"rank_mutated":1.55,"affinity_wild_type":2154.78,"rank_wild_type":0.99},{"position":7,"mutated_peptide":"AAAAAAALAAAAAAA","wild_type_peptide":"AAAAAAAFAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":2710.11,"rank_mutated":1.53,"affinity_wild_type":2144.94,"rank_wild_type":1.06},{"position":8,"mutated_peptide":"AAAAAALAAAAAAAA","wild_type_peptide":"AAAAAAFAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":2459.74,"rank_mutated":3.18,"affinity_wild_type":1299.57,"rank_wild_type":0.73},{"position":9,"mutated_peptide":"AAAAALAAAAAAAAA","wild_type_peptide":"AAAAAFAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01
:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":1990.01,"rank_mutated":1.81,"affinity_wild_type":940.81,"rank_wild_type":0.32},{"position":10,"mutated_peptide":"AAAALAAAAAAAAAA","wild_type_peptide":"AAAAFAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":1561.25,"rank_mutated":0.83,"affinity_wild_type":698.42,"rank_wild_type":0.16},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":1308.41,"rank_mutated":0.62,"affinity_wild_type":542.44,"rank_wild_type":0.12},{"position":12,"mutated_peptide":"AALAAAAAAAAAAAA","wild_type_peptide":"AAFAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":1572.88,"rank_mutated":1.72,"affinity_wild_type":671.31,"rank_wild_type":0.5},{"position":13,"mutated_peptide":"ALAAAAAAAAAAAAA","wild_type_peptide":"AFAAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protei
n":"01"}},"affinity_mutated":2211.13,"rank_mutated":4.46,"affinity_wild_type":929.37,"rank_wild_type":2.68},{"position":5,"mutated_peptide":"AAAAAAAAALAAAAA","wild_type_peptide":"AAAAAAAAAFAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":988.63,"rank_mutated":4.52,"affinity_wild_type":658.16,"rank_wild_type":3.67},{"position":6,"mutated_peptide":"AAAAAAAALAAAAAA","wild_type_peptide":"AAAAAAAAFAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1040.53,"rank_mutated":4.39,"affinity_wild_type":702.02,"rank_wild_type":2.95},{"position":7,"mutated_peptide":"AAAAAAALAAAAAAA","wild_type_peptide":"AAAAAAAFAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":998.08,"rank_mutated":3.75,"affinity_wild_type":717.94,"rank_wild_type":2.67},{"position":8,"mutated_peptide":"AAAAAALAAAAAAAA","wild_type_peptide":"AAAAAAFAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1075.48,"rank_mutated":5.69,"affinity_wild_type":695.28,"rank_wild_type":1.57},{"position":9,"mutated_peptide":"AAAAALAAAAAAA
AA","wild_type_peptide":"AAAAAFAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1116.09,"rank_mutated":5.4,"affinity_wild_type":744.22,"rank_wild_type":1.03},{"position":10,"mutated_peptide":"AAAALAAAAAAAAAA","wild_type_peptide":"AAAAFAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1119.5,"rank_mutated":4.0,"affinity_wild_type":768.85,"rank_wild_type":0.61},{"position":11,"mutated_peptide":"AAALAAAAAAAAAAA","wild_type_peptide":"AAAFAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1106.38,"rank_mutated":2.76,"affinity_wild_type":834.87,"rank_wild_type":0.37},{"position":12,"mutated_peptide":"AALAAAAAAAAAAAA","wild_type_peptide":"AAFAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1248.22,"rank_mutated":4.25,"affinity_wild_type":1005.79,"rank_wild_type":1.58},{"position":13,"mutated_peptide":"ALAAAAAAAAAAAAA","wild_type_peptide":"AFAAAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HL
A-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1435.11,"rank_mutated":5.84,"affinity_wild_type":1159.82,"rank_wild_type":3.96}]},{"patient_identifier":"Ptx","gene":"BRCA2","position":[14],"wild_type_xmer":"AAAAAAAAAAAAAMAAAAAAAAAAAAA","mutated_xmer":"AAAAAAAAAAAAARAAAAAAAAAAAAA","rna_expression":0.715756594,"imputed_gene_expression":0.5365996317,"dna_variant_allele_frequency":0.556,"rna_variant_allele_frequency":null,"neofox_annotations":{"annotations":[{"name":"Best_rank_MHCI_score","value":"1.346"},{"name":"Best_rank_MHCI_score_epitope","value":"AAAAARAAA"},{"name":"Best_rank_MHCI_score_allele","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_score_WT","value":"4.347"},{"name":"Best_rank_MHCI_score_epitope_WT","value":"AAAAAMAAA"},{"name":"Best_affinity_MHCI_score","value":"199.38"},{"name":"Best_affinity_MHCI_epitope","value":"AAAAARAAA"},{"name":"Best_affinity_MHCI_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_score_WT","value":"1697.4"},{"name":"Best_affinity_MHCI_epitope_WT","value":"AAAAAMAAA"},{"name":"Best_rank_MHCI_9mer_score","value":"1.346"},{"name":"Best_rank_MHCI_9mer_epitope","value":"AAAAARAAA"},{"name":"Best_rank_MHCI_9mer_allele","value":"HLA-B*07:02"},{"name":"Best_rank_MHCI_9mer_score_WT","value":"4.347"},{"name":"Best_rank_MHCI_9mer_epitope_WT","value":"AAAAAMAAA"},{"name":"Best_affinity_MHCI_9mer_score","value":"199.38"},{"name":"Best_affinity_MHCI_9mer_allele","value":"HLA-B*07:02"},{"name":"Best_affinity_MHCI_9mer_epitope","value":"AAAAARAAA"},{"name":"Best_affinity_MHCI_9mer_score_WT","value":"1697.4"},{"name":"Best_affinity_MHCI_9mer_epitope_WT","value":"AAAAAMAAA"},{"name":"PHBR_I","value":"4.6189"},{"name":"Generator_rate_MHCI","value":"0"},{"name":"Generator_rate_CDN_MHCI","value":"0"},{"name":"Generator_rate_ADN_MHCI","value":"0"},{"name":"Best_affinity_MHCI_9mer_position_mutation","value
":"6"},{"name":"Best_affinity_MHCI_9mer_anchor_mutated","value":"0"},{"name":"Best_rank_MHCII_score","value":"0"},{"name":"Best_rank_MHCII_score_epitope","value":"AAARAAAAAAAAAAA"},{"name":"Best_rank_MHCII_score_allele","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Best_rank_MHCII_score_WT","value":"1.03"},{"name":"Best_rank_MHCII_score_epitope_WT","value":"AAAMAAAAAAAAAAA"},{"name":"Best_affinity_MHCII_score","value":"21.67"},{"name":"Best_affinity_MHCII_epitope","value":"AAAAAAAAARAAAAA"},{"name":"Best_affinity_MHCII_allele","value":"HLA-DQA1*01:02-DQB1*06:02"},{"name":"Best_affinity_MHCII_score_WT","value":"14.36"},{"name":"Best_affinity_MHCII_epitope_WT","value":"AAAAAAAAAMAAAAA"},{"name":"PHBR_II","value":"NA"},{"name":"Generator_rate_MHCII","value":"45"},{"name":"Generator_rate_CDN_MHCII","value":"28"},{"name":"Generator_rate_ADN_MHCII","value":"17"},{"name":"MixMHCpred_best_peptide","value":"AAAAAARAA"},{"name":"MixMHCpred_best_score","value":"0.51814"},{"name":"MixMHCpred_best_rank","value":"0.08"},{"name":"MixMHCpred_best_allele","value":"HLA-B*07:02"},{"name":"PRIME_best_peptide","value":"AAAAARAAA"},{"name":"PRIME_best_score","value":"0.18732"},{"name":"PRIME_best_rank","value":"0.06"},{"name":"PRIME_best_allele","value":"HLA-B*07:02"},{"name":"MixMHC2pred_best_peptide","value":"AAAARAAAAAAAAAA"},{"name":"MixMHC2pred_best_rank","value":"0.0176"},{"name":"MixMHC2pred_best_allele","value":"HLA-DPA1*01:03-DPB1*06:01"},{"name":"Expression_mutated_transcript","value":"0.39796"},{"name":"mutation_not_found_in_proteome","value":"1"},{"name":"Amplitude_MHCI_affinity_9mer","value":"5.6409"},{"name":"Amplitude_MHCI_affinity","value":"5.6409"},{"name":"Amplitude_MHCII_rank","value":"NA"},{"name":"Pathogensimiliarity_MHCI_9mer","value":"0"},{"name":"Recognition_Potential_MHCI_9mer","value":"0"},{"name":"Pathogensimiliarity_MHCII","value":"0"},{"name":"DAI_MHCI_affinity","value":"1498"},{"name":"CDN_MHCI","value":"0"},{"name":"ADN_MHCI","value":"0"},{"name":"CDN_MHCII
","value":"1"},{"name":"ADN_MHCII","value":"NA"},{"name":"Tcell_predictor_score","value":"0.5173265191848659"},{"name":"Improved_Binder_MHCI","value":"1"},{"name":"Selfsimilarity_MHCII","value":"0.9748046960972773"},{"name":"Selfsimilarity_MHCI","value":"0.966307602001344"},{"name":"Selfsimilarity_MHCI_conserved_binder","value":"NA"},{"name":"Number_of_mismatches_MCHI","value":"1"},{"name":"Priority_score","value":"0.32903"},{"name":"Neoag_immunogenicity","value":"13.16998"},{"name":"IEDB_Immunogenicity_MHCI","value":"0.19477"},{"name":"IEDB_Immunogenicity_MHCII","value":"0.42378"},{"name":"Dissimilarity_MHCI","value":"1"},{"name":"Dissimilarity_MHCII","value":"0.00382"},{"name":"vaxrank_binding_score","value":"0.80336"},{"name":"vaxrank_total_score","value":"0.31971"},{"name":"Hex_alignment_score_MHCI","value":"132"},{"name":"Hex_alignment_score_MHCII","value":"289"}],"annotator":"NeoFox","annotator_version":"0.7.0dev13","timestamp":"20220803154540101853","resources":[{"name":"netMHCpan","version":"4.1"},{"name":"netMHCIIpan","version":"4.0"},{"name":"mixMHCpred","version":"2.1"},{"name":"mixMHC2pred","version":"1.2"},{"name":"IEDB","url":"http://www.iedb.org/downloader.php?file_name=doc/tcell_full_v3.zip","hash":"09f325faadbe664b52d2bf0c1dd2b043","download_timestamp":"20220404152052"},{"name":"IMGT/HLA database","version":"3.47.0","url":"https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/Allelelist.txt","hash":"c3f7f4d9b72d96efc3d384bd6942f84a","download_timestamp":"20220404152052"},{"name":"Human Uniprot proteome","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000005640/UP000005640_9606.fasta.gz","hash":"c11e17f38507b1f1de4137498a37e6bc","download_timestamp":"20220404152052"},{"name":"Human Uniprot proteome 
isoforms","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000005640/UP000005640_9606_additional.fasta.gz","hash":"9394e800212b6fbe45a183ae4b5a582c","download_timestamp":"20220404152052"},{"name":"Mouse Uniprot proteome","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000000589/UP000000589_10090.fasta.gz","hash":"cc88e5c5c01bd586702b8db79a7da738","download_timestamp":"20220404152052"},{"name":"Mouse Uniprot proteome isoforms","version":"2022_01","url":"https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000000589/UP000000589_10090_additional.fasta.gz","hash":"390d3b7302c50fdc06df34ced194aa34","download_timestamp":"20220404152052"}]},"neoepitopes_mhc_i":[{"position":9,"mutated_peptide":"AAAAARAAA","wild_type_peptide":"AAAAAMAAA","allele_mhc_i":{"full_name":"HLA-B*07:02","name":"HLA-B*07:02","gene":"B","group":"07","protein":"02"},"affinity_mutated":199.38,"rank_mutated":1.346,"affinity_wild_type":1697.4,"rank_wild_type":4.347}],"neoepitopes_mhc_i_i":[{"position":1,"mutated_peptide":"AAAAAAAAAAAAARA","wild_type_peptide":"AAAAAAAAAAAAAMA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1511.84,"rank_mutated":2.77,"affinity_wild_type":1323.48,"rank_wild_type":4.81},{"position":2,"mutated_peptide":"AAAAAAAAAAAARAA","wild_type_peptide":"AAAAAAAAAAAAMAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"0
3","protein":"02"}},"affinity_mutated":1355.29,"rank_mutated":2.24,"affinity_wild_type":1185.64,"rank_wild_type":4.1},{"position":3,"mutated_peptide":"AAAAAAAAAAARAAA","wild_type_peptide":"AAAAAAAAAAAMAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1458.83,"rank_mutated":2.75,"affinity_wild_type":1103.83,"rank_wild_type":4.27},{"position":4,"mutated_peptide":"AAAAAAAAAARAAAA","wild_type_peptide":"AAAAAAAAAAMAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1244.72,"rank_mutated":2.32,"affinity_wild_type":970.05,"rank_wild_type":4.64},{"position":5,"mutated_peptide":"AAAAAAAAARAAAAA","wild_type_peptide":"AAAAAAAAAMAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1313.38,"rank_mutated":2.71,"affinity_wild_type":947.08,"rank_wild_type":4.03},{"position":6,"mutated_peptide":"AAAAAAAARAAAAAA","wild_type_peptide":"AAAAAAAAMAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1483.05,"rank_mutated":3.03,"affinity_wild_type":917.88,"rank_wild_type":4.47},{"position":7,"mutated_peptide":"
AAAAAAARAAAAAAA","wild_type_peptide":"AAAAAAAMAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1461.69,"rank_mutated":2.32,"affinity_wild_type":915.62,"rank_wild_type":5.39},{"position":8,"mutated_peptide":"AAAAAARAAAAAAAA","wild_type_peptide":"AAAAAAMAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1508.36,"rank_mutated":3.04,"affinity_wild_type":950.08,"rank_wild_type":4.72},{"position":9,"mutated_peptide":"AAAAARAAAAAAAAA","wild_type_peptide":"AAAAAMAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1466.44,"rank_mutated":2.71,"affinity_wild_type":907.21,"rank_wild_type":3.1},{"position":10,"mutated_peptide":"AAAARAAAAAAAAAA","wild_type_peptide":"AAAAMAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1395.27,"rank_mutated":2.26,"affinity_wild_type":897.36,"rank_wild_type":1.96},{"position":11,"mutated_peptide":"AAARAAAAAAAAAAA","wild_type_peptide":"AAAMAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*03:01
","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1464.32,"rank_mutated":2.77,"affinity_wild_type":995.3,"rank_wild_type":1.78},{"position":1,"mutated_peptide":"AAAAAAAAAAAAARA","wild_type_peptide":"AAAAAAAAAAAAAMA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":5840.63,"rank_mutated":2.39,"affinity_wild_type":5798.69,"rank_wild_type":6.28},{"position":2,"mutated_peptide":"AAAAAAAAAAAARAA","wild_type_peptide":"AAAAAAAAAAAAMAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":5167.9,"rank_mutated":1.5,"affinity_wild_type":4819.91,"rank_wild_type":4.45},{"position":3,"mutated_peptide":"AAAAAAAAAAARAAA","wild_type_peptide":"AAAAAAAAAAAMAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":4422.07,"rank_mutated":1.26,"affinity_wild_type":3998.71,"rank_wild_type":3.26},{"position":4,"mutated_peptide":"AAAAAAAAAARAAAA","wild_type_peptide":"AAAAAAAAAAMAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","g
roup":"06","protein":"01"}},"affinity_mutated":4015.1,"rank_mutated":1.59,"affinity_wild_type":3412.01,"rank_wild_type":3.32},{"position":5,"mutated_peptide":"AAAAAAAAARAAAAA","wild_type_peptide":"AAAAAAAAAMAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":4553.8,"rank_mutated":2.74,"affinity_wild_type":3508.29,"rank_wild_type":4.28},{"position":6,"mutated_peptide":"AAAAAAAARAAAAAA","wild_type_peptide":"AAAAAAAAMAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":4749.3,"rank_mutated":1.95,"affinity_wild_type":3495.48,"rank_wild_type":3.68},{"position":7,"mutated_peptide":"AAAAAAARAAAAAAA","wild_type_peptide":"AAAAAAAMAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":3586.24,"rank_mutated":0.44,"affinity_wild_type":3424.69,"rank_wild_type":2.93},{"position":8,"mutated_peptide":"AAAAAARAAAAAAAA","wild_type_peptide":"AAAAAAMAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":668.32,"affinity_wild_type":3016.89,"rank_wild_type":3.7},{"position":9,"mutated_peptide":"AAAAARAAAAAAA
AA","wild_type_peptide":"AAAAAMAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":394.13,"affinity_wild_type":2471.8,"rank_wild_type":2.33},{"position":10,"mutated_peptide":"AAAARAAAAAAAAAA","wild_type_peptide":"AAAAMAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":296.13,"affinity_wild_type":2096.25,"rank_wild_type":1.31},{"position":11,"mutated_peptide":"AAARAAAAAAAAAAA","wild_type_peptide":"AAAMAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DPA1*01:03-DPB1*06:01","alpha_chain":{"full_name":"HLA-DPA1*01:03","name":"HLA-DPA1*01:03","gene":"DPA1","group":"01","protein":"03"},"beta_chain":{"full_name":"HLA-DPB1*06:01","name":"HLA-DPB1*06:01","gene":"DPB1","group":"06","protein":"01"}},"affinity_mutated":264.65,"affinity_wild_type":1835.64,"rank_wild_type":1.03},{"position":1,"mutated_peptide":"AAAAAAAAAAAAARA","wild_type_peptide":"AAAAAAAAAAAAAMA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":34.83,"rank_mutated":0.03,"affinity_wild_type":33.35,"rank_wild_type":0.09},{"position":2,"mutated_peptide":"AAAAAAAAAAAARAA","wild_type_peptide":"AAAAAAAAAAAAMAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_c
hain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":30.04,"rank_mutated":0.03,"affinity_wild_type":26.8,"rank_wild_type":0.09},{"position":3,"mutated_peptide":"AAAAAAAAAAARAAA","wild_type_peptide":"AAAAAAAAAAAMAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":28.35,"rank_mutated":0.03,"affinity_wild_type":19.73,"rank_wild_type":0.15},{"position":4,"mutated_peptide":"AAAAAAAAAARAAAA","wild_type_peptide":"AAAAAAAAAAMAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":23.91,"rank_mutated":0.01,"affinity_wild_type":15.95,"rank_wild_type":0.06},{"position":5,"mutated_peptide":"AAAAAAAAARAAAAA","wild_type_peptide":"AAAAAAAAAMAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":21.67,"rank_mutated":0.01,"affinity_wild_type":14.36,"rank_wild_type":0.05},{"position":6,"mutated_peptide":"AAAAAAAARAAAAAA","wild_type_peptide":"AAAAAAAAMAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":22.36,"rank_mutated":0.02,"affinity_wild_t
ype":14.69,"rank_wild_type":0.05},{"position":7,"mutated_peptide":"AAAAAAARAAAAAAA","wild_type_peptide":"AAAAAAAMAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":22.74,"rank_mutated":0.03,"affinity_wild_type":16.47,"rank_wild_type":0.07},{"position":8,"mutated_peptide":"AAAAAARAAAAAAAA","wild_type_peptide":"AAAAAAMAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":27.93,"rank_mutated":0.06,"affinity_wild_type":17.78,"rank_wild_type":0.27},{"position":9,"mutated_peptide":"AAAAARAAAAAAAAA","wild_type_peptide":"AAAAAMAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":30.43,"rank_mutated":0.07,"affinity_wild_type":18.01,"rank_wild_type":0.13},{"position":10,"mutated_peptide":"AAAARAAAAAAAAAA","wild_type_peptide":"AAAAMAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":32.5,"rank_mutated":0.05,"affinity_wild_type":17.62,"rank_wild_type":0.06},{"position":11,"mutated_peptide":"AAARAAAAAAAAAAA","wild_type_peptide":"AAAMAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02
-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":34.6,"rank_mutated":0.05,"affinity_wild_type":17.16,"rank_wild_type":0.04},{"position":1,"mutated_peptide":"AAAAAAAAAAAAARA","wild_type_peptide":"AAAAAAAAAAAAAMA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":851.97,"rank_mutated":0.14,"affinity_wild_type":1053.73,"rank_wild_type":0.64},{"position":2,"mutated_peptide":"AAAAAAAAAAAARAA","wild_type_peptide":"AAAAAAAAAAAAMAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":677.62,"rank_mutated":0.13,"affinity_wild_type":898.47,"rank_wild_type":0.58},{"position":3,"mutated_peptide":"AAAAAAAAAAARAAA","wild_type_peptide":"AAAAAAAAAAAMAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":622.85,"rank_mutated":0.14,"affinity_wild_type":752.33,"rank_wild_type":0.82},{"position":4,"mutated_peptide":"AAAAAAAAAARAAAA","wild_type_peptide":"AAAAAAAAAAMAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB
1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":516.75,"rank_mutated":0.08,"affinity_wild_type":691.14,"rank_wild_type":0.46},{"position":5,"mutated_peptide":"AAAAAAAAARAAAAA","wild_type_peptide":"AAAAAAAAAMAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":482.08,"rank_mutated":0.07,"affinity_wild_type":689.4,"rank_wild_type":0.35},{"position":6,"mutated_peptide":"AAAAAAAARAAAAAA","wild_type_peptide":"AAAAAAAAMAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":507.17,"rank_mutated":0.08,"affinity_wild_type":721.2,"rank_wild_type":0.45},{"position":7,"mutated_peptide":"AAAAAAARAAAAAAA","wild_type_peptide":"AAAAAAAMAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":497.47,"rank_mutated":0.07,"affinity_wild_type":710.44,"rank_wild_type":0.48},{"position":8,"mutated_peptide":"AAAAAARAAAAAAAA","wild_type_peptide":"AAAAAAMAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":528.6,"rank_mutated":0.13,"affinity_wild_type":628.26,"rank_wi
ld_type":0.83},{"position":9,"mutated_peptide":"AAAAARAAAAAAAAA","wild_type_peptide":"AAAAAMAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":540.55,"rank_mutated":0.06,"affinity_wild_type":555.02,"rank_wild_type":0.19},{"position":10,"mutated_peptide":"AAAARAAAAAAAAAA","wild_type_peptide":"AAAAMAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":543.22,"rank_mutated":0.05,"affinity_wild_type":551.31,"rank_wild_type":0.05},{"position":11,"mutated_peptide":"AAARAAAAAAAAAAA","wild_type_peptide":"AAAMAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*03:01-DQB1*06:02","alpha_chain":{"full_name":"HLA-DQA1*03:01","name":"HLA-DQA1*03:01","gene":"DQA1","group":"03","protein":"01"},"beta_chain":{"full_name":"HLA-DQB1*06:02","name":"HLA-DQB1*06:02","gene":"DQB1","group":"06","protein":"02"}},"affinity_mutated":631.3,"rank_mutated":0.11,"affinity_wild_type":527.33,"rank_wild_type":0.03},{"position":1,"mutated_peptide":"AAAAAAAAAAAAARA","wild_type_peptide":"AAAAAAAAAAAAAMA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":1037.44,"rank_mutated":3.94,"affinity_wild_type":860.3,"rank_wild_type":5.49},{"position":2,"mutated_peptide":"AAAAAAAAAAAARAA","wild_type_peptide":"AAAAAAAAAAAAMAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02
","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":923.33,"rank_mutated":3.37,"affinity_wild_type":703.91,"rank_wild_type":4.81},{"position":3,"mutated_peptide":"AAAAAAAAAAARAAA","wild_type_peptide":"AAAAAAAAAAAMAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":933.47,"rank_mutated":3.57,"affinity_wild_type":621.56,"rank_wild_type":4.73},{"position":4,"mutated_peptide":"AAAAAAAAAARAAAA","wild_type_peptide":"AAAAAAAAAAMAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":784.97,"rank_mutated":2.47,"affinity_wild_type":591.88,"rank_wild_type":5.99},{"position":5,"mutated_peptide":"AAAAAAAAARAAAAA","wild_type_peptide":"AAAAAAAAAMAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":791.86,"rank_mutated":3.32,"affinity_wild_type":626.93,"rank_wild_type":9.48},{"position":6,"mutated_peptide":"AAAAAAAARAAAAAA","wild_type_peptide":"AAAAAAAAMAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02",
"name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":826.67,"rank_mutated":3.38,"affinity_wild_type":674.0,"rank_wild_type":8.54},{"position":7,"mutated_peptide":"AAAAAAARAAAAAAA","wild_type_peptide":"AAAAAAAMAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":793.58,"rank_mutated":2.06,"affinity_wild_type":661.79,"rank_wild_type":5.5},{"position":8,"mutated_peptide":"AAAAAARAAAAAAAA","wild_type_peptide":"AAAAAAMAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":759.9,"rank_mutated":2.5,"affinity_wild_type":592.84,"rank_wild_type":6.39},{"position":9,"mutated_peptide":"AAAAARAAAAAAAAA","wild_type_peptide":"AAAAAMAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":738.78,"rank_mutated":1.21,"affinity_wild_type":533.46,"rank_wild_type":2.07},{"position":10,"mutated_peptide":"AAAARAAAAAAAAAA","wild_type_peptide":"AAAAMAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":713.14,"rank_mutated":0.69,"affinity_wild_type":491.92,"rank_wild_type":
0.84},{"position":11,"mutated_peptide":"AAARAAAAAAAAAAA","wild_type_peptide":"AAAMAAAAAAAAAAA","isoform_mhc_i_i":{"name":"HLA-DQA1*01:02-DQB1*03:02","alpha_chain":{"full_name":"HLA-DQA1*01:02","name":"HLA-DQA1*01:02","gene":"DQA1","group":"01","protein":"02"},"beta_chain":{"full_name":"HLA-DQB1*03:02","name":"HLA-DQB1*03:02","gene":"DQB1","group":"03","protein":"02"}},"affinity_mutated":774.9,"rank_mutated":1.37,"affinity_wild_type":494.5,"rank_wild_type":0.56}]}] \ No newline at end of file diff --git a/docs/source/_static/test_neoantigen_candidates_annotated.tsv b/docs/source/_static/test_neoantigen_candidates_annotated.tsv index d518c559..bd846a42 100644 --- a/docs/source/_static/test_neoantigen_candidates_annotated.tsv +++ b/docs/source/_static/test_neoantigen_candidates_annotated.tsv @@ -1,3 +1,3 @@ -dnaVariantAlleleFrequency gene imputedGeneExpression mutation.mutatedXmer mutation.position mutation.wildTypeXmer patientIdentifier rnaExpression rnaVariantAlleleFrequency ADN_MHCI ADN_MHCII Amplitude_MHCII_rank Amplitude_MHCI_affinity Amplitude_MHCI_affinity_9mer Best_affinity_MHCII_allele Best_affinity_MHCII_allele_WT Best_affinity_MHCII_epitope Best_affinity_MHCII_epitope_WT Best_affinity_MHCII_score Best_affinity_MHCII_score_WT Best_affinity_MHCI_9mer_allele Best_affinity_MHCI_9mer_allele_WT Best_affinity_MHCI_9mer_anchor_mutated Best_affinity_MHCI_9mer_epitope Best_affinity_MHCI_9mer_epitope_WT Best_affinity_MHCI_9mer_position_mutation Best_affinity_MHCI_9mer_score Best_affinity_MHCI_9mer_score_WT Best_affinity_MHCI_allele Best_affinity_MHCI_allele_WT Best_affinity_MHCI_epitope Best_affinity_MHCI_epitope_WT Best_affinity_MHCI_score Best_affinity_MHCI_score_WT Best_rank_MHCII_score Best_rank_MHCII_score_WT Best_rank_MHCII_score_allele Best_rank_MHCII_score_allele_WT Best_rank_MHCII_score_epitope Best_rank_MHCII_score_epitope_WT Best_rank_MHCI_9mer_allele Best_rank_MHCI_9mer_allele_WT Best_rank_MHCI_9mer_epitope Best_rank_MHCI_9mer_epitope_WT 
Best_rank_MHCI_9mer_score Best_rank_MHCI_9mer_score_WT Best_rank_MHCI_score Best_rank_MHCI_score_WT Best_rank_MHCI_score_allele Best_rank_MHCI_score_allele_WT Best_rank_MHCI_score_epitope Best_rank_MHCI_score_epitope_WT CDN_MHCI CDN_MHCII DAI_MHCI_affinity_cutoff500nM Dissimilarity_MHCI_cutoff500nM Expression_mutated_transcript Generator_rate IEDB_Immunogenicity_MHCI_cutoff500nM Improved_Binder_MHCI MixMHC2pred_best_allele MixMHC2pred_best_peptide MixMHC2pred_best_rank MixMHCpred_best_allele MixMHCpred_best_peptide MixMHCpred_best_rank MixMHCpred_best_score Neoag_immunogenicity Number_of_mismatches_MCHI PHBR-I PHBR-II Pathogensimiliarity_MHCI_affinity_9mer Priority_score Recognition_Potential_MHCI_affinity_9mer Selfsimilarity_MHCI_conserved_binder Tcell_predictor_score_cutoff500nM mutation_not_found_in_proteome transcript_expression vaxrank_binding_score vaxrank_total_score -0.857 BRCA2 0.5365996317 AAAAAAAAAAAAALAAAAAAAAAAAAA 14 AAAAAAAAAAAAAFAAAAAAAAAAAAA Ptx 0.5365996317 0.294 0 0 1 2.5475 2.5475 HLA-DQA10102-DQB10602 HLA-DQA10102-DQB10602 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA 29.39 27.2 HLA-C*03:04 HLA-C*03:04 1 AAAAAAAAL AAAAAAAAF 9 5.2 13.3 HLA-C*03:04 HLA-C*03:04 AAAAAAAAL AAAAAAAAF 5.2 13.3 0.01 0.01 HLA-DQA10301-DQB10602 HLA-DQA10301-DQB10602 AAAAAAAAAAALAAA AAAAAAAAAAAFAAA HLA-C*03:04 HLA-C*03:04 AAAAAAAAL AAAAAAAAF 0.019 0.0495 0.019 0.0495 HLA-C*03:04 HLA-C*03:04 AAAAAAAAL AAAAAAAAF 1 1 8.1 0.99994 0.45987 1 0.18288 1 DPA1_01_03__DPB1_06_01 AAAALAAAAAAAAAAA 0.296 B0702 AAAAAAAAL 0.03 0.622 13.16998 1 0.10424 0.05128 0 0.21014 0 NA 0.40831263969265064 1 0.5195068939999999 6.9366 3.1899 -0.556 BRCA2 0.5365996317 AAAAAAAAAAAAARAAAAAAAAAAAAA 14 AAAAAAAAAAAAAMAAAAAAAAAAAAA Ptx 0.5365996317 0.17300000000000001 0 0 2 4.3574 4.3574 HLA-DQA10102-DQB10602 HLA-DQA10102-DQB10602 AAAAAAAARAAAAAA AAAAAAAAMAAAAAA 29.75 24.29 HLA-B*07:02 HLA-B*07:02 0 AARAAAAAA AAMAAAAAA 3 96.8 482.9 HLA-B*07:02 HLA-B*07:02 AARAAAAAA AAMAAAAAA 96.8 482.9 0.01 0.02 HLA-DQA10301-DQB10602 
HLA-DQA10301-DQB10602 AAAAAAAAAAAARAA AAAAAAAAAAAAMAA HLA-B*07:02 HLA-B*07:02 AARAAAAAA AAMAAAAAA 0.2895 0.8137 0.2895 0.8137 HLA-B*07:02 HLA-B*07:02 AARAAAAAA AAMAAAAAA 0 1 386.1 1 0.29835 0 0.18698 1 DPA1_01_03__DPB1_06_01 AAAARAAAAAAAAAA 0.0176 B0702 AARAAAAAA 0.06 0.54766 13.16998 1 1.0223 0.05913 0 0.13667 0 NA 0.4769777371767941 1 0.715756594 3.7801 1.1278 +patientIdentifier gene mutatedXmer wildTypeXmer position dnaVariantAlleleFrequency rnaVariantAlleleFrequency rnaExpression imputedGeneExpression ADN_MHCI ADN_MHCII Amplitude_MHCII_rank Amplitude_MHCI_affinity Amplitude_MHCI_affinity_9mer Best_affinity_MHCII_allele Best_affinity_MHCII_epitope Best_affinity_MHCII_epitope_WT Best_affinity_MHCII_score Best_affinity_MHCII_score_WT Best_affinity_MHCI_9mer_allele Best_affinity_MHCI_9mer_anchor_mutated Best_affinity_MHCI_9mer_epitope Best_affinity_MHCI_9mer_epitope_WT Best_affinity_MHCI_9mer_position_mutation Best_affinity_MHCI_9mer_score Best_affinity_MHCI_9mer_score_WT Best_affinity_MHCI_allele Best_affinity_MHCI_epitope Best_affinity_MHCI_epitope_WT Best_affinity_MHCI_score Best_affinity_MHCI_score_WT Best_rank_MHCII_score Best_rank_MHCII_score_WT Best_rank_MHCII_score_allele Best_rank_MHCII_score_epitope Best_rank_MHCII_score_epitope_WT Best_rank_MHCI_9mer_allele Best_rank_MHCI_9mer_epitope Best_rank_MHCI_9mer_epitope_WT Best_rank_MHCI_9mer_score Best_rank_MHCI_9mer_score_WT Best_rank_MHCI_score Best_rank_MHCI_score_WT Best_rank_MHCI_score_allele Best_rank_MHCI_score_epitope Best_rank_MHCI_score_epitope_WT CDN_MHCI CDN_MHCII DAI_MHCI_affinity Dissimilarity_MHCI Dissimilarity_MHCII Expression_mutated_transcript Generator_rate_ADN_MHCI Generator_rate_ADN_MHCII Generator_rate_CDN_MHCI Generator_rate_CDN_MHCII Generator_rate_MHCI Generator_rate_MHCII Hex_alignment_score_MHCI Hex_alignment_score_MHCII IEDB_Immunogenicity_MHCI IEDB_Immunogenicity_MHCII Improved_Binder_MHCI MixMHC2pred_best_allele MixMHC2pred_best_peptide MixMHC2pred_best_rank MixMHCpred_best_allele 
MixMHCpred_best_peptide MixMHCpred_best_rank MixMHCpred_best_score Neoag_immunogenicity Number_of_mismatches_MCHI PHBR_I PHBR_II PRIME_best_allele PRIME_best_peptide PRIME_best_rank PRIME_best_score Pathogensimiliarity_MHCII Pathogensimiliarity_MHCI_9mer Priority_score Recognition_Potential_MHCI_9mer Selfsimilarity_MHCI Selfsimilarity_MHCII Selfsimilarity_MHCI_conserved_binder Tcell_predictor_score mutation_not_found_in_proteome vaxrank_binding_score vaxrank_total_score +Ptx BRCA2 AAAAAAAAAAAAALAAAAAAAAAAAAA AAAAAAAAAAAAAFAAAAAAAAAAAAA 14 0.857 NA 0.5195068939999999 0.5365996317 0 1 9 2.3262 0.56811 HLA-DQA1*01:02-DQB1*06:02 AAAAAAAALAAAAAA AAAAAAAAFAAAAAA 15.65 22.47 HLA-C*03:04 0 AAALAAAAA AAAFAAAAA 4 1534.8 1180.8 HLA-A*02:01 AALAAAAAAA AAFAAAAAAA 1018.2 8184.1 0.03 0.27 HLA-DQA1*01:02-DQB1*06:02 AAAAAAAAALAAAAA AAAAAAAAAFAAAAA HLA-B*07:02 AAALAAAAA AAAFAAAAA 4.389 5.068 4.389 5.068 HLA-B*07:02 AAALAAAAA AAAFAAAAA 0 1 7165.9 1 3e-05 0.44522 0 4 0 21 0 25 177 389 0.18182 0.36258 0 HLA-DPA1*01:03-DPB1*06:01 AAAALAAAAAAAAAAA 0.296 HLA-B*07:02 AAALAAAAA 0.3 0.3862 13.16998 1 8.5743 NA HLA-B*07:02 AAALAAAAA 0.04 0.18933 0 0 0 0 0.9763465205057597 0.981263768075705 0.9763465205057597 0.39121828442992074 1 0.01301 0.00579 +Ptx BRCA2 AAAAAAAAAAAAARAAAAAAAAAAAAA AAAAAAAAAAAAAMAAAAAAAAAAAAA 14 0.556 NA 0.715756594 0.5365996317 0 NA NA 5.6409 5.6409 HLA-DQA1*01:02-DQB1*06:02 AAAAAAAAARAAAAA AAAAAAAAAMAAAAA 21.67 14.36 HLA-B*07:02 0 AAAAARAAA AAAAAMAAA 6 199.38 1697.4 HLA-B*07:02 AAAAARAAA AAAAAMAAA 199.38 1697.4 0 1.03 HLA-DPA1*01:03-DPB1*06:01 AAARAAAAAAAAAAA AAAMAAAAAAAAAAA HLA-B*07:02 AAAAARAAA AAAAAMAAA 1.346 4.347 1.346 4.347 HLA-B*07:02 AAAAARAAA AAAAAMAAA 0 1 1498 1 0.00382 0.39796 0 17 0 28 0 45 132 289 0.19477 0.42378 1 HLA-DPA1*01:03-DPB1*06:01 AAAARAAAAAAAAAA 0.0176 HLA-B*07:02 AAAAAARAA 0.08 0.51814 13.16998 1 4.6189 NA HLA-B*07:02 AAAAARAAA 0.06 0.18732 0 0 0.32903 0 0.966307602001344 0.9748046960972773 NA 0.5173265191848659 1 0.80336 0.31971 diff --git 
a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 00000000..16bc7a1b --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +neofox +====== + +.. toctree:: + :maxdepth: 4 + + neofox diff --git a/docs/source/neofox.expression_imputation.rst b/docs/source/neofox.expression_imputation.rst new file mode 100644 index 00000000..677e8852 --- /dev/null +++ b/docs/source/neofox.expression_imputation.rst @@ -0,0 +1,21 @@ +neofox.expression\_imputation package +===================================== + +Submodules +---------- + +neofox.expression\_imputation.expression\_imputation module +----------------------------------------------------------- + +.. automodule:: neofox.expression_imputation.expression_imputation + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: neofox.expression_imputation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/neofox.model.rst b/docs/source/neofox.model.rst new file mode 100644 index 00000000..1a5a496b --- /dev/null +++ b/docs/source/neofox.model.rst @@ -0,0 +1,53 @@ +neofox.model package +==================== + +Submodules +---------- + +neofox.model.conversion module +------------------------------ + +.. automodule:: neofox.model.conversion + :members: + :undoc-members: + :show-inheritance: + +neofox.model.factories module +----------------------------- + +.. automodule:: neofox.model.factories + :members: + :undoc-members: + :show-inheritance: + +neofox.model.mhc\_parser module +------------------------------- + +.. automodule:: neofox.model.mhc_parser + :members: + :undoc-members: + :show-inheritance: + +neofox.model.neoantigen module +------------------------------ + +.. automodule:: neofox.model.neoantigen + :members: + :undoc-members: + :show-inheritance: + +neofox.model.validation module +------------------------------ + +.. 
automodule:: neofox.model.validation + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: neofox.model + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/neofox.rst b/docs/source/neofox.rst new file mode 100644 index 00000000..e555eb0e --- /dev/null +++ b/docs/source/neofox.rst @@ -0,0 +1,46 @@ +neofox package +============== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + neofox.expression_imputation + neofox.model + +Submodules +---------- + +neofox.annotator module +----------------------- + +.. automodule:: neofox.annotator + :members: + :undoc-members: + :show-inheritance: + +neofox.exceptions module +------------------------ + +.. automodule:: neofox.exceptions + :members: + :undoc-members: + :show-inheritance: + +neofox.neofox module +-------------------- + +.. automodule:: neofox.neofox + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: neofox + :members: + :undoc-members: + :show-inheritance: diff --git a/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py b/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py index 98236dae..8f17985a 100755 --- a/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py +++ b/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py @@ -18,7 +18,6 @@ # You should have received a copy of the GNU General Public License # along with this program. 
If not, see .# from typing import List -from neofox.exceptions import NeofoxCommandException from pandas.errors import EmptyDataError from neofox.helpers.epitope_helper import EpitopeHelper @@ -28,7 +27,8 @@ from neofox.helpers.runner import Runner -from neofox.model.neoantigen import Annotation, Mhc2, Mhc2GeneName, MhcAllele, Mutation +from neofox.model.neoantigen import Annotation, Mhc2, Mhc2GeneName, MhcAllele, PredictedEpitope, Mhc2Isoform, \ + Neoantigen from neofox.model.factories import AnnotationFactory from neofox.helpers import intermediate_files import pandas as pd @@ -40,13 +40,19 @@ RANK = "%Rank_best" -class MixMhc2Pred: +class MixMHC2pred: + + ANNOTATION_PREFIX = 'MixMHC2pred' + ANNOTATION_PREFIX_WT = 'MixMHC2pred_WT' + def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser): self.runner = runner self.configuration = configuration self.available_alleles = self._load_available_alleles() self.mhc_parser = mhc_parser + self.results = None + def _load_available_alleles(self): """ loads file with available HLA II alllels for MixMHC2pred prediction, returns set @@ -58,22 +64,22 @@ def _load_available_alleles(self): return list(alleles["AlleleName"]) @staticmethod - def _combine_dq_dp_alleles(list_alleles: List[str]): + def _combine_dq_dp_alleles(alpha_alleles: List[str], beta_alleles: List[str]): """returns patient HLA-DQ/HLA-DP allele combination that are relevant for MixMHC2pred""" - # TODO: we need to clarify the formation of pairs here AA, BB, AB - # TODO: what are these triplets? + # NOTE: there are some pairs of alleles which positive/negative binding could not be deconvoluted + # hence the triplets. In MixMHC2pred the triplets are only of the form of two alpha chains and one beta chain. 
+ # NOTE2: this may be gone after upgrading to MixMHC2pred alleles_pairs = [ "__".join([allele_1, allele_2]) - for allele_1 in list_alleles - for allele_2 in list_alleles - if allele_1 != allele_2 + for allele_1 in alpha_alleles + for allele_2 in beta_alleles ] alleles_triplets = [ "__".join([allele_1, allele_2, allele_3]) - for allele_1 in list_alleles - for allele_2 in list_alleles - for allele_3 in list_alleles - if allele_1 != allele_2 and allele_1 != allele_3 and allele_2 != allele_3 + for allele_1 in alpha_alleles + for allele_2 in alpha_alleles + for allele_3 in beta_alleles + if allele_1 != allele_2 ] return alleles_pairs + alleles_triplets @@ -88,6 +94,16 @@ def _get_mixmhc2_allele_representation(hla_alleles: List[MhcAllele]): ) ) + @staticmethod + def _get_mixmhc2_isoform_representation(isoform: Mhc2Isoform): + + beta_chain = MixMHC2pred._get_mixmhc2_allele_representation([isoform.beta_chain])[0] + if isoform.alpha_chain is not None and isoform.alpha_chain.name: + # for DR only beta chain is provided + alpha_chain = MixMHC2pred._get_mixmhc2_allele_representation([isoform.alpha_chain])[0] + return "{alpha}__{beta}".format(alpha=alpha_chain, beta=beta_chain) + return beta_chain + def transform_hla_ii_alleles_for_prediction(self, mhc: List[Mhc2]) -> List[str]: """ prepares list of HLA II alleles for prediction in required format @@ -99,10 +115,12 @@ def transform_hla_ii_alleles_for_prediction(self, mhc: List[Mhc2]) -> List[str]: dqb1_alleles = get_alleles_by_gene(mhc, Mhc2GeneName.DQB1) dp_allele_combinations = self._combine_dq_dp_alleles( - self._get_mixmhc2_allele_representation(dpa1_alleles + dpb1_alleles) + alpha_alleles=self._get_mixmhc2_allele_representation(dpa1_alleles), + beta_alleles=self._get_mixmhc2_allele_representation(dpb1_alleles) ) dq_allele_combinations = self._combine_dq_dp_alleles( - self._get_mixmhc2_allele_representation(dqa1_alleles + dqb1_alleles) + alpha_alleles=self._get_mixmhc2_allele_representation(dqa1_alleles), + 
beta_alleles=self._get_mixmhc2_allele_representation(dqb1_alleles) ) return [ @@ -113,49 +131,58 @@ def transform_hla_ii_alleles_for_prediction(self, mhc: List[Mhc2]) -> List[str]: if a in self.available_alleles ] - def _mixmhc2prediction( - self, mhc2: List[str], potential_ligand_sequences - ) -> pd.DataFrame: - """ - Performs MixMHC2pred prediction for desired hla allele and writes result to temporary file. - """ - tmpfasta = intermediate_files.create_temp_fasta( - potential_ligand_sequences, prefix="tmp_sequence_" - ) - outtmp = intermediate_files.create_temp_file( - prefix="mixmhc2pred", suffix=".txt" - ) + def _parse_mixmhc2pred_output(self, filename: str) -> List[PredictedEpitope]: + + parsed_results = [] + try: + results = pd.read_csv(filename, sep="\t", comment="#") + except EmptyDataError: + logger.error("Results from MixMHC2pred are empty, something went wrong") + results = pd.DataFrame() + + for _, row in results.iterrows(): + # when MixMHC2pred returns no results it provides a row with the peptide and NAs for other fields + # pandas reads NAs as float nan. 
Skip these + if isinstance(row[ALLELE], str): + parsed_results.append( + PredictedEpitope( + isoform_mhc_i_i=self.mhc_parser.parse_mhc2_isoform(row[ALLELE]), + mutated_peptide=row[PEPTIDE], + rank_mutated=float(row[RANK]), + affinity_mutated=None + )) + return parsed_results + + def _mixmhc2prediction(self, isoforms: List[str], potential_ligand_sequences: List[str]) -> List[PredictedEpitope]: + + tmpfasta = intermediate_files.create_temp_fasta(potential_ligand_sequences, prefix="tmp_sequence_") + outtmp = intermediate_files.create_temp_file(prefix="mixmhc2pred", suffix=".txt") cmd = [ self.configuration.mix_mhc2_pred, "-a", - " ".join(mhc2), + " ".join(isoforms), "-i", tmpfasta, "-o", outtmp, ] self.runner.run_command(cmd) - try: - results = pd.read_csv(outtmp, sep="\t", comment="#") - except EmptyDataError: - message = "Results from MixMHC2pred are empty, something went wrong" - logger.error(message) - raise NeofoxCommandException(message) + results = self._parse_mixmhc2pred_output(filename=outtmp) os.remove(outtmp) + os.remove(tmpfasta) return results - def run(self, mhc: List[Mhc2], mutation: Mutation, uniprot): + def run(self, mhc: List[Mhc2], neoantigen: Neoantigen, uniprot): """ Runs MixMHC2pred: prediction for peptides of length 13 to 18 based on Suppl Fig. 6 a in Racle, J., et al., Nat. Biotech. (2019). Robust prediction of HLA class II epitopes by deep motif deconvolution of immunopeptidomes. 
""" - best_peptide = None - best_rank = None - best_allele = None + # TODO: get rid of this + self.results = None + potential_ligand_sequences = EpitopeHelper.generate_nmers( - mutation=mutation, lengths=[13, 14, 15, 16, 17, 18], uniprot=uniprot - ) + neoantigen=neoantigen, lengths=[13, 14, 15, 16, 17, 18], uniprot=uniprot) # filter mps shorter < 13aa filtered_sequences = list( filter(lambda x: len(x) >= 13, potential_ligand_sequences) @@ -163,29 +190,35 @@ def run(self, mhc: List[Mhc2], mutation: Mutation, uniprot): if len(filtered_sequences) > 0: mhc2_alleles = self.transform_hla_ii_alleles_for_prediction(mhc) if len(mhc2_alleles) > 0: - results = self._mixmhc2prediction(mhc2_alleles, filtered_sequences) - # get best result by minimum rank - best_result = results[results[RANK] == results[RANK].min()] - try: - best_peptide = best_result[PEPTIDE].iat[0] - best_rank = best_result[RANK].iat[0] - best_allele = self.mhc_parser.parse_mhc2_isoform(best_result[ALLELE].iat[0]).name - except IndexError: - logger.info("MixMHC2pred returned no best result") + self.results = self._mixmhc2prediction( + isoforms=mhc2_alleles, potential_ligand_sequences=filtered_sequences) else: logger.warning("None of the MHC II alleles are supported by MixMHC2pred") - return best_peptide, best_rank, best_allele - def get_annotations(self, mhc: List[Mhc2], mutation: Mutation, uniprot) -> List[Annotation]: - best_peptide, best_rank, best_allele = self.run(mhc=mhc, mutation=mutation, uniprot=uniprot) + def run_peptide(self, peptide: str, isoform: Mhc2Isoform) -> PredictedEpitope: + """ + Performs MixMHC2pred prediction for desired hla allele and writes result to temporary file. 
+ """ + result = None + isoform_representation = self._get_mixmhc2_isoform_representation(isoform) + if isoform_representation in self.available_alleles: + results = self._mixmhc2prediction( + isoforms=[isoform_representation], + potential_ligand_sequences=[peptide]) + if results: + result = results[0] + return result + + def get_annotations(self) -> List[Annotation]: + best_result = EpitopeHelper.select_best_by_rank(predictions=self.results) return [ AnnotationFactory.build_annotation( - value=best_peptide, name="MixMHC2pred_best_peptide" + value=best_result.mutated_peptide, name="MixMHC2pred_bestRank_peptide" ), AnnotationFactory.build_annotation( - value=best_rank, name="MixMHC2pred_best_rank" + value=best_result.rank_mutated, name="MixMHC2pred_bestRank_rank" ), AnnotationFactory.build_annotation( - value=best_allele, name="MixMHC2pred_best_allele" + value=best_result.isoform_mhc_i_i.name, name="MixMHC2pred_bestRank_allele" ), ] diff --git a/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py b/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py index 36132f9d..82c06603 100755 --- a/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py +++ b/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py @@ -22,7 +22,7 @@ from neofox.helpers.epitope_helper import EpitopeHelper from neofox.helpers.runner import Runner from neofox.model.mhc_parser import MhcParser -from neofox.model.neoantigen import Annotation, Mhc1, MhcAllele, Mutation +from neofox.model.neoantigen import Annotation, Mhc1, MhcAllele, PredictedEpitope, Neoantigen from neofox.model.factories import AnnotationFactory from neofox.helpers import intermediate_files import pandas as pd @@ -38,12 +38,18 @@ class MixMHCpred: + + ANNOTATION_PREFIX = 'MixMHCpred' + ANNOTATION_PREFIX_WT = 'MixMHCpred_WT' + def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser): self.runner = runner self.configuration = configuration self.available_alleles = self._load_available_alleles() self.mhc_parser = 
mhc_parser + self.results = None + def _load_available_alleles(self): """ loads file with available HLA II alllels for MixMHC2pred prediction, returns set @@ -64,9 +70,29 @@ def _get_mixmhc_allele_representation(self, mhc_alleles: List[MhcAllele]): ) ) - def _mixmhcprediction( - self, mhc_alleles: List[str], potential_ligand_sequences - ) -> pd.DataFrame: + def _parse_mixmhcpred_output(self, filename: str) -> List[PredictedEpitope]: + + parsed_results = [] + try: + results = pd.read_csv(filename, sep="\t", comment="#") + except EmptyDataError: + logger.error("Results from MixMHCpred are empty, something went wrong") + results = pd.DataFrame() + + for _, row in results.iterrows(): + # when MixMHCpred returns no results it provides a row with the peptide and NAs for other fields + # pandas reads NAs as float nan. Skip these + if isinstance(row[ALLELE], str): + parsed_results.append( + PredictedEpitope( + allele_mhc_i=self.mhc_parser.parse_mhc_allele(row[ALLELE]), + mutated_peptide=row[PEPTIDE], + affinity_mutated=float(row[SCORE]), + rank_mutated=float(row[RANK]), + )) + return parsed_results + + def _mixmhcprediction(self, mhc_alleles: List[str], potential_ligand_sequences) -> List[PredictedEpitope]: """ Performs MixMHCpred prediction for desired hla allele and writes result to temporary file. """ @@ -86,59 +112,54 @@ def _mixmhcprediction( self.runner.run_command( cmd=command ) - try: - results = pd.read_csv(outtmp, sep="\t", comment="#") - except EmptyDataError: - message = "Results from MixMHCpred are empty, something went wrong [{}]. 
MHC I alleles {}, ligands {}".format( - " ".join(command), ",".join(mhc_alleles), potential_ligand_sequences - ) - logger.error(message) - results = pd.DataFrame() + results = self._parse_mixmhcpred_output(filename=outtmp) os.remove(outtmp) + os.remove(tmpfasta) return results - def run(self, mutation: Mutation, mhc: List[Mhc1], uniprot): + def run(self, neoantigen: Neoantigen, mhc: List[Mhc1], uniprot): """Wrapper for MHC binding prediction, extraction of best epitope and check if mutation is directed to TCR""" - best_peptide = None - best_rank = None - best_allele = None - best_score = None + + # TODO: get rid of this + self.results = None + + # TODO: we may want to extend this to 8 to 14 bp (coordinate this with netMHCpan) potential_ligand_sequences = EpitopeHelper.generate_nmers( - mutation=mutation, lengths=[8, 9, 10, 11], uniprot=uniprot + neoantigen=neoantigen, lengths=[8, 9, 10, 11], uniprot=uniprot ) if len(potential_ligand_sequences) > 0: mhc1_alleles = self._get_mixmhc_allele_representation([a for m in mhc for a in m.alleles]) if len(mhc1_alleles) > 0: - results = self._mixmhcprediction(mhc1_alleles, potential_ligand_sequences) - try: - # get best result by maximum score - best_result = results[results[SCORE] == results[SCORE].max()] - best_peptide = best_result[PEPTIDE].iat[0] - best_rank = best_result[RANK].iat[0] - # normalize the HLA allele name - best_allele = self.mhc_parser.parse_mhc_allele(best_result[ALLELE].iat[0]).name - best_score = best_result[SCORE].iat[0] - except (IndexError, KeyError): - logger.info("MixMHCpred returned no best result") + self.results = self._mixmhcprediction(mhc1_alleles, potential_ligand_sequences) else: logger.warning("None of the MHC I alleles are supported by MixMHCpred") - return best_peptide, best_rank, best_allele, best_score - def get_annotations(self, mutation: Mutation, mhc: List[Mhc1], uniprot) -> List[Annotation]: - best_peptide, best_rank, best_allele, best_score = self.run( - mhc=mhc, mutation=mutation, 
uniprot=uniprot - ) + def run_peptide(self, peptide: str, allele: MhcAllele) -> PredictedEpitope: + """Runs MixMHCpred on a single peptide""" + result = None + mhc1_alleles = self._get_mixmhc_allele_representation([allele]) + if len(mhc1_alleles) > 0 and 8 <= len(peptide) <= 14: + results = self._mixmhcprediction(mhc1_alleles, [peptide]) + if results: + result = results[0] + else: + logger.warning("None of the MHC I alleles are supported by MixMHCpred") + return result + + def get_annotations(self) -> List[Annotation]: + + best_result = EpitopeHelper.select_best_by_affinity(predictions=self.results, maximum=True) return [ AnnotationFactory.build_annotation( - value=best_peptide, name="MixMHCpred_best_peptide" + value=best_result.mutated_peptide, name="MixMHCpred_bestScore_peptide" ), AnnotationFactory.build_annotation( - value=best_score, name="MixMHCpred_best_score" + value=best_result.affinity_mutated, name="MixMHCpred_bestScore_score" ), AnnotationFactory.build_annotation( - value=best_rank, name="MixMHCpred_best_rank" + value=best_result.rank_mutated, name="MixMHCpred_bestScore_rank" ), AnnotationFactory.build_annotation( - value=best_allele, name="MixMHCpred_best_allele" + value=best_result.allele_mhc_i.name, name="MixMHCpred_bestScore_allele" ), ] diff --git a/neofox/MHC_predictors/netmhcpan/abstract_netmhcpan_predictor.py b/neofox/MHC_predictors/netmhcpan/abstract_netmhcpan_predictor.py deleted file mode 100755 index 715d1fe2..00000000 --- a/neofox/MHC_predictors/netmhcpan/abstract_netmhcpan_predictor.py +++ /dev/null @@ -1,144 +0,0 @@ -# -# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. -# -# This file is part of Neofox -# (see https://github.com/tron-bioinformatics/neofox). 
-# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see .# -from typing import List, Union -from dataclasses import dataclass -from neofox.model.neoantigen import Mhc2Isoform, MhcAllele -from neofox.helpers.epitope_helper import EpitopeHelper -from neofox.helpers.runner import Runner -from neofox.helpers.blastp_runner import BlastpRunner -from neofox.references.references import DependenciesConfiguration -from neofox.model.mhc_parser import MhcParser - - -@dataclass -class PredictedEpitope: - """this is a common data class for both netmhcpan and netmhc2pan""" - pos: int - hla: Union[ - MhcAllele, Mhc2Isoform - ] # for MHC I a str is enough, but for MCH II we need a complex object - peptide: str - affinity_score: float - rank: float - - -class AbstractNetMhcPanPredictor: - def __init__(self, runner: Runner, configuration: DependenciesConfiguration, - blastp_runner: BlastpRunner, mhc_parser: MhcParser): - self.runner = runner - self.configuration = configuration - self.mhc_parser = mhc_parser - self.blastp_runner = blastp_runner - - @staticmethod - def select_best_by_rank(predictions: List[PredictedEpitope], none_value=None) -> PredictedEpitope: - """reports best predicted epitope (over all alleles). indicate by rank = true if rank score should be used. 
- if rank = False, Aff(nM) is used - In case of a tie, it chooses the first peptide in alphabetical order - """ - return min(predictions, key=lambda p: (p.rank, p.peptide)) \ - if predictions is not None and len(predictions) > 0 else none_value - - @staticmethod - def select_best_by_affinity(predictions: List[PredictedEpitope], none_value=None) -> PredictedEpitope: - """reports best predicted epitope (over all alleles). indicate by rank = true if rank score should be used. - if rank = False, Aff(nM) is used - In case of a tie, it chooses the first peptide in alphabetical order - """ - return min(predictions, key=lambda p: (p.affinity_score, p.peptide)) \ - if predictions is not None and len(predictions) > 0 else none_value - - @staticmethod - def filter_wt_predictions_from_best_mutated( - predictions: List[PredictedEpitope], mutated_prediction: PredictedEpitope - ) -> List[PredictedEpitope]: - """returns wt epitope info for given mutated sequence. best wt is restricted to the allele of best neoepitope""" - return list( - filter( - lambda p: mutated_prediction.peptide is not None and - len(p.peptide) == len(mutated_prediction.peptide) and - p.pos == mutated_prediction.pos and - p.hla.name == mutated_prediction.hla.name, - predictions, - ) - ) - - def find_wt_epitope_for_alternative_mutated_epitope( - self, - mutated_predictions: List[PredictedEpitope] - ) -> List: - """returns wt epitope for each neoepitope candidate of a neoantigen candidate from an alternative mutation - class by a BLAST search.""" - mut_peptides = set([mp.peptide for mp in mutated_predictions]) - most_similar_wt_epitopes = { - mutated_peptide: self.blastp_runner.get_most_similar_wt_epitope(mutated_peptide) - for mutated_peptide in mut_peptides - } - wt_peptides_full = [] - for mp in mutated_predictions: - wt_peptides_full.append(most_similar_wt_epitopes.get(mp.peptide)) - return wt_peptides_full - - def filter_wt_predictions_from_best_mutated_alernative( - self, mut_predictions: 
List[PredictedEpitope], wt_predictions: List[PredictedEpitope], - best_mutated_epitope: PredictedEpitope - ) -> PredictedEpitope: - """returns wt epitope info for given mutated sequence. best wt is restricted to the allele of best neoepitope""" - best_wt = None - for mut, wt in zip(mut_predictions, wt_predictions): - if wt.hla.name == best_mutated_epitope.hla.name and mut.peptide == best_mutated_epitope.peptide: - best_wt = wt - break - return best_wt - - @staticmethod - def remove_peptides_in_proteome(predictions: List[PredictedEpitope], uniprot - ) -> List[PredictedEpitope]: - """filters prediction file for predicted epitopes that cover mutations by searching for epitope - in uniprot proteome database with an exact match search""" - return list( - filter( - lambda p: uniprot.is_sequence_not_in_uniprot( - p.peptide - ), - predictions, - ) - ) - - def filter_for_9mers( - self, predictions: List[PredictedEpitope] - ) -> List[PredictedEpitope]: - """returns only predicted 9mers""" - return list(filter(lambda p: len(p.peptide) == 9, predictions)) - - @staticmethod - def filter_peptides_covering_snv( - position_of_mutation, predictions: List[PredictedEpitope] - ) -> List[PredictedEpitope]: - """filters prediction file for predicted epitopes that cover mutations""" - return list( - filter( - lambda p: EpitopeHelper.epitope_covers_mutation( - position_of_mutation, p.pos, len(p.peptide) - ), - predictions, - ) - ) - - diff --git a/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py b/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py index 504ef80f..19913722 100755 --- a/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py +++ b/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py @@ -20,19 +20,16 @@ from typing import List, Set import scipy.stats as stats from logzero import logger -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import ( - 
PredictedEpitope, -) -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import AbstractNetMhcPanPredictor from neofox.MHC_predictors.netmhcpan.netmhcIIpan_prediction import NetMhcIIPanPredictor from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper from neofox.helpers.runner import Runner from neofox.model.mhc_parser import MhcParser -from neofox.model.neoantigen import Annotation, Mhc2, Zygosity, Mhc2Isoform, Mutation, Mhc2GeneName +from neofox.model.neoantigen import Annotation, Mhc2, Zygosity, Mhc2Isoform, Mhc2GeneName, PredictedEpitope, Neoantigen from neofox.model.factories import AnnotationFactory from neofox.references.references import DependenciesConfiguration, ORGANISM_HOMO_SAPIENS -LENGTH_MHC2_EPITOPE = 15 +MIN_LENGTH_MHC2_EPITOPE = 15 class BestAndMultipleBinderMhcII: @@ -55,33 +52,20 @@ def _initialise(self): self.generator_rate_adn = None self.generator_rate_cdn = None self.best_predicted_epitope_rank = PredictedEpitope( - peptide=None, - pos=None, - hla=Mhc2Isoform(name=None), - affinity_score=None, - rank=None, + mutated_peptide=None, + position=None, + isoform_mhc_i_i=Mhc2Isoform(name=None), + affinity_mutated=None, + rank_mutated=None, ) self.best_predicted_epitope_affinity = PredictedEpitope( - peptide=None, - pos=None, - hla=Mhc2Isoform(name=None), - affinity_score=None, - rank=None, - ) - self.best_predicted_epitope_rank_wt = PredictedEpitope( - peptide=None, - pos=None, - hla=Mhc2Isoform(name=None), - affinity_score=None, - rank=None, - ) - self.best_predicted_epitope_affinity_wt = PredictedEpitope( - peptide=None, - pos=None, - hla=Mhc2Isoform(name=None), - affinity_score=None, - rank=None, + mutated_peptide=None, + position=None, + isoform_mhc_i_i=Mhc2Isoform(name=None), + affinity_mutated=None, + rank_mutated=None, ) + self.predictions = [] def calculate_phbr_ii(self, best_epitope_per_allele_mhc2: List[PredictedEpitope]): """ @@ -93,13 +77,11 @@ def 
calculate_phbr_ii(self, best_epitope_per_allele_mhc2: List[PredictedEpitope] phbr_ii = None for allele_with_score in best_epitope_per_allele_mhc2: # add DRB1 - if Mhc2GeneName.DRB1.name in allele_with_score.hla.name: + if Mhc2GeneName.DRB1.name in allele_with_score.isoform_mhc_i_i.name: best_epitope_per_allele_mhc2_new.append(allele_with_score) if len(best_epitope_per_allele_mhc2_new) == 12: # 12 genes gene copies should be included into PHBR_II - best_mhc_ii_scores_per_allele = [ - epitope.rank for epitope in best_epitope_per_allele_mhc2_new - ] + best_mhc_ii_scores_per_allele = [epitope.rank_mutated for epitope in best_epitope_per_allele_mhc2_new] phbr_ii = stats.hmean(best_mhc_ii_scores_per_allele) return phbr_ii @@ -108,7 +90,7 @@ def determine_number_of_binders(predictions: List[PredictedEpitope], threshold=1 """ Determines the number of HLA II binders per mutation based on a rank threshold. Default is set to 1, which is threshold used in generator rate. """ - scores = [epitope.rank for epitope in predictions] + scores = [epitope.rank_mutated for epitope in predictions] number_binders = 0 for score in scores: if score < threshold: @@ -116,8 +98,7 @@ def determine_number_of_binders(predictions: List[PredictedEpitope], threshold=1 return number_binders if not len(scores) == 0 else None @staticmethod - def determine_number_of_alternative_binders(predictions: List[PredictedEpitope], - predictions_wt: List[PredictedEpitope], threshold=4): + def determine_number_of_alternative_binders(predictions: List[PredictedEpitope], threshold=4): """ Determines the number of HLA II neoepitope candidates that bind stronger (4:1) to HLA in comparison to corresponding WT. With the netMHCIIpan4.0 the rank score can get a value of 0.0. 
If this is the case, the next smaller possible @@ -128,158 +109,72 @@ def determine_number_of_alternative_binders(predictions: List[PredictedEpitope], number_binders = 0 values = [] for epitope in predictions: - values.append(epitope.rank) - if epitope.rank < 4: - wt_peptide = AbstractNetMhcPanPredictor.select_best_by_rank( - predictions=AbstractNetMhcPanPredictor.filter_wt_predictions_from_best_mutated( - predictions_wt, epitope - ) - ) - rank_mutation = epitope.rank + values.append(epitope.rank_mutated) + if epitope.rank_mutated < 4: + rank_mutation = epitope.rank_mutated if rank_mutation == 0: rank_mutation = 0.01 - if wt_peptide is not None: - dai = wt_peptide.rank / rank_mutation + if epitope.wild_type_peptide is not None: + dai = epitope.rank_wild_type / rank_mutation if dai > threshold: number_binders += 1 return number_binders if not len(values) == 0 else None - @staticmethod - def determine_number_of_alternative_binders_alternative(predictions: List[PredictedEpitope], - predictions_wt: List[PredictedEpitope], threshold=4): - """ - Determines the number of HLA I neoepitope candidates that bind stronger (10:1) to HLA in comparison to corresponding WT - """ - number_binders = 0 - dai_values = [] - for mut, wt in zip(predictions, predictions_wt): - dai_values.append(mut.rank) - if mut.rank < 4: - dai = mut.affinity_score / wt.affinity_score - if dai > threshold: - number_binders += 1 - return number_binders if not len(dai_values) == 0 else None - - def run( - self, - mutation: Mutation, - mhc2_alleles_patient: List[Mhc2], - mhc2_alleles_available: Set, - uniprot - ): + def run(self, neoantigen: Neoantigen, mhc2_alleles_patient: List[Mhc2], mhc2_alleles_available: Set, uniprot): """predicts MHC II epitopes; returns on one hand best binder and on the other hand multiple binder analysis is performed""" # mutation self._initialise() allele_combinations = self.netmhc2pan.generate_mhc2_alelle_combinations(mhc2_alleles_patient) # TODO: migrate the available alleles 
into the model for alleles - patient_mhc2_isoforms = self._get_only_available_combinations( - allele_combinations, mhc2_alleles_available - ) + patient_mhc2_isoforms = self._get_only_available_combinations(allele_combinations, mhc2_alleles_available) + + # only process neoepitopes with a minimum length + if len(neoantigen.mutated_xmer) >= MIN_LENGTH_MHC2_EPITOPE: + + predictions = self.netmhc2pan.get_predictions(neoantigen, patient_mhc2_isoforms, uniprot) + + if neoantigen.wild_type_xmer and len(neoantigen.wild_type_xmer) >= MIN_LENGTH_MHC2_EPITOPE: + + # SNVs with available WT + # runs the netMHCIIpan WT predictions and then pair them with previous predictions + # based on length, position within neoepitope and HLA allele + predictions_wt = self.netmhc2pan.get_wt_predictions(neoantigen, patient_mhc2_isoforms) + predictions = EpitopeHelper.pair_mhcii_predictions(predictions=predictions, predictions_wt=predictions_wt) + else: + + # alternative mutation classes or missing WT + # do BLAST search for all predicted epitopes to identify the closest WT peptide and + # predict MHC binding for the identified peptide sequence + predictions = EpitopeHelper.set_wt_epitope_by_homology(predictions, blastp_runner=self.proteome_blastp_runner) + predictions = self.netmhc2pan.set_wt_netmhcpan_scores(predictions) + + self.predictions = predictions + + if len(predictions) > 0: - predictions = self.netmhc2pan.mhc2_prediction( - patient_mhc2_isoforms, mutation.mutated_xmer - ) - if len(mutation.mutated_xmer) >= LENGTH_MHC2_EPITOPE: - if mutation.wild_type_xmer: - # make sure that predicted epitopes cover mutation in case of SNVs - predictions = self.netmhc2pan.filter_peptides_covering_snv( - position_of_mutation=mutation.position, predictions=predictions - ) - filtered_predictions = self.netmhc2pan.remove_peptides_in_proteome( - predictions, uniprot - ) - if len(filtered_predictions) > 0: # multiple binding best_predicted_epitopes_per_alelle = ( - 
self.extract_best_epitope_per_mhc2_alelle( - filtered_predictions, mhc2_alleles_patient - ) - ) + self.extract_best_epitope_per_mhc2_alelle(predictions, mhc2_alleles_patient)) self.phbr_ii = self.calculate_phbr_ii(best_predicted_epitopes_per_alelle) # best prediction - self.best_predicted_epitope_rank = self.netmhc2pan.select_best_by_rank( - filtered_predictions - ) - self.best_predicted_epitope_affinity = self.netmhc2pan.select_best_by_affinity( - filtered_predictions - ) - self.generator_rate_cdn = self.determine_number_of_binders( - predictions=filtered_predictions - ) - # MHC binding predictions for WT pepti - if mutation.wild_type_xmer: - predictions = self.netmhc2pan.mhc2_prediction( - patient_mhc2_isoforms, mutation.wild_type_xmer - ) - if len(mutation.wild_type_xmer) >= LENGTH_MHC2_EPITOPE: - filtered_predictions_wt = self.netmhc2pan.filter_peptides_covering_snv( - mutation.position, predictions - ) - # best prediction - if self.best_predicted_epitope_rank: - self.best_predicted_epitope_rank_wt = self.netmhc2pan.select_best_by_rank( - self.netmhc2pan.filter_wt_predictions_from_best_mutated( - filtered_predictions_wt, self.best_predicted_epitope_rank - ) - ) - if self.best_predicted_epitope_affinity: - self.best_predicted_epitope_affinity_wt = ( - self.netmhc2pan.select_best_by_affinity( - self.netmhc2pan.filter_wt_predictions_from_best_mutated( - filtered_predictions_wt, self.best_predicted_epitope_affinity - ) - ) - ) - if len(mutation.mutated_xmer) >= LENGTH_MHC2_EPITOPE: - self.generator_rate_adn = self.determine_number_of_alternative_binders( - predictions=filtered_predictions, predictions_wt=filtered_predictions_wt - ) - if self.generator_rate_adn is not None: - if self.generator_rate_cdn is not None: - self.generator_rate = self.generator_rate_adn + self.generator_rate_cdn - else: - # alternative mutation classes - # do BLAST search for all predicted epitopes covering mutation to identify WT peptide and - # predict MHC binding for the identified 
peptide sequence - peptides_wt = self.netmhc2pan.find_wt_epitope_for_alternative_mutated_epitope(filtered_predictions) - filtered_predictions_wt = [] - for wt_peptide, mut_peptide in zip(peptides_wt, filtered_predictions): - if wt_peptide is not None: - filtered_predictions_wt.extend(self.netmhc2pan.mhc2_prediction_peptide(mut_peptide.hla, wt_peptide)) - if self.best_predicted_epitope_rank: - self.best_predicted_epitope_rank_wt = self.netmhc2pan.filter_wt_predictions_from_best_mutated_alernative( - mut_predictions=filtered_predictions, wt_predictions=filtered_predictions_wt, - best_mutated_epitope=self.best_predicted_epitope_rank) - if self.best_predicted_epitope_affinity: - self.best_predicted_epitope_affinity_wt = \ - self.netmhc2pan.filter_wt_predictions_from_best_mutated_alernative( - mut_predictions=filtered_predictions, wt_predictions=filtered_predictions_wt, - best_mutated_epitope=self.best_predicted_epitope_affinity - ) - if len(mutation.mutated_xmer) >= LENGTH_MHC2_EPITOPE: - # generator rate for MHC II - self.generator_rate_cdn = self.determine_number_of_binders( - predictions=filtered_predictions - ) - self.generator_rate_adn = self.determine_number_of_alternative_binders_alternative( - predictions=filtered_predictions, predictions_wt=filtered_predictions_wt - ) - - if self.generator_rate_adn is not None: - if self.generator_rate_cdn is not None: - self.generator_rate = self.generator_rate_adn + self.generator_rate_cdn - - def _get_only_available_combinations(self, allele_combinations: List[Mhc2Isoform], set_available_mhc: List[str]) -> List[str]: + self.best_predicted_epitope_rank = EpitopeHelper.select_best_by_rank(predictions) + self.best_predicted_epitope_affinity = EpitopeHelper.select_best_by_affinity(predictions) + self.generator_rate_cdn = self.determine_number_of_binders(predictions=predictions) + self.generator_rate_adn = self.determine_number_of_alternative_binders(predictions=predictions) + if self.generator_rate_adn is not None and 
self.generator_rate_cdn is not None: + self.generator_rate = self.generator_rate_adn + self.generator_rate_cdn + + def _get_only_available_combinations(self, allele_combinations: List[Mhc2Isoform], set_available_mhc: Set[str]) -> List[str]: # parses isoforms into internal representation parsed_allele_combinations = self.netmhc2pan.represent_mhc2_isoforms(allele_combinations) patients_available_alleles = list( - set(parsed_allele_combinations).intersection(set(set_available_mhc)) + set(parsed_allele_combinations).intersection(set_available_mhc) ) patients_not_available_alleles = list( - set(parsed_allele_combinations).difference(set(set_available_mhc)) + set(parsed_allele_combinations).difference(set_available_mhc) ) if len(patients_not_available_alleles) > 0: logger.warning( @@ -293,68 +188,58 @@ def get_annotations(self) -> List[Annotation]: if self.best_predicted_epitope_rank: annotations.extend([ AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_rank.rank, - name="Best_rank_MHCII_score", + value=self.best_predicted_epitope_rank.rank_mutated, + name="NetMHCIIpan_bestRank_rank", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_rank.peptide, - name="Best_rank_MHCII_score_epitope", + value=self.best_predicted_epitope_rank.mutated_peptide, + name="NetMHCIIpan_bestRank_peptide", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_rank.hla.name, - name="Best_rank_MHCII_score_allele", - )]) - if self.best_predicted_epitope_affinity: - annotations.extend([ - AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_affinity.affinity_score, - name="Best_affinity_MHCII_score", + value=self.best_predicted_epitope_rank.isoform_mhc_i_i.name, + name="NetMHCIIpan_bestRank_allele", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_affinity.peptide, - name="Best_affinity_MHCII_epitope", + value=self.best_predicted_epitope_rank.rank_wild_type, + 
name="NetMHCIIpan_bestRank_rankWT", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_affinity.hla.name, - name="Best_affinity_MHCII_allele", - )]) - if self.best_predicted_epitope_rank_wt: + value=self.best_predicted_epitope_rank.wild_type_peptide, + name="NetMHCIIpan_bestRank_peptideWT", + ), + ]) + if self.best_predicted_epitope_affinity: annotations.extend([ AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_rank_wt.rank, - name="Best_rank_MHCII_score_WT", + value=self.best_predicted_epitope_affinity.affinity_mutated, + name="NetMHCIIpan_bestAffinity_affinity", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_rank_wt.peptide, - name="Best_rank_MHCII_score_epitope_WT", + value=self.best_predicted_epitope_affinity.mutated_peptide, + name="NetMHCIIpan_bestAffinity_peptide", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_rank_wt.hla.name, - name="Best_rank_MHCII_score_allele_WT", - )]) - if self.best_predicted_epitope_affinity_wt: - annotations.extend([ - AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_affinity_wt.affinity_score, - name="Best_affinity_MHCII_score_WT", + value=self.best_predicted_epitope_affinity.isoform_mhc_i_i.name, + name="NetMHCIIpan_bestAffinity_allele", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_affinity_wt.peptide, - name="Best_affinity_MHCII_epitope_WT", + value=self.best_predicted_epitope_affinity.affinity_wild_type, + name="NetMHCIIpan_bestAffinity_affinityWT", ), AnnotationFactory.build_annotation( - value=self.best_predicted_epitope_affinity_wt.hla.name, - name="Best_affinity_MHCII_allele_WT", - )]) + value=self.best_predicted_epitope_affinity.wild_type_peptide, + name="NetMHCIIpan_bestAffinity_peptideWT", + ) + ]) if self.organism == ORGANISM_HOMO_SAPIENS: annotations.extend([AnnotationFactory.build_annotation(value=self.phbr_ii, name="PHBR_II")]) annotations.extend([ # generator 
rate - AnnotationFactory.build_annotation(value=self.generator_rate, name="Generator_rate_MHCII"), - AnnotationFactory.build_annotation(value=self.generator_rate_cdn, name="Generator_rate_CDN_MHCII"), - AnnotationFactory.build_annotation(value=self.generator_rate_adn, name="Generator_rate_ADN_MHCII"), + AnnotationFactory.build_annotation(value=self.generator_rate, name="GeneratorRate_MHCII"), + AnnotationFactory.build_annotation(value=self.generator_rate_cdn, name="GeneratorRate_CDN_MHCII"), + AnnotationFactory.build_annotation(value=self.generator_rate_adn, name="GeneratorRate_ADN_MHCII"), ]) return annotations @@ -399,24 +284,24 @@ def _get_sorted_epitopes_mhc2( # groups epitopes by allele epitopes_by_allele = {} for p in predictions: - allele = p.hla.name + allele = p.isoform_mhc_i_i.name epitopes_by_allele.setdefault(allele, []).append(p) # chooses the best epitope per allele and considers zygosity best_epitopes_per_allele = [] for allele, epitopes in epitopes_by_allele.items(): # sort by rank to choose the best epitope, fixes ties with peptide by alphabetical order - epitopes.sort(key=lambda e: (e.rank, e.peptide)) + epitopes.sort(key=lambda e: (e.rank_mutated, e.mutated_peptide)) best_epitope = epitopes[0] num_repetitions = 0 if ( - best_epitope.hla.name in hetero_hemizygous_allele_names - or best_epitope.hla.name in hetero_hemizygous_allele_names + best_epitope.isoform_mhc_i_i.name in hetero_hemizygous_allele_names + or best_epitope.isoform_mhc_i_i.name in hetero_hemizygous_allele_names ): # adds the epitope once if alleles heterozygous num_repetitions = 1 if ( - best_epitope.hla in homozygous_allele_names + best_epitope.isoform_mhc_i_i in homozygous_allele_names ): # adds the epitope twice if one allele is homozygous num_repetitions = 2 diff --git a/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py b/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py index 7a0107a1..a6da0863 100755 --- 
a/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py +++ b/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py @@ -20,23 +20,20 @@ from typing import List, Set import scipy.stats as stats from neofox.MHC_predictors.netmhcpan.netmhcpan_prediction import NetMhcPanPredictor -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import ( - PredictedEpitope, -) -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import AbstractNetMhcPanPredictor from neofox.helpers.blastp_runner import BlastpRunner from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.helpers.mhc_helper import MhcHelper from neofox.helpers.runner import Runner from neofox.model.mhc_parser import MhcParser -from neofox.model.neoantigen import Annotation, Mhc1, Zygosity, Mutation, MhcAllele +from neofox.model.neoantigen import Annotation, Mhc1, PredictedEpitope, Neoantigen from neofox.model.factories import AnnotationFactory from neofox.references.references import DependenciesConfiguration, ORGANISM_HOMO_SAPIENS -from logzero import logger class BestAndMultipleBinder: - def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser, - blastp_runner: BlastpRunner): + def __init__( + self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser, + blastp_runner: BlastpRunner): self.runner = runner self.configuration = configuration self.mhc_parser = mhc_parser @@ -50,34 +47,19 @@ def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc def _initialise(self): self.phbr_i = None - self.epitope_affinities = None self.generator_rate = None self.mutation_in_anchor_9mer = None self.generator_rate = None self.generator_rate_adn = None self.generator_rate_cdn = None - self.best_epitope_by_rank = self._get_empty_epitope() - self.best_epitope_by_affinity = self._get_empty_epitope() - self.best_ninemer_epitope_by_affinity = 
self._get_empty_epitope() - self.best_ninemer_epitope_by_rank = self._get_empty_epitope() - self.best_wt_epitope_by_rank = self._get_empty_epitope() - self.best_wt_epitope_by_affinity = self._get_empty_epitope() - self.best_ninemer_wt_epitope_by_rank = self._get_empty_epitope() - self.best_ninemer_wt_epitope_by_affinity = self._get_empty_epitope() - - @staticmethod - def _get_empty_epitope(): - return PredictedEpitope( - peptide=None, - pos=None, - hla=MhcAllele(name=None), - affinity_score=None, - rank=None, - ) + self.best_epitope_by_rank = EpitopeHelper.get_empty_epitope() + self.best_epitope_by_affinity = EpitopeHelper.get_empty_epitope() + self.best_ninemer_epitope_by_affinity = EpitopeHelper.get_empty_epitope() + self.best_ninemer_epitope_by_rank = EpitopeHelper.get_empty_epitope() + self.predictions = [] def calculate_phbr_i( - self, predictions: List[PredictedEpitope], mhc1_alleles: List[Mhc1] - ): + self, predictions: List[PredictedEpitope], mhc1_alleles: List[Mhc1]): """returns list of multiple binding scores for mhcII considering best epitope per allele, applying different types of means (harmonic ==> PHRB-II, Marty et al). 
2 copies of DRA - DRB1 --> consider this gene 2x when averaging mhcii binding scores """ @@ -88,77 +70,37 @@ def calculate_phbr_i( ) phbr_i = None if len(best_epitopes_per_allele) == 6: - phbr_i = stats.hmean(list(map(lambda e: e.rank, best_epitopes_per_allele))) + phbr_i = stats.hmean(list(map(lambda e: e.rank_mutated, best_epitopes_per_allele))) return phbr_i @staticmethod def extract_best_epitope_per_alelle( - epitopes: List[PredictedEpitope], mhc_isoforms: List[Mhc1] - ) -> List[PredictedEpitope]: + epitopes: List[PredictedEpitope], mhc_isoforms: List[Mhc1]) -> List[PredictedEpitope]: """ This function returns the predicted epitope with the lowest binding score for each patient allele, considering homozyogosity """ - homozygous_alleles = BestAndMultipleBinder._get_homozygous_mhc1_alleles( - mhc_isoforms - ) - hetero_hemizygous_alleles = ( - BestAndMultipleBinder._get_heterozygous_or_hemizygous_mhc1_alleles( - mhc_isoforms - ) - ) - return BestAndMultipleBinder._get_sorted_epitopes( - hetero_hemizygous_alleles, homozygous_alleles, epitopes - ) - - @staticmethod - def _get_homozygous_mhc1_alleles(mhc_isoforms: List[Mhc1]) -> List[str]: - """ - Returns alleles that occur more than one time in list of patient alleles and hence are homozygous alleles. - Otherwise retunrs empty list - """ - return [ - a.name - for m in mhc_isoforms - for a in m.alleles - if m.zygosity == Zygosity.HOMOZYGOUS - ] - - @staticmethod - def _get_heterozygous_or_hemizygous_mhc1_alleles( - mhc_isoforms: List[Mhc1], - ) -> List[str]: - """ - Returns alleles that occur more than one time in list of patient alleles and hence are homozygous alleles. 
- Otherwise retunrs empty list - """ - return [ - a.name - for m in mhc_isoforms - for a in m.alleles - if m.zygosity in [Zygosity.HETEROZYGOUS, Zygosity.HEMIZYGOUS] - ] + homozygous_alleles = MhcHelper.get_homozygous_mhc1_alleles(mhc_isoforms) + hetero_hemizygous_alleles = (MhcHelper.get_heterozygous_or_hemizygous_mhc1_alleles(mhc_isoforms)) + return BestAndMultipleBinder._get_sorted_epitopes(hetero_hemizygous_alleles, homozygous_alleles, epitopes) @staticmethod def _get_sorted_epitopes( - hetero_hemizygous_alleles, - homozygous_alleles, - predictions: List[PredictedEpitope], - ) -> List[PredictedEpitope]: + hetero_hemizygous_alleles, homozygous_alleles, predictions: List[PredictedEpitope]) -> List[PredictedEpitope]: # groups epitopes by allele epitopes_by_allele = {} for p in predictions: - epitopes_by_allele.setdefault(p.hla.name, []).append(p) + epitopes_by_allele.setdefault(p.allele_mhc_i.name, []).append(p) # chooses the best epitope per allele while considering zygosity best_epis_per_allele = [] for list_alleles in epitopes_by_allele.values(): # sort by rank to choose the best epitope, ties are solved choosing the first peptide in alphabetcial order - list_alleles.sort(key=lambda x: (x.rank, x.peptide)) + list_alleles.sort(key=lambda x: (x.rank_mutated, x.mutated_peptide)) best_epitope = list_alleles[0] - if best_epitope.hla.name in hetero_hemizygous_alleles: + if best_epitope.allele_mhc_i.name in hetero_hemizygous_alleles: best_epis_per_allele.append(best_epitope) # adds the epitope once - if best_epitope.hla.name in homozygous_alleles: + if best_epitope.allele_mhc_i.name in homozygous_alleles: best_epis_per_allele.append(best_epitope) best_epis_per_allele.append(best_epitope) # adds the epitope twice return best_epis_per_allele @@ -168,7 +110,7 @@ def determine_number_of_binders(predictions: List[PredictedEpitope], threshold=5 """ Determines the number of HLA I binders per mutation based on an affinity threshold. 
Default is set to 50, which is threshold used in generator rate. """ - scores = [epitope.affinity_score for epitope in predictions] + scores = [epitope.affinity_mutated for epitope in predictions] number_binders = 0 for score in scores: if score < threshold: @@ -176,48 +118,27 @@ def determine_number_of_binders(predictions: List[PredictedEpitope], threshold=5 return number_binders if not len(scores) == 0 else None @staticmethod - def determine_number_of_alternative_binders(predictions: List[PredictedEpitope], - predictions_wt: List[PredictedEpitope], threshold=10): + def determine_number_of_alternative_binders(predictions: List[PredictedEpitope], threshold=10): """ Determines the number of HLA I neoepitope candidates that bind stronger (10:1) to HLA in comparison to corresponding WT """ number_binders = 0 dai_values = [] for epitope in predictions: - dai_values.append(epitope.affinity_score) - if epitope.affinity_score < 5000: - wt_peptide = AbstractNetMhcPanPredictor.select_best_by_affinity( - AbstractNetMhcPanPredictor.filter_wt_predictions_from_best_mutated( - predictions=predictions_wt, mutated_prediction=epitope), - none_value=BestAndMultipleBinder._get_empty_epitope()) - if wt_peptide is not None and wt_peptide.affinity_score is not None: - dai = wt_peptide.affinity_score / epitope.affinity_score + dai_values.append(epitope.affinity_mutated) + if epitope.affinity_mutated < 5000: + if epitope.wild_type_peptide is not None and epitope.affinity_wild_type is not None: + dai = epitope.affinity_wild_type / epitope.affinity_mutated if dai > threshold: number_binders += 1 - if len(dai_values) == 0: + if len(predictions) == 0: number_binders = None return number_binders - @staticmethod - def determine_number_of_alternative_binders_alternative(predictions: List[PredictedEpitope], - predictions_wt: List[PredictedEpitope], threshold=10): - """ - Determines the number of HLA I neoepitope candidates that bind stronger (10:1) to HLA in comparison to corresponding WT - """ - 
number_binders = 0 - dai_values = [] - for mut, wt in zip(predictions, predictions_wt): - dai_values.append(mut.affinity_score) - if mut.affinity_score < 5000 and wt.affinity_score: - dai = mut.affinity_score / wt.affinity_score - if dai > threshold: - number_binders += 1 - return number_binders if not len(dai_values) == 0 else None - def run( self, - mutation: Mutation, + neoantigen: Neoantigen, mhc1_alleles_patient: List[Mhc1], mhc1_alleles_available: Set, uniprot, @@ -227,274 +148,179 @@ def run( """ self._initialise() - predictions = self.netmhcpan.mhc_prediction( - mhc1_alleles_patient, mhc1_alleles_available, mutation.mutated_xmer - ) - if mutation.wild_type_xmer: - # make sure that predicted epitopes cover mutation in case of SNVs - predictions = self.netmhcpan.filter_peptides_covering_snv( - position_of_mutation=mutation.position, predictions=predictions - ) - # make sure that predicted neoepitopes are part of the WT proteome - filtered_predictions = self.netmhcpan.remove_peptides_in_proteome( - predictions=predictions, uniprot=uniprot - ) + # gets all predictions overlapping the mutation and not present in the WT proteome + available_alleles = self.netmhcpan.get_only_available_alleles(mhc1_alleles_patient, mhc1_alleles_available) + predictions = self.netmhcpan.get_predictions(available_alleles, neoantigen, uniprot) + if neoantigen.wild_type_xmer: + # SNVs with available WT + # runs the netMHCpan WT predictions and then pair them with previous predictions + # based on length, position within neoepitope and HLA allele + predictions_wt = self.netmhcpan.get_wt_predictions(available_alleles, neoantigen) + predictions = EpitopeHelper.pair_predictions(predictions=predictions, predictions_wt=predictions_wt) + else: + # alternative mutation classes or missing WT + # do BLAST search for all predicted epitopes to identify the closest WT peptide and + # predict MHC binding for the identified peptide sequence + predictions = 
EpitopeHelper.set_wt_epitope_by_homology(predictions, self.blastp_runner) + predictions = self.netmhcpan.set_wt_netmhcpan_scores(predictions) - if len(filtered_predictions) > 0: - # multiple binding - self.epitope_affinities = "/".join( - [str(epitope.affinity_score) for epitope in filtered_predictions] - ) + self.predictions = predictions + + if len(predictions) > 0: # best prediction - self.best_epitope_by_rank = self.netmhcpan.select_best_by_rank( - filtered_predictions, none_value=self._get_empty_epitope()) - self.best_epitope_by_affinity = self.netmhcpan.select_best_by_affinity( - filtered_predictions, none_value=self._get_empty_epitope()) - logger.info(self.best_epitope_by_rank) + self.best_epitope_by_rank = EpitopeHelper.select_best_by_rank(predictions) + self.best_epitope_by_affinity = EpitopeHelper.select_best_by_affinity(predictions) # best predicted epitope of length 9 - ninemer_predictions = self.netmhcpan.filter_for_9mers(filtered_predictions) - self.best_ninemer_epitope_by_rank = self.netmhcpan.select_best_by_rank( - ninemer_predictions, none_value=self._get_empty_epitope()) - self.best_ninemer_epitope_by_affinity = self.netmhcpan.select_best_by_affinity( - ninemer_predictions, none_value=self._get_empty_epitope()) + ninemer_predictions = EpitopeHelper.filter_for_9mers(predictions) + self.best_ninemer_epitope_by_rank = EpitopeHelper.select_best_by_rank(ninemer_predictions) + self.best_ninemer_epitope_by_affinity = EpitopeHelper.select_best_by_affinity(ninemer_predictions) # multiple binding based on affinity - self.generator_rate_cdn = self.determine_number_of_binders( - predictions=filtered_predictions, threshold=50 - ) + self.generator_rate_cdn = self.determine_number_of_binders(predictions=predictions, threshold=50) + self.generator_rate_adn = self.determine_number_of_alternative_binders(predictions=predictions) + if self.generator_rate_adn is not None and self.generator_rate_cdn is not None: + self.generator_rate = self.generator_rate_adn + 
self.generator_rate_cdn # PHBR-I - self.phbr_i = self.calculate_phbr_i( - predictions=filtered_predictions, mhc1_alleles=mhc1_alleles_patient - ) - - self.best_wt_epitope_by_rank = None - self.best_wt_epitope_by_affinity = None - self.best_ninemer_wt_epitope_by_rank = None - self.best_ninemer_wt_epitope_by_affinity = None - - # MHC binding predictions for WT peptides - if mutation.wild_type_xmer: - # SNVs - predictions_wt = self.netmhcpan.mhc_prediction( - mhc1_alleles_patient, mhc1_alleles_available, mutation.wild_type_xmer - ) - filtered_predictions_wt = self.netmhcpan.filter_peptides_covering_snv( - position_of_mutation=mutation.position, predictions=predictions_wt - ) - self.best_wt_epitope_by_rank = self.netmhcpan.select_best_by_rank( - self.netmhcpan.filter_wt_predictions_from_best_mutated( - filtered_predictions_wt, self.best_epitope_by_rank - ), - none_value=BestAndMultipleBinder._get_empty_epitope() - ) - self.best_wt_epitope_by_affinity = self.netmhcpan.select_best_by_affinity( - self.netmhcpan.filter_wt_predictions_from_best_mutated( - filtered_predictions_wt, self.best_epitope_by_affinity), - none_value=BestAndMultipleBinder._get_empty_epitope() - ) - # best predicted epitope of length 9 - ninemer_predictions_wt = self.netmhcpan.filter_for_9mers(filtered_predictions_wt) - self.best_ninemer_wt_epitope_by_rank = self.netmhcpan.select_best_by_rank( - self.netmhcpan.filter_wt_predictions_from_best_mutated( - ninemer_predictions_wt, self.best_ninemer_epitope_by_rank - ), - none_value=BestAndMultipleBinder._get_empty_epitope() - ) - self.best_ninemer_wt_epitope_by_affinity = self.netmhcpan.select_best_by_affinity( - self.netmhcpan.filter_wt_predictions_from_best_mutated( - ninemer_predictions_wt, self.best_ninemer_epitope_by_affinity), - none_value=BestAndMultipleBinder._get_empty_epitope() - ) - # multiple binding based on affinity - self.generator_rate_adn = self.determine_number_of_alternative_binders( - predictions=filtered_predictions, 
predictions_wt=filtered_predictions_wt - ) - else: - # alternative mutation classes - # do BLAST search for all predicted epitopes covering mutation to identify WT peptide and - # predict MHC binding for the identified peptide sequence - peptides_wt = self.netmhcpan.find_wt_epitope_for_alternative_mutated_epitope(filtered_predictions) - filtered_predictions_wt = [] - for wt_peptide, mut_peptide in zip(peptides_wt, filtered_predictions): - if wt_peptide is not None: - hla = Mhc1(name=mut_peptide.hla.gene, zygosity=Zygosity.HOMOZYGOUS, alleles=[mut_peptide.hla]) - filtered_predictions_wt.extend(self.netmhcpan.mhc_prediction_peptide( - [hla], mhc1_alleles_available, wt_peptide - )) - if self.best_epitope_by_rank: - self.best_wt_epitope_by_rank = self.netmhcpan.filter_wt_predictions_from_best_mutated_alernative( - mut_predictions=filtered_predictions, wt_predictions=filtered_predictions_wt, - best_mutated_epitope=self.best_epitope_by_rank) - if self.best_epitope_by_affinity: - self.best_wt_epitope_by_affinity = self.netmhcpan.filter_wt_predictions_from_best_mutated_alernative( - mut_predictions=filtered_predictions, wt_predictions=filtered_predictions_wt, - best_mutated_epitope=self.best_epitope_by_affinity) - if self.best_ninemer_epitope_by_rank: - self.best_ninemer_wt_epitope_by_rank = self.netmhcpan.filter_wt_predictions_from_best_mutated_alernative( - mut_predictions=filtered_predictions, wt_predictions=filtered_predictions_wt, - best_mutated_epitope=self.best_ninemer_epitope_by_rank) - if self.best_ninemer_epitope_by_affinity: - self.best_ninemer_wt_epitope_by_affinity = self.netmhcpan.filter_wt_predictions_from_best_mutated_alernative( - mut_predictions=filtered_predictions, wt_predictions=filtered_predictions_wt, - best_mutated_epitope=self.best_ninemer_epitope_by_affinity) - # multiple binding based on affinity - self.generator_rate_adn = self.determine_number_of_alternative_binders_alternative( - predictions=filtered_predictions, 
predictions_wt=filtered_predictions_wt - ) - - if self.generator_rate_adn is not None: - if self.generator_rate_cdn is not None: - self.generator_rate = self.generator_rate_adn + self.generator_rate_cdn + self.phbr_i = self.calculate_phbr_i(predictions=predictions, mhc1_alleles=mhc1_alleles_patient) - def get_annotations(self, mutation) -> List[Annotation]: + def get_annotations(self) -> List[Annotation]: annotations = [] if self.best_epitope_by_rank: annotations.extend([ AnnotationFactory.build_annotation( - value=self.best_epitope_by_rank.rank, name="Best_rank_MHCI_score" + value=self.best_epitope_by_rank.rank_mutated, name="NetMHCpan_MHCI_bestRank_rank" ), AnnotationFactory.build_annotation( - value=self.best_epitope_by_rank.peptide, - name="Best_rank_MHCI_score_epitope", + value=self.best_epitope_by_rank.mutated_peptide, + name="NetMHCpan_bestRank_peptide", ), AnnotationFactory.build_annotation( - value=self.best_epitope_by_rank.hla.name, name="Best_rank_MHCI_score_allele" - )]) - if self.best_epitope_by_affinity: - annotations.extend([ - AnnotationFactory.build_annotation( - value=self.best_epitope_by_affinity.affinity_score, - name="Best_affinity_MHCI_score", + value=self.best_epitope_by_rank.allele_mhc_i.name, name="NetMHCpan_bestRank_allele" ), AnnotationFactory.build_annotation( - value=self.best_epitope_by_affinity.peptide, - name="Best_affinity_MHCI_epitope", + value=self.best_epitope_by_rank.rank_wild_type, name="NetMHCpan_bestRank_rankWT" ), AnnotationFactory.build_annotation( - value=self.best_epitope_by_affinity.hla.name, - name="Best_affinity_MHCI_allele", - )]) - if self.best_ninemer_epitope_by_rank: + value=self.best_epitope_by_rank.wild_type_peptide, + name="NetMHCpan_bestRank_peptideWT", + ) + ]) + if self.best_epitope_by_affinity: annotations.extend([ AnnotationFactory.build_annotation( - value=self.best_ninemer_epitope_by_rank.rank, - name="Best_rank_MHCI_9mer_score", + value=self.best_epitope_by_affinity.affinity_mutated, + 
name="NetMHCpan_bestAffinity_affinity", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_epitope_by_rank.peptide, - name="Best_rank_MHCI_9mer_epitope", + value=self.best_epitope_by_affinity.mutated_peptide, + name="NetMHCpan_bestAffinity_peptide", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_epitope_by_rank.hla.name, - name="Best_rank_MHCI_9mer_allele", - )]) - if self.best_ninemer_epitope_by_affinity: - annotations.extend([ - AnnotationFactory.build_annotation( - value=self.best_ninemer_epitope_by_affinity.affinity_score, - name="Best_affinity_MHCI_9mer_score", + value=self.best_epitope_by_affinity.allele_mhc_i.name, + name="NetMHCpan_bestAffinity_allele", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_epitope_by_affinity.hla.name, - name="Best_affinity_MHCI_9mer_allele", + value=self.best_epitope_by_affinity.affinity_wild_type, + name="NetMHCpan_bestAffinity_affinityWT", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_epitope_by_affinity.peptide, - name="Best_affinity_MHCI_9mer_epitope", + value=self.best_epitope_by_affinity.wild_type_peptide, + name="NetMHCpan_bestAffinity_peptideWT", )]) - # wt - if self.best_wt_epitope_by_affinity: + if self.best_ninemer_epitope_by_rank: annotations.extend([ AnnotationFactory.build_annotation( - value=self.best_wt_epitope_by_affinity.affinity_score, - name="Best_affinity_MHCI_score_WT", + value=self.best_ninemer_epitope_by_rank.rank_mutated, + name="NetMHCpan_bestRank9mer_rank", ), AnnotationFactory.build_annotation( - value=self.best_wt_epitope_by_affinity.peptide, - name="Best_affinity_MHCI_epitope_WT", + value=self.best_ninemer_epitope_by_rank.mutated_peptide, + name="NetMHCpan_bestRank9mer_peptide", ), AnnotationFactory.build_annotation( - value=self.best_wt_epitope_by_affinity.hla.name, - name="Best_affinity_MHCI_allele_WT", - )]) - if self.best_wt_epitope_by_rank: - annotations.extend([ - AnnotationFactory.build_annotation( - 
value=self.best_wt_epitope_by_rank.rank, name="Best_rank_MHCI_score_WT" + value=self.best_ninemer_epitope_by_rank.allele_mhc_i.name, + name="NetMHCpan_bestRank9mer_allele", ), AnnotationFactory.build_annotation( - value=self.best_wt_epitope_by_rank.peptide, - name="Best_rank_MHCI_score_epitope_WT", + value=self.best_ninemer_epitope_by_rank.rank_wild_type, + name="NetMHCpan_bestRank9mer_rankWT", ), AnnotationFactory.build_annotation( - value=self.best_wt_epitope_by_rank.hla.name, - name="Best_rank_MHCI_score_allele_WT", - )]) - if self.best_ninemer_wt_epitope_by_rank: + value=self.best_ninemer_epitope_by_rank.wild_type_peptide, + name="NetMHCpan_bestRank9mer_peptideWT", + ) + ]) + if self.best_ninemer_epitope_by_affinity: annotations.extend([ AnnotationFactory.build_annotation( - value=self.best_ninemer_wt_epitope_by_rank.rank, - name="Best_rank_MHCI_9mer_score_WT", + value=self.best_ninemer_epitope_by_affinity.affinity_mutated, + name="NetMHCpan_bestAffinity9mer_affinity", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_wt_epitope_by_rank.peptide, - name="Best_rank_MHCI_9mer_epitope_WT", + value=self.best_ninemer_epitope_by_affinity.allele_mhc_i.name, + name="NetMHCpan_bestAffinity9mer_allele", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_wt_epitope_by_rank.hla.name, - name="Best_rank_MHCI_9mer_allele_WT", - )]) - if self.best_ninemer_wt_epitope_by_affinity: - annotations.extend([ - AnnotationFactory.build_annotation( - value=self.best_ninemer_wt_epitope_by_affinity.affinity_score, - name="Best_affinity_MHCI_9mer_score_WT", + value=self.best_ninemer_epitope_by_affinity.mutated_peptide, + name="NetMHCpan_bestAffinity9mer_peptide", ), AnnotationFactory.build_annotation( - value=self.best_ninemer_wt_epitope_by_affinity.hla.name, - name="Best_affinity_MHCI_9mer_allele_WT", + value=self.best_ninemer_epitope_by_affinity.affinity_wild_type, + name="NetMHCpan_bestAffinity9mer_affinityWT", ), AnnotationFactory.build_annotation( - 
value=self.best_ninemer_wt_epitope_by_affinity.peptide, - name="Best_affinity_MHCI_9mer_epitope_WT", - )]) + value=self.best_ninemer_epitope_by_affinity.wild_type_peptide, + name="NetMHCpan_bestAffinity9mer_peptideWT", + ) + ]) if self.organism == ORGANISM_HOMO_SAPIENS: annotations.extend([AnnotationFactory.build_annotation(value=self.phbr_i, name="PHBR_I")]) annotations.extend([ # generator rate - AnnotationFactory.build_annotation(value=self.generator_rate, name="Generator_rate_MHCI"), - AnnotationFactory.build_annotation(value=self.generator_rate_cdn, name="Generator_rate_CDN_MHCI"), - AnnotationFactory.build_annotation(value=self.generator_rate_adn, name="Generator_rate_ADN_MHCI") + AnnotationFactory.build_annotation(value=self.generator_rate, name="GeneratorRate_MHCI"), + AnnotationFactory.build_annotation(value=self.generator_rate_cdn, name="GeneratorRate_CDN_MHCI"), + AnnotationFactory.build_annotation(value=self.generator_rate_adn, name="GeneratorRate_ADN_MHCI") ]) - annotations.extend(self._get_positions_and_mutation_in_anchor(mutation)) + annotations.extend(self._get_positions_and_mutation_in_anchor()) return annotations - def _get_positions_and_mutation_in_anchor(self, mutation): + def _get_positions_and_mutation_in_anchor(self): """ returns if mutation is in anchor position for best affinity epitope over all lengths and best 9mer affinity """ position_9mer = None mutation_in_anchor_9mer = None - if self.best_ninemer_epitope_by_affinity.peptide and mutation.wild_type_xmer: - position_9mer = EpitopeHelper.position_of_mutation_epitope( - wild_type=self.best_ninemer_wt_epitope_by_affinity.peptide, - mutation=self.best_ninemer_epitope_by_affinity.peptide, - ) + if self.best_ninemer_epitope_by_affinity.mutated_peptide and self.best_ninemer_epitope_by_affinity.wild_type_peptide: + position_9mer = EpitopeHelper.position_of_mutation_epitope(epitope=self.best_ninemer_epitope_by_affinity) mutation_in_anchor_9mer = EpitopeHelper.position_in_anchor_position( 
position_mhci=position_9mer, - peptide_length=len(self.best_ninemer_epitope_by_affinity.peptide), + peptide_length=len(self.best_ninemer_epitope_by_affinity.mutated_peptide), ) annotations = [ AnnotationFactory.build_annotation( - value=position_9mer, name="Best_affinity_MHCI_9mer_position_mutation" + value=position_9mer, name="NetMHCpan_bestAffinity9mer_positionMutation" ), AnnotationFactory.build_annotation( value=mutation_in_anchor_9mer, - name="Best_affinity_MHCI_9mer_anchor_mutated", + name="NetMHCpan_bestAffinity9mer_anchorMutated", ), ] return annotations + + @staticmethod + def get_annotations_epitope_mhci(epitope: PredictedEpitope) -> List[Annotation]: + position = EpitopeHelper.position_of_mutation_epitope(epitope=epitope) + mutation_in_anchor = EpitopeHelper.position_in_anchor_position( + position_mhci=position, peptide_length=len(epitope.mutated_peptide)) + return [ + AnnotationFactory.build_annotation( + value=position, + name='position_mutation'), + AnnotationFactory.build_annotation( + value=mutation_in_anchor, + name='anchor_mutated') + ] diff --git a/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py b/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py index 06d6ff44..a42933e9 100755 --- a/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py +++ b/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py @@ -20,17 +20,26 @@ import tempfile from typing import List - -from neofox.exceptions import NeofoxConfigurationException +import os from neofox.helpers import intermediate_files -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import ( - AbstractNetMhcPanPredictor, - PredictedEpitope, -) -from neofox.model.neoantigen import Mhc2, MhcAllele, Mhc2Name, Mhc2Isoform, Mhc2GeneName +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.helpers.runner import Runner +from neofox.model.mhc_parser import MhcParser +from neofox.model.neoantigen import 
Mhc2, Mhc2Name, Mhc2Isoform, PredictedEpitope, Neoantigen +from neofox.references.references import DependenciesConfiguration + + +class NetMhcIIPanPredictor: + def __init__( + self, runner: Runner, configuration: DependenciesConfiguration, + blastp_runner: BlastpRunner, mhc_parser: MhcParser): -class NetMhcIIPanPredictor(AbstractNetMhcPanPredictor): + self.runner = runner + self.configuration = configuration + self.mhc_parser = mhc_parser + self.blastp_runner = blastp_runner @staticmethod def generate_mhc2_alelle_combinations(mhc_alleles: List[Mhc2]) -> List[Mhc2Isoform]: @@ -60,15 +69,12 @@ def generate_mhc2_alelle_combinations(mhc_alleles: List[Mhc2]) -> List[Mhc2Isofo def represent_mhc2_isoforms(self, isoforms: List[Mhc2Isoform]) -> List[str]: return [self.mhc_parser.get_netmhc2pan_representation(i) for i in isoforms] - def mhc2_prediction( - self, mhc_alleles: List[str], sequence - ) -> List[PredictedEpitope]: + def mhc2_prediction(self, mhc_alleles: List[str], sequence) -> List[PredictedEpitope]: """ Performs netmhcIIpan prediction for desired hla alleles and writes result to temporary file.""" # TODO: integrate generate_mhc_ii_alelle_combinations() here to easu utilisation tmp_fasta = intermediate_files.create_temp_fasta( [sequence], prefix="tmp_singleseq_" ) - tmp_folder = tempfile.mkdtemp(prefix="tmp_netmhcIIpan_") lines, _ = self.runner.run_command( [ self.configuration.net_mhc2_pan, @@ -76,21 +82,17 @@ def mhc2_prediction( "-a", ",".join(mhc_alleles), "-f", - tmp_fasta, - "-tdir", - tmp_folder, - "-dirty", + tmp_fasta ] ) + os.remove(tmp_fasta) return self._parse_netmhcpan_output(lines) def mhc2_prediction_peptide( - self, mhc2_isoform: Mhc2Isoform, sequence ) -> List[PredictedEpitope]: + self, mhc2_isoform: Mhc2Isoform, sequence ) -> PredictedEpitope: """ Performs netmhcIIpan prediction for desired hla allele and writes result to temporary file.""" - tmp_peptide = intermediate_files.create_temp_peptide( - [sequence], prefix="tmp_singleseq_" - ) - 
tmp_folder = tempfile.mkdtemp(prefix="tmp_netmhcIIpan_") + result = None + tmp_peptide = intermediate_files.create_temp_peptide([sequence], prefix="tmp_singleseq_") lines, _ = self.runner.run_command( cmd=[ self.configuration.net_mhc2_pan, @@ -101,13 +103,14 @@ def mhc2_prediction_peptide( "1", "-f", tmp_peptide, - "-tdir", - tmp_folder, - "-dirty", ], print_log=False ) - return self._parse_netmhcpan_output(lines) + predicted_epitopes = self._parse_netmhcpan_output(lines) + if predicted_epitopes: + result = predicted_epitopes[0] + os.remove(tmp_peptide) + return result def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]: results = [] @@ -120,11 +123,40 @@ def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]: line = line[0:-1] if len(line) > 12 else line results.append( PredictedEpitope( - pos=int(line[0]), - hla=self.mhc_parser.parse_mhc2_isoform(line[1]), - peptide=line[2], - affinity_score=float(line[11]), - rank=float(line[8]), + position=int(line[0]), + isoform_mhc_i_i=self.mhc_parser.parse_mhc2_isoform(line[1]), + mutated_peptide=line[2], + affinity_mutated=float(line[11]), + rank_mutated=float(line[8]), ) ) return results + + def set_wt_netmhcpan_scores(self, predictions) -> List[PredictedEpitope]: + for p in predictions: + if p.wild_type_peptide is not None: + wt_prediction = self.mhc2_prediction_peptide( + mhc2_isoform=p.isoform_mhc_i_i, + sequence=p.wild_type_peptide) + if wt_prediction is not None: + # NOTE: netmhcpan in peptide mode should return only one epitope + p.rank_wild_type = wt_prediction.rank_mutated + p.affinity_wild_type = wt_prediction.affinity_mutated + return predictions + + def get_wt_predictions(self, neoantigen: Neoantigen, patient_mhc2_isoforms): + predictions = self.mhc2_prediction(patient_mhc2_isoforms, neoantigen.wild_type_xmer) + predictions = EpitopeHelper.filter_peptides_covering_snv(neoantigen.position, predictions) + return predictions + + def get_predictions(self, neoantigen: Neoantigen, 
patient_mhc2_isoforms, uniprot): + + predictions = self.mhc2_prediction(patient_mhc2_isoforms, neoantigen.mutated_xmer) + if neoantigen.wild_type_xmer: + # make sure that predicted epitopes cover mutation in case of SNVs + predictions = EpitopeHelper.filter_peptides_covering_snv( + position_of_mutation=neoantigen.position, predictions=predictions + ) + # make sure that predicted neoepitopes are not part of the WT proteome + filtered_predictions = EpitopeHelper.remove_peptides_in_proteome(predictions, uniprot) + return filtered_predictions diff --git a/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py b/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py index 79f9ac71..df20503c 100755 --- a/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py +++ b/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py @@ -19,56 +19,78 @@ # along with this program. If not, see .# from typing import List, Set from logzero import logger - +import os from neofox.exceptions import NeofoxCommandException from neofox.helpers import intermediate_files -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import ( - AbstractNetMhcPanPredictor, - PredictedEpitope, -) -from neofox.model.neoantigen import Mhc1 +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.helpers.runner import Runner +from neofox.model.mhc_parser import MhcParser +from neofox.model.neoantigen import Mhc1, PredictedEpitope, Zygosity, Neoantigen +from neofox.references.references import DependenciesConfiguration + + +PEPTIDE_LENGTHS = ["8", "9", "10", "11", "12", "13", "14"] -class NetMhcPanPredictor(AbstractNetMhcPanPredictor): +class NetMhcPanPredictor: - def mhc_prediction( - self, mhc_alleles: List[Mhc1], set_available_mhc: Set, sequence + def __init__( + self, runner: Runner, configuration: DependenciesConfiguration, + blastp_runner: BlastpRunner, mhc_parser: MhcParser): - ) -> List[PredictedEpitope]: + self.runner = 
runner + self.configuration = configuration + self.mhc_parser = mhc_parser + self.blastp_runner = blastp_runner + + def mhc_prediction(self, available_alleles, sequence) -> List[PredictedEpitope]: """Performs netmhcpan4 prediction for desired hla allele and writes result to temporary file.""" - input_fasta = intermediate_files.create_temp_fasta( - sequences=[sequence], prefix="tmp_singleseq_" - ) - available_alleles = self._get_only_available_alleles(mhc_alleles, set_available_mhc) + + input_file = intermediate_files.create_temp_fasta(sequences=[sequence], prefix="tmp_singleseq_") + if available_alleles is None or available_alleles == "": - raise NeofoxCommandException("None of the provided MHC I alleles are supported: {}".format(mhc_alleles)) + raise NeofoxCommandException("None of the provided MHC I alleles are supported: {}".format(available_alleles)) cmd = [ self.configuration.net_mhc_pan, "-a", available_alleles, "-f", - input_fasta, + input_file, "-BA", + "-l {}".format(",".join(PEPTIDE_LENGTHS)) ] + lines, _ = self.runner.run_command(cmd) + os.remove(input_file) return self._parse_netmhcpan_output(lines) - def mhc_prediction_peptide(self, mhc_alleles: List[Mhc1], set_available_mhc: Set, sequence - ) -> List[PredictedEpitope]: - """Performs netmhcpan4 prediction for desired hla allele and writes result to temporary file.""" - input_peptide = intermediate_files.create_temp_peptide( - sequences=[sequence], prefix="tmp_singleseq_" - ) + def mhc_prediction_peptide(self, alleles, sequence) -> PredictedEpitope: + """ + Performs netmhcpan4 prediction for desired hla allele and writes result to temporary file. 
+ peptide mode cannot use FASTA format and does not provide peptide lengths + """ + + result = None + input_file = intermediate_files.create_temp_peptide(sequences=[sequence], prefix="tmp_singleseq_") + + if alleles is None or alleles == "": + raise NeofoxCommandException("None of the provided MHC I alleles are supported: {}".format(alleles)) cmd = [ self.configuration.net_mhc_pan, "-a", - self._get_only_available_alleles(mhc_alleles, set_available_mhc), + alleles, "-p", - input_peptide, - "-BA", + input_file, + "-BA" ] - lines, _ = self.runner.run_command(cmd, print_log=False) - return self._parse_netmhcpan_output(lines) + + lines, _ = self.runner.run_command(cmd) + predicted_epitopes = self._parse_netmhcpan_output(lines) + if predicted_epitopes: + result = predicted_epitopes[0] + os.remove(input_file) + return result def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]: results = [] @@ -85,27 +107,24 @@ def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]: line = line[0:-2] if len(line) > 16 else line results.append( PredictedEpitope( - pos=int(line[0]), - hla=self.mhc_parser.parse_mhc_allele(line[1]), - peptide=line[2], - affinity_score=float(line[15]), - rank=float(line[12]), + position=int(line[0]), + allele_mhc_i=self.mhc_parser.parse_mhc_allele(line[1]), + mutated_peptide=line[2], + affinity_mutated=float(line[15]), + rank_mutated=float(line[12]), ) ) return results - def get_alleles_netmhcpan_representation(self, mhc_isoforms: List[Mhc1]) -> List[str]: + def get_alleles_netmhcpan_representation(self, mhc: List[Mhc1]) -> List[str]: return list( map( - self.mhc_parser.get_netmhcpan_representation, [a for m in mhc_isoforms for a in m.alleles], + self.mhc_parser.get_netmhcpan_representation, [a for m in mhc for a in m.alleles], ) ) - def _get_only_available_alleles(self, mhc_alleles: List[Mhc1], set_available_mhc: Set[str] - ) -> str: - hla_alleles_names = self.get_alleles_netmhcpan_representation( - mhc_alleles - ) + def 
get_only_available_alleles(self, mhc_alleles: List[Mhc1], set_available_mhc: Set[str]) -> str: + hla_alleles_names = self.get_alleles_netmhcpan_representation(mhc_alleles) patients_available_alleles = ",".join( list(filter(lambda x: x in set_available_mhc, hla_alleles_names)) ) @@ -118,3 +137,35 @@ def _get_only_available_alleles(self, mhc_alleles: List[Mhc1], set_available_mhc "include it".format(",".join(patients_not_available_alleles)) ) return patients_available_alleles + + def set_wt_netmhcpan_scores(self, predictions) -> List[PredictedEpitope]: + for p in predictions: + if p.wild_type_peptide is not None: + wt_prediction = self.mhc_prediction_peptide( + alleles=self.mhc_parser.get_netmhcpan_representation(p.allele_mhc_i), sequence=p.wild_type_peptide) + if wt_prediction is not None: + # NOTE: netmhcpan in peptide mode should return only one epitope + p.rank_wild_type = wt_prediction.rank_mutated + p.affinity_wild_type = wt_prediction.affinity_mutated + return predictions + + def get_predictions(self, available_alleles, neoantigen: Neoantigen, uniprot) -> List[PredictedEpitope]: + predictions = self.mhc_prediction(available_alleles, neoantigen.mutated_xmer) + if neoantigen.wild_type_xmer: + # make sure that predicted epitopes cover mutation in case of SNVs + predictions = EpitopeHelper.filter_peptides_covering_snv( + position_of_mutation=neoantigen.position, predictions=predictions + ) + # make sure that predicted neoepitopes are not part of the WT proteome + filtered_predictions = EpitopeHelper.remove_peptides_in_proteome( + predictions=predictions, uniprot=uniprot + ) + return filtered_predictions + + def get_wt_predictions(self, available_alleles, neoantigen) -> List[PredictedEpitope]: + predictions = self.mhc_prediction(available_alleles, neoantigen.wild_type_xmer) + # make sure that predicted epitopes cover mutation in case of SNVs + predictions = EpitopeHelper.filter_peptides_covering_snv( + position_of_mutation=neoantigen.position, 
predictions=predictions + ) + return predictions diff --git a/neofox/published_features/prime.py b/neofox/MHC_predictors/prime.py similarity index 61% rename from neofox/published_features/prime.py rename to neofox/MHC_predictors/prime.py index b6c0854a..4931925f 100755 --- a/neofox/published_features/prime.py +++ b/neofox/MHC_predictors/prime.py @@ -21,12 +21,11 @@ from pandas.errors import EmptyDataError -from neofox.exceptions import NeofoxCommandException from neofox.helpers.epitope_helper import EpitopeHelper from neofox.helpers.runner import Runner from neofox.model.mhc_parser import MhcParser -from neofox.model.neoantigen import Annotation, Mhc1, MhcAllele, Mutation +from neofox.model.neoantigen import Annotation, Mhc1, MhcAllele, PredictedEpitope, Neoantigen from neofox.model.factories import AnnotationFactory from neofox.helpers import intermediate_files import pandas as pd @@ -42,12 +41,22 @@ class Prime: + + ANNOTATION_PREFIX = 'PRIME' + ANNOTATION_PREFIX_WT = 'PRIME_WT' + def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser): self.runner = runner self.configuration = configuration self.available_alleles = self._load_available_alleles() self.mhc_parser = mhc_parser + self.best_peptide = None + self.best_rank = None + self.best_allele = None + self.best_score = None + self.results = None + def _load_available_alleles(self): """ loads file with available HLA II alllels for Prime prediction, returns set @@ -68,9 +77,26 @@ def _get_mixmhc_allele_representation(self, mhc_alleles: List[MhcAllele]): ) ) - def _prime( - self, mhc_alleles: List[str], potential_ligand_sequences - ) -> pd.DataFrame: + def _parse_prime_output(self, filename: str) -> List[PredictedEpitope]: + + parsed_results = [] + try: + results = pd.read_csv(filename, sep="\t", comment="#") + except EmptyDataError: + logger.error("Results from PRIME are empty, something went wrong") + results = pd.DataFrame() + + for _, row in results.iterrows(): + 
parsed_results.append( + PredictedEpitope( + allele_mhc_i=self.mhc_parser.parse_mhc_allele(row[ALLELE]), + mutated_peptide=row[PEPTIDE], + affinity_mutated=float(row[SCORE]), + rank_mutated=float(row[RANK]), + )) + return parsed_results + + def _prime(self, mhc_alleles: List[str], potential_ligand_sequences) -> List[PredictedEpitope]: """ Runs PRIME for desired hla allele and writes result to temporary file. """ @@ -92,62 +118,53 @@ def _prime( self.runner.run_command( cmd=command ) - try: - results = pd.read_csv(outtmp, sep="\t", comment="#") - except EmptyDataError: - message = "Results from PRIME are empty, something went wrong [{}]. MHC I alleles {}, ligands {}".format( - " ".join(command), ",".join(mhc_alleles), potential_ligand_sequences - ) - logger.error(message) - results = pd.DataFrame() + results = self._parse_prime_output(filename=outtmp) os.remove(outtmp) + os.remove(tmpfasta) return results - def run(self, mutation: Mutation, mhc: List[Mhc1], uniprot): + def run(self, neoantigen: Neoantigen, mhc: List[Mhc1], uniprot): """Wrapper PRIME prediction, extraction of best epitope per mutations""" - best_peptide = None - best_rank = None - best_allele = None - best_score = None + # TODO: get rid of this + self.results = None - if not EpitopeHelper.contains_rare_amino_acid(mutation.mutated_xmer): + if not EpitopeHelper.contains_rare_amino_acid(neoantigen.mutated_xmer): potential_ligand_sequences = EpitopeHelper.generate_nmers( - mutation=mutation, lengths=[8, 9, 10, 11, 12, 13, 14], uniprot=uniprot + neoantigen=neoantigen, lengths=[8, 9, 10, 11, 12, 13, 14], uniprot=uniprot ) if len(potential_ligand_sequences) > 0: mhc1_alleles = self._get_mixmhc_allele_representation([a for m in mhc for a in m.alleles]) if len(mhc1_alleles) > 0: - results = self._prime(mhc1_alleles, potential_ligand_sequences) - try: - # get best result by maximum score - best_result = results[results[SCORE] == results[SCORE].max()] - best_peptide = best_result[PEPTIDE].iat[0] - best_rank = 
best_result[RANK].iat[0] - # normalize the HLA allele name - best_allele = self.mhc_parser.parse_mhc_allele(best_result[ALLELE].iat[0]).name - best_score = best_result[SCORE].iat[0] - except (IndexError, KeyError): - logger.info("PRIME returned no best result") + self.results = self._prime(mhc1_alleles, potential_ligand_sequences) else: logger.warning("None of the MHC I alleles are supported by PRIME") - return best_peptide, best_rank, best_allele, best_score + def run_peptide(self, peptide: str, allele: MhcAllele) -> PredictedEpitope: + result = None + if not EpitopeHelper.contains_rare_amino_acid(peptide=peptide): + mhc1_alleles = self._get_mixmhc_allele_representation([allele]) + if len(mhc1_alleles) > 0: + results = self._prime(mhc1_alleles, [peptide]) + if results: + result = results[0] + else: + logger.warning("None of the MHC I alleles are supported by PRIME") + return result - def get_annotations(self, mutation: Mutation, mhc: List[Mhc1], uniprot) -> List[Annotation]: - best_peptide, best_rank, best_allele, best_score = self.run( - mhc=mhc, mutation=mutation, uniprot=uniprot - ) + def get_annotations(self) -> List[Annotation]: + + best_result = EpitopeHelper.select_best_by_affinity(predictions=self.results, maximum=True) return [ AnnotationFactory.build_annotation( - value=best_peptide, name="PRIME_best_peptide" + value=best_result.mutated_peptide, name="PRIME_best_peptide" ), AnnotationFactory.build_annotation( - value=best_score, name="PRIME_best_score" + value=best_result.affinity_mutated, name="PRIME_best_score" ), AnnotationFactory.build_annotation( - value=best_rank, name="PRIME_best_rank" + value=best_result.rank_mutated, name="PRIME_best_rank" ), AnnotationFactory.build_annotation( - value=best_allele, name="PRIME_best_allele" + value=best_result.allele_mhc_i.name, name="PRIME_best_allele" ), ] diff --git a/neofox/__init__.py b/neofox/__init__.py index c6321016..4898d33d 100755 --- a/neofox/__init__.py +++ b/neofox/__init__.py @@ -16,7 +16,9 @@ # # 
You should have received a copy of the GNU General Public License # along with this program. If not, see .# -VERSION = "0.6.4" + + +VERSION = "1.0.0" REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER" @@ -36,4 +38,5 @@ NOT_AVAILABLE_VALUE = "NA" -AFFINITY_THRESHOLD_DEFAULT = 500000 +RANK_MHCI_THRESHOLD_DEFAULT = 2.0 +RANK_MHCII_THRESHOLD_DEFAULT = 5.0 diff --git a/neofox/annotation_resources/uniprot/uniprot.py b/neofox/annotation_resources/uniprot/uniprot.py index 4e313822..ec81978b 100755 --- a/neofox/annotation_resources/uniprot/uniprot.py +++ b/neofox/annotation_resources/uniprot/uniprot.py @@ -19,7 +19,7 @@ import pickle from typing import List from logzero import logger -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory @@ -50,3 +50,10 @@ def get_annotations(self, sequence_not_in_uniprot: bool) -> List[Annotation]: name="mutation_not_found_in_proteome", value=sequence_not_in_uniprot ) ] + + def get_annotations_epitope(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.is_sequence_not_in_uniprot(epitope.mutated_peptide), + name='mutation_not_found_in_proteome') + ] diff --git a/neofox/annotator.py b/neofox/annotator.py index d68659ac..e69de29b 100755 --- a/neofox/annotator.py +++ b/neofox/annotator.py @@ -1,571 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. -# -# This file is part of Neofox -# (see https://github.com/tron-bioinformatics/neofox). -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see .# -from logzero import logger -from datetime import datetime -from distributed import get_client, secede, rejoin -import neofox -import time -from neofox.annotation_resources.uniprot.uniprot import Uniprot -from neofox.helpers.blastp_runner import BlastpRunner -from neofox.helpers.epitope_helper import EpitopeHelper -from neofox.helpers.runner import Runner -from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMhc2Pred -from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred -from neofox.MHC_predictors.netmhcpan.combine_netmhcIIpan_pred_multiple_binders import ( - BestAndMultipleBinderMhcII, -) -from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import ( - BestAndMultipleBinder, -) -from neofox.model.mhc_parser import MhcParser -from neofox.published_features.differential_binding.amplitude import Amplitude -from neofox.published_features.differential_binding.differential_binding import ( - DifferentialBinding, -) -from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import ( - TcellPrediction, -) -from neofox.published_features.dissimilarity_garnish.dissimilaritycalculator import ( - DissimilarityCalculator, -) -from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator -from neofox.published_features.neoantigen_fitness.neoantigen_fitness import ( - NeoantigenFitnessCalculator, -) -from neofox.published_features.self_similarity.self_similarity import ( - SelfSimilarityCalculator, -) -from neofox.published_features.vaxrank import vaxrank -from neofox.published_features.iedb_immunogenicity.iedb import IEDBimmunogenicity -from 
neofox.published_features.expression import Expression -from neofox.published_features.priority_score import PriorityScore -from neofox.published_features.prime import Prime -from neofox.published_features.hex.hex import Hex -from neofox.model.neoantigen import Patient, Neoantigen, NeoantigenAnnotations -from neofox.references.references import ( - ReferenceFolder, - DependenciesConfiguration, - AvailableAlleles, ORGANISM_HOMO_SAPIENS -) - - -class NeoantigenAnnotator: - def __init__( - self, - references: ReferenceFolder, - configuration: DependenciesConfiguration, - tcell_predictor: TcellPrediction, - self_similarity: SelfSimilarityCalculator, - affinity_threshold=neofox.AFFINITY_THRESHOLD_DEFAULT - ): - """class to annotate neoantigens""" - self.runner = Runner() - self.configuration = configuration - self.proteome_db = references.proteome_db - self.available_alleles = references.get_available_alleles() - self.tcell_predictor = tcell_predictor - self.self_similarity = self_similarity - self.organism = references.organism - - # NOTE: this one loads a big file, but it is faster loading it multiple times than passing it around - self.uniprot = Uniprot(references.uniprot_pickle) - - # initialise proteome and IEDB BLASTP runners - self.proteome_blastp_runner = BlastpRunner( - runner=self.runner, configuration=configuration, - database=references.get_proteome_database()) - self.iedb_blastp_runner = BlastpRunner( - runner=self.runner, configuration=configuration, - database=references.get_iedb_database()) - - # NOTE: these resources do not read any file thus can be initialised fast - self.dissimilarity_calculator = DissimilarityCalculator( - proteome_blastp_runner=self.proteome_blastp_runner, affinity_threshold=affinity_threshold) - self.neoantigen_fitness_calculator = NeoantigenFitnessCalculator(iedb_blastp_runner=self.iedb_blastp_runner) - self.neoag_calculator = NeoagCalculator( - runner=self.runner, configuration=configuration, affinity_threshold=affinity_threshold 
- ) - self.differential_binding = DifferentialBinding(affinity_threshold=affinity_threshold) - self.priority_score_calculator = PriorityScore() - self.iedb_immunogenicity = IEDBimmunogenicity(affinity_threshold=affinity_threshold) - self.amplitude = Amplitude() - self.hex = Hex(runner=self.runner, configuration=configuration, references=references) - self.mhc_database = references.get_mhc_database() - self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database) - - self.resources_versions = references.get_resources_versions() - - def get_annotation(self, neoantigen: Neoantigen, patient: Patient) -> Neoantigen: - """Calculate new epitope features and add to dictonary that stores all properties""" - neoantigen.neofox_annotations = NeoantigenAnnotations( - annotator="NeoFox", - annotator_version=neofox.VERSION, - timestamp="{:%Y%m%d%H%M%S%f}".format(datetime.now()), - resources=self.resources_versions, - annotations=[] - ) - - # Runs netmhcpan, netmhc2pan, mixmhcpred and mixmhc2prd in parallel - ( - mixmhc2pred_annotations, - mixmhcpred_annotations, - netmhc2pan, - netmhcpan, - prime_annotations - ) = self._compute_long_running_tasks(neoantigen, patient) - - # HLA I predictions: NetMHCpan - if netmhcpan: - neoantigen.neofox_annotations.annotations.extend(netmhcpan.get_annotations(mutation=neoantigen.mutation)) - - # HLA II predictions: NetMHCIIpan - if netmhc2pan: - neoantigen.neofox_annotations.annotations.extend(netmhc2pan.get_annotations()) - - # MixMHCpred - if mixmhcpred_annotations is not None: - neoantigen.neofox_annotations.annotations.extend(mixmhcpred_annotations) - - # PRIME - if prime_annotations is not None: - neoantigen.neofox_annotations.annotations.extend(prime_annotations) - - # MixMHC2pred - if mixmhc2pred_annotations is not None: - neoantigen.neofox_annotations.annotations.extend(mixmhc2pred_annotations) - - # decides which VAF to use - vaf_rna = neoantigen.rna_variant_allele_frequency - if not patient.is_rna_available and 
neoantigen.dna_variant_allele_frequency is not None: - logger.warning( - "Using the DNA VAF to estimate the RNA VAF as the patient does not have RNA available" - ) - # TODO: overwrite value in the neoantigen object - vaf_rna = neoantigen.dna_variant_allele_frequency - - # MHC binding independent features - start = time.time() - expression_calculator = Expression( - transcript_expression=neoantigen.rna_expression, vaf_rna=vaf_rna - ) - neoantigen.neofox_annotations.annotations.extend(expression_calculator.get_annotations()) - end = time.time() - logger.info( - "Expression annotation elapsed time {} seconds".format( - round(end - start, 3) - ) - ) - - start = time.time() - sequence_not_in_uniprot = self.uniprot.is_sequence_not_in_uniprot( - neoantigen.mutation.mutated_xmer - ) - neoantigen.neofox_annotations.annotations.extend( - self.uniprot.get_annotations(sequence_not_in_uniprot) - ) - end = time.time() - logger.info( - "Uniprot annotation elapsed time {} seconds".format(round(end - start, 3)) - ) - - # Amplitude - start = time.time() - self.amplitude.run(netmhcpan=netmhcpan, netmhc2pan=netmhc2pan) - neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations()) - neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations_mhc2()) - end = time.time() - logger.info( - "Amplitude annotation elapsed time {} seconds".format(round(end - start, 3)) - ) - - # Neoantigen fitness - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.neoantigen_fitness_calculator.get_annotations( - mutated_peptide_mhci=netmhcpan.best_ninemer_epitope_by_affinity if netmhcpan else None, - mutation_in_anchor=netmhcpan.mutation_in_anchor_9mer if netmhcpan else None, - amplitude=self.amplitude.amplitude_mhci_affinity_9mer, - mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None - ) - ) - end = time.time() - logger.info( - "Neoantigen annotation elapsed time {} seconds".format( - round(end - 
start, 3) - ) - ) - - # Differential Binding - start = time.time() - if netmhcpan: - neoantigen.neofox_annotations.annotations.extend( - self.differential_binding.get_annotations_dai( - mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity, - wt_peptide_mhcii=netmhcpan.best_wt_epitope_by_affinity - ) - ) - neoantigen.neofox_annotations.annotations.extend( - self.differential_binding.get_annotations(mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity, - amplitude=self.amplitude) - ) - if netmhc2pan: - neoantigen.neofox_annotations.annotations.extend( - self.differential_binding.get_annotations_mhc2(mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_rank, - amplitude=self.amplitude) - ) - end = time.time() - logger.info( - "Differential binding annotation elapsed time {} seconds".format( - round(end - start, 3) - ) - ) - - # T cell predictor - if netmhcpan: - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.tcell_predictor.get_annotations( - neoantigen=neoantigen, netmhcpan=netmhcpan - ) - ) - end = time.time() - logger.info( - "T-cell predictor annotation elapsed time {} seconds".format( - round(end - start, 3) - ) - ) - - # self-similarity - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.self_similarity.get_annnotations( - mutated_peptide_mhci=netmhcpan.best_epitope_by_rank if netmhcpan else None, - wt_peptide_mhci=netmhcpan.best_wt_epitope_by_rank if netmhcpan else None, - mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None, - wt_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity_wt if netmhc2pan else None, - ) - ) - end = time.time() - logger.info( - "Self similarity annotation elapsed time {} seconds".format( - round(end - start, 3) - ) - ) - - # number of mismatches and priority score - if netmhcpan and netmhcpan: - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.priority_score_calculator.get_annotations( - 
netmhcpan=netmhcpan, - vaf_transcr=vaf_rna, - vaf_tum=neoantigen.dna_variant_allele_frequency, - expr=neoantigen.rna_expression, - mut_not_in_prot=sequence_not_in_uniprot, - ) - ) - end = time.time() - logger.info( - "Priotity score annotation elapsed time {} seconds".format( - round(end - start, 3) - ) - ) - - # neoag immunogenicity model - if netmhcpan and netmhcpan.best_epitope_by_affinity: - start = time.time() - peptide_variant_position = EpitopeHelper.position_of_mutation_epitope( - wild_type=netmhcpan.best_wt_epitope_by_affinity.peptide, - mutation=netmhcpan.best_epitope_by_affinity.peptide, - ) - neoantigen.neofox_annotations.annotations.append( - self.neoag_calculator.get_annotation( - sample_id=patient.identifier, - mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity, - wt_peptide_mhci=netmhcpan.best_wt_epitope_by_affinity, - peptide_variant_position=peptide_variant_position, - mutation=neoantigen.mutation) - ) - end = time.time() - logger.info( - "Neoag annotation elapsed time {} seconds".format(round(end - start, 3)) - ) - - # IEDB immunogenicity - if self.organism == ORGANISM_HOMO_SAPIENS: - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.iedb_immunogenicity.get_annotations( - mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, - mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None - ) - ) - end = time.time() - logger.info( - "IEDB annotation elapsed time {} seconds".format(round(end - start, 3)) - ) - - # dissimilarity to self-proteome - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.dissimilarity_calculator.get_annotations( - mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, - mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None) - ) - end = time.time() - logger.info( - "Dissimilarity annotation elapsed time {} seconds".format( - round(end - start, 3) - ) - 
) - - # vaxrank - if netmhcpan and netmhcpan.epitope_affinities: - start = time.time() - vaxrankscore = vaxrank.VaxRank() - vaxrankscore.run( - mutation_scores=netmhcpan.epitope_affinities, - expression_score=expression_calculator.expression, - ) - neoantigen.neofox_annotations.annotations.extend(vaxrankscore.get_annotations()) - end = time.time() - logger.info( - "Vaxrank annotation elapsed time {} seconds".format(round(end - start, 3)) - ) - - # hex - # TODO: hex is failing for mouse with the current IEDB fasta with only 2 entries - if self.organism == ORGANISM_HOMO_SAPIENS: - start = time.time() - neoantigen.neofox_annotations.annotations.extend( - self.hex.get_annotation( - mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, - mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None) - ) - end = time.time() - logger.info( - "Hex annotation elapsed time {} seconds".format(round(end - start, 3)) - ) - - return neoantigen - - def _compute_long_running_tasks(self, neoantigen, patient, sequential=True): - - has_mhc1 = patient.mhc1 is not None and len(patient.mhc1) > 0 - has_mhc2 = patient.mhc2 is not None and len(patient.mhc2) > 0 - - netmhcpan = None - netmhc2pan = None - mixmhcpred_annotations = None - mixmhc2pred_annotations = None - prime_annotations = None - - if sequential: - if has_mhc1: - netmhcpan = self.run_netmhcpan( - self.runner, - self.configuration, - self.available_alleles, - self.mhc_parser, - neoantigen, - patient) - if has_mhc2: - netmhc2pan = self.run_netmhc2pan( - self.runner, - self.configuration, - self.available_alleles, - self.mhc_parser, - neoantigen, - patient - ) - # avoids running MixMHCpred and PRIME for non human organisms - if self.organism == ORGANISM_HOMO_SAPIENS: - if self.configuration.mix_mhc2_pred is not None and has_mhc2: - mixmhc2pred_annotations = self.run_mixmhc2pred( - self.runner, - self.configuration, - self.mhc_parser, - neoantigen, - patient, - ) - if 
self.configuration.mix_mhc_pred is not None and has_mhc1: - mixmhcpred_annotations = self.run_mixmhcpred( - self.runner, - self.configuration, - self.mhc_parser, - neoantigen, - patient, - ) - if self.configuration.mix_mhc_pred is not None and has_mhc1: - prime_annotations = self.run_prime( - self.runner, - self.configuration, - self.mhc_parser, - neoantigen, - patient, - ) - else: - dask_client = get_client() - - netmhcpan_future = None - if has_mhc1: - netmhcpan_future = dask_client.submit( - self.run_netmhcpan, - self.runner, - self.references, - self.configuration, - self.available_alleles, - self.mhc_parser, - neoantigen, - patient, - ) - netmhc2pan_future = None - if has_mhc2: - netmhc2pan_future = dask_client.submit( - self.run_netmhc2pan, - self.runner, - self.configuration, - self.available_alleles, - self.mhc_parser, - neoantigen, - patient, - ) - # avoids running MixMHCpred and PRIME for non human organisms - mixmhc2pred_future = None - mixmhcpred_future = None - prime_future = None - if self.organism == ORGANISM_HOMO_SAPIENS: - if self.configuration.mix_mhc2_pred is not None and has_mhc2: - mixmhc2pred_future = dask_client.submit( - self.run_mixmhc2pred, - self.runner, - self.configuration, - self.mhc_parser, - neoantigen, - patient, - ) - if self.configuration.mix_mhc_pred is not None and has_mhc1: - mixmhcpred_future = dask_client.submit( - self.run_mixmhcpred, - self.runner, - self.configuration, - self.mhc_parser, - neoantigen, - patient, - ) - if self.configuration.mix_mhc_pred is not None and has_mhc1: - prime_future = dask_client.submit( - self.run_prime, - self.runner, - self.configuration, - self.mhc_parser, - neoantigen, - patient, - ) - - secede() - - if netmhcpan_future: - netmhcpan = dask_client.gather([netmhcpan_future])[0] - if netmhc2pan_future: - netmhc2pan = dask_client.gather([netmhc2pan_future])[0] - - if self.organism == ORGANISM_HOMO_SAPIENS: - if mixmhcpred_future: - mixmhcpred_annotations = 
dask_client.gather([mixmhcpred_future])[0] - if mixmhc2pred_future: - mixmhc2pred_annotations = dask_client.gather([mixmhc2pred_future])[0] - if prime_future: - prime_annotations = dask_client.gather([prime_future])[0] - rejoin() - - return mixmhc2pred_annotations, mixmhcpred_annotations, netmhc2pan, netmhcpan, prime_annotations - - def run_netmhcpan( - self, - runner: Runner, - configuration: DependenciesConfiguration, - available_alleles: AvailableAlleles, - mhc_parser: MhcParser, - neoantigen: Neoantigen, - patient: Patient, - ): - netmhcpan = BestAndMultipleBinder(runner=runner, configuration=configuration, mhc_parser=mhc_parser, - blastp_runner=self.proteome_blastp_runner) - netmhcpan.run( - mutation=neoantigen.mutation, - mhc1_alleles_patient=patient.mhc1, - mhc1_alleles_available=available_alleles.get_available_mhc_i(), - uniprot=self.uniprot, - ) - return netmhcpan - - def run_netmhc2pan( - self, - runner: Runner, - configuration: DependenciesConfiguration, - available_alleles: AvailableAlleles, - mhc_parser: MhcParser, - neoantigen: Neoantigen, - patient: Patient, - ): - netmhc2pan = BestAndMultipleBinderMhcII( - runner=runner, configuration=configuration, mhc_parser=mhc_parser, - blastp_runner=self.proteome_blastp_runner) - netmhc2pan.run( - mutation=neoantigen.mutation, - mhc2_alleles_patient=patient.mhc2, - mhc2_alleles_available=available_alleles.get_available_mhc_ii(), - uniprot=self.uniprot - ) - return netmhc2pan - - def run_mixmhcpred( - self, - runner: Runner, - configuration: DependenciesConfiguration, - mhc_parser: MhcParser, - neoantigen: Neoantigen, - patient: Patient, - ): - mixmhc = MixMHCpred(runner, configuration, mhc_parser) - return mixmhc.get_annotations(mutation=neoantigen.mutation, mhc=patient.mhc1, uniprot=self.uniprot) - - def run_prime( - self, - runner: Runner, - configuration: DependenciesConfiguration, - mhc_parser: MhcParser, - neoantigen: Neoantigen, - patient: Patient, - ): - prime = Prime(runner, configuration, mhc_parser) - 
return prime.get_annotations(mutation=neoantigen.mutation, mhc=patient.mhc1, uniprot=self.uniprot) - - def run_mixmhc2pred( - self, - runner: Runner, - configuration: DependenciesConfiguration, - mhc_parser: MhcParser, - neoantigen: Neoantigen, - patient: Patient, - ): - mixmhc2 = MixMhc2Pred(runner, configuration, mhc_parser) - return mixmhc2.get_annotations(mhc=patient.mhc2, mutation=neoantigen.mutation, uniprot=self.uniprot) diff --git a/neofox/annotator/__init__.py b/neofox/annotator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/neofox/annotator/abstract_annotator.py b/neofox/annotator/abstract_annotator.py new file mode 100644 index 00000000..fb08f2ff --- /dev/null +++ b/neofox/annotator/abstract_annotator.py @@ -0,0 +1,121 @@ +from abc import ABC + +from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import BestAndMultipleBinder +from neofox.annotation_resources.uniprot.uniprot import Uniprot +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.helpers.runner import Runner +from neofox.model.factories import AnnotationFactory +from neofox.model.neoantigen import PredictedEpitope, Neoantigen, Patient +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from neofox.published_features.differential_binding.amplitude import Amplitude +from neofox.published_features.differential_binding.differential_binding import DifferentialBinding +from neofox.published_features.dissimilarity_garnish.dissimilaritycalculator import DissimilarityCalculator +from neofox.published_features.hex.hex import Hex +from neofox.published_features.iedb_immunogenicity.iedb import IEDBimmunogenicity +from neofox.published_features.neoantigen_fitness.neoantigen_fitness import NeoantigenFitnessCalculator +from neofox.published_features.priority_score import PriorityScore +from neofox.published_features.self_similarity.self_similarity 
import SelfSimilarityCalculator +from neofox.references.references import ORGANISM_HOMO_SAPIENS, ReferenceFolder, DependenciesConfiguration + + +class AbstractAnnotator(ABC): + + def __init__( + self, + references: ReferenceFolder, + configuration: DependenciesConfiguration, + tcell_predictor: TcellPrediction, + self_similarity: SelfSimilarityCalculator + ): + """class to annotate neoantigens""" + self.runner = Runner() + self.configuration = configuration + self.tcell_predictor = tcell_predictor + self.self_similarity = self_similarity + self.organism = references.organism + + # NOTE: this one loads a big file, but it is faster loading it multiple times than passing it around + self.uniprot = Uniprot(references.uniprot_pickle) + + # initialise proteome and IEDB BLASTP runners + self.proteome_blastp_runner = BlastpRunner( + runner=self.runner, configuration=configuration, + database=references.get_proteome_database()) + self.iedb_blastp_runner = BlastpRunner( + runner=self.runner, configuration=configuration, + database=references.get_iedb_database()) + + # NOTE: these resources do not read any file thus can be initialised fast + self.dissimilarity_calculator = DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner) + self.neoantigen_fitness_calculator = NeoantigenFitnessCalculator(iedb_blastp_runner=self.iedb_blastp_runner) + self.differential_binding = DifferentialBinding() + self.priority_score_calculator = PriorityScore() + self.iedb_immunogenicity = IEDBimmunogenicity() + self.amplitude = Amplitude() + self.hex = Hex(runner=self.runner, configuration=configuration, references=references) + + def get_additional_annotations_neoepitope_mhci( + self, epitope: PredictedEpitope, neoantigen: Neoantigen = None) -> PredictedEpitope: + + if neoantigen is not None: + gene = neoantigen.gene + vaf_tumor_dna = neoantigen.dna_variant_allele_frequency + vaf_tumor_rna = neoantigen.rna_variant_allele_frequency + transcript_exp = neoantigen.rna_expression + 
else: + gene = epitope.gene + vaf_tumor_dna = epitope.dna_variant_allele_frequency + vaf_tumor_rna = epitope.rna_variant_allele_frequency + transcript_exp = epitope.rna_expression + + epitope.neofox_annotations.annotations.extend( + BestAndMultipleBinder.get_annotations_epitope_mhci(epitope=epitope) + + self.amplitude.get_annotations_epitope_mhci(epitope=epitope) + ) + + # NOTE: this extend() call cannot be joined with the previous as some of the previous annotations are expected + epitope.neofox_annotations.annotations.extend( + self.neoantigen_fitness_calculator.get_annotations_epitope_mhci(epitope=epitope) + + self.differential_binding.get_annotations_epitope_mhci(epitope=epitope) + + self.self_similarity.get_annotations_epitope_mhci(epitope=epitope) + + self.uniprot.get_annotations_epitope(epitope=epitope) + + self.dissimilarity_calculator.get_annotations_epitope(epitope=epitope) + ) + + epitope.neofox_annotations.annotations.extend(self.tcell_predictor.get_annotations_epitope_mhci( + epitope=epitope, gene=gene)) + + num_mismatches = EpitopeHelper.number_of_mismatches( + epitope_wild_type=epitope.wild_type_peptide, epitope_mutation=epitope.mutated_peptide, ) + epitope.neofox_annotations.annotations.append(AnnotationFactory.build_annotation( + value=num_mismatches, + name='number_of_mismatches')) + + epitope.neofox_annotations.annotations.extend( + self.priority_score_calculator.get_annotations_epitope_mhci( + epitope=epitope, vaf_rna=vaf_tumor_rna, vaf_tumor=vaf_tumor_dna, transcript_exp=transcript_exp)) + + if self.organism == ORGANISM_HOMO_SAPIENS: + epitope.neofox_annotations.annotations.extend( + self.iedb_immunogenicity.get_annotations_epitope_mhci(epitope=epitope) + + self.hex.get_annotations_epitope(epitope=epitope)) + + return epitope + + def get_additional_annotations_neoepitope_mhcii(self, epitope: PredictedEpitope) -> PredictedEpitope: + + epitope.neofox_annotations.annotations.extend( + self.amplitude.get_annotations_epitope_mhcii(epitope=epitope) + 
+ self.neoantigen_fitness_calculator.get_annotations_epitope_mhcii(epitope=epitope) + + self.self_similarity.get_annotations_epitope_mhcii(epitope=epitope) + + self.uniprot.get_annotations_epitope(epitope=epitope) + + self.dissimilarity_calculator.get_annotations_epitope(epitope=epitope)) + + if self.organism == ORGANISM_HOMO_SAPIENS: + + epitope.neofox_annotations.annotations.extend( + self.iedb_immunogenicity.get_annotations_epitope_mhcii(epitope=epitope) + + self.hex.get_annotations_epitope(epitope=epitope)) + + return epitope \ No newline at end of file diff --git a/neofox/annotator/neoantigen_annotator.py b/neofox/annotator/neoantigen_annotator.py new file mode 100755 index 00000000..a6e5f1e2 --- /dev/null +++ b/neofox/annotator/neoantigen_annotator.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# +# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. +# +# This file is part of Neofox +# (see https://github.com/tron-bioinformatics/neofox). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see .# + +from logzero import logger +from datetime import datetime +import neofox +from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred +from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred +from neofox.MHC_predictors.prime import Prime +from neofox.annotator.abstract_annotator import AbstractAnnotator +from neofox.annotator.neoantigen_mhc_binding_annotator import NeoantigenMhcBindingAnnotator +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import BestAndMultipleBinder +from neofox.model.factories import AnnotationFactory +from neofox.model.mhc_parser import MhcParser +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator +from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator +from neofox.published_features.expression import Expression +from neofox.model.neoantigen import Patient, Neoantigen, Annotations, PredictedEpitope +from neofox.published_features.vaxrank.vaxrank import VaxRank +from neofox.references.references import ( + ReferenceFolder, + DependenciesConfiguration, + ORGANISM_HOMO_SAPIENS +) + + +class NeoantigenAnnotator(AbstractAnnotator): + def __init__(self, references: ReferenceFolder, configuration: DependenciesConfiguration, + tcell_predictor: TcellPrediction, self_similarity: SelfSimilarityCalculator, + rank_mhci_threshold=neofox.RANK_MHCI_THRESHOLD_DEFAULT, + rank_mhcii_threshold=neofox.RANK_MHCII_THRESHOLD_DEFAULT): + """class to annotate neoantigens""" + + super().__init__(references, configuration, tcell_predictor, self_similarity) + self.proteome_db = references.proteome_db + self.available_alleles = references.get_available_alleles() + self.rank_mhci_threshold = rank_mhci_threshold + self.rank_mhcii_threshold = rank_mhcii_threshold + + # NOTE: these resources do 
not read any file thus can be initialised fast + self.neoag_calculator = NeoagCalculator(runner=self.runner, configuration=configuration) + self.expression_calculator = Expression() + self.mhc_database = references.get_mhc_database() + self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database) + + self.neoantigen_mhc_binding_annotator = NeoantigenMhcBindingAnnotator( + references=references, configuration=configuration, proteome_blastp_runner=self.proteome_blastp_runner, + uniprot=self.uniprot) + + self.resources_versions = references.get_resources_versions() + + def get_annotated_neoantigen(self, neoantigen: Neoantigen, patient: Patient, with_all_neoepitopes=False) -> Neoantigen: + """Calculate new epitope features and add to dictionary that stores all properties""" + neoantigen.neofox_annotations = Annotations( + annotator="NeoFox", + annotator_version=neofox.VERSION, + timestamp="{:%Y%m%d%H%M%S%f}".format(datetime.now()), + resources=self.resources_versions, + annotations=[] + ) + + # Runs netmhcpan, netmhc2pan, mixmhcpred and mixmhc2prd in parallel + ( + mixmhc2pred, + mixmhcpred, + netmhc2pan, + netmhcpan, + prime + ) = self.neoantigen_mhc_binding_annotator.get_mhc_binding_annotations(neoantigen=neoantigen, patient=patient) + + # HLA I predictions: NetMHCpan + if netmhcpan: + neoantigen.neofox_annotations.annotations.extend(netmhcpan.get_annotations()) + neoantigen.neoepitopes_mhc_i = [e for e in netmhcpan.predictions if e.rank_mutated < self.rank_mhci_threshold] + + # HLA II predictions: NetMHCIIpan + if netmhc2pan: + neoantigen.neofox_annotations.annotations.extend(netmhc2pan.get_annotations()) + neoantigen.neoepitopes_mhc_i_i = [e for e in netmhc2pan.predictions if e.rank_mutated < self.rank_mhcii_threshold] + + # MixMHCpred + if mixmhcpred is not None: + neoantigen.neofox_annotations.annotations.extend(mixmhcpred.get_annotations()) + neoantigen.neoepitopes_mhc_i = AnnotationFactory.annotate_epitopes_with_other_scores( + 
epitopes=neoantigen.neoepitopes_mhc_i, + annotated_epitopes=mixmhcpred.results, + annotation_name=MixMHCpred.ANNOTATION_PREFIX) + + # PRIME + if prime is not None: + neoantigen.neofox_annotations.annotations.extend(prime.get_annotations()) + neoantigen.neoepitopes_mhc_i = AnnotationFactory.annotate_epitopes_with_other_scores( + epitopes=neoantigen.neoepitopes_mhc_i, + annotated_epitopes=prime.results, + annotation_name=Prime.ANNOTATION_PREFIX) + + # MixMHC2pred + if mixmhc2pred is not None: + neoantigen.neofox_annotations.annotations.extend(mixmhc2pred.get_annotations()) + neoantigen.neoepitopes_mhc_i_i = AnnotationFactory.annotate_epitopes_with_other_scores( + epitopes=neoantigen.neoepitopes_mhc_i_i, + annotated_epitopes=mixmhc2pred.results, + annotation_name=MixMHC2pred.ANNOTATION_PREFIX) + + # MHC binding independent features + expression_annotation = self.expression_calculator.get_annotations(neoantigen=neoantigen) + neoantigen.neofox_annotations.annotations.extend(expression_annotation) + + sequence_not_in_uniprot = self.uniprot.is_sequence_not_in_uniprot( + neoantigen.mutated_xmer + ) + neoantigen.neofox_annotations.annotations.extend( + self.uniprot.get_annotations(sequence_not_in_uniprot) + ) + + # Amplitude + self.amplitude.run(netmhcpan=netmhcpan, netmhc2pan=netmhc2pan) + neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations()) + neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations_mhc2()) + + # Neoantigen fitness + neoantigen.neofox_annotations.annotations.extend( + self.neoantigen_fitness_calculator.get_annotations( + mutated_peptide_mhci=netmhcpan.best_ninemer_epitope_by_affinity if netmhcpan else None, + amplitude=self.amplitude.amplitude_mhci_affinity_9mer, + mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None + ) + ) + neoantigen.neofox_annotations.annotations.extend( + self.neoantigen_fitness_calculator.get_annotations_extended( + 
mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, + amplitude=self.amplitude.amplitude_mhci_affinity + ) + ) + + # Differential Binding + if netmhcpan: + neoantigen.neofox_annotations.annotations.extend( + self.differential_binding.get_annotations_dai(epitope=netmhcpan.best_epitope_by_affinity) + ) + neoantigen.neofox_annotations.annotations.extend( + self.differential_binding.get_annotations(mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity, + amplitude=self.amplitude) + ) + if netmhc2pan: + neoantigen.neofox_annotations.annotations.extend( + self.differential_binding.get_annotations_mhc2(mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_rank, + amplitude=self.amplitude) + ) + + # T cell predictor + if netmhcpan: + neoantigen.neofox_annotations.annotations.extend( + self.tcell_predictor.get_annotations( + neoantigen=neoantigen, netmhcpan=netmhcpan + ) + ) + + # self-similarity + neoantigen.neofox_annotations.annotations.extend( + self.self_similarity.get_annnotations( + epitope_mhci=netmhcpan.best_epitope_by_rank if netmhcpan else None, + epitope_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None + ) + ) + + # number of mismatches and priority score + if netmhcpan: + neoantigen.neofox_annotations.annotations.extend( + self.priority_score_calculator.get_annotations( + netmhcpan=netmhcpan, + neoantigen=neoantigen, + mut_not_in_prot=sequence_not_in_uniprot, + ) + ) + + # neoag immunogenicity model + if netmhcpan and netmhcpan.best_epitope_by_affinity: + neoantigen.neofox_annotations.annotations.append( + self.neoag_calculator.get_annotation( + epitope_mhci=netmhcpan.best_epitope_by_affinity, + neoantigen=neoantigen) + ) + + # IEDB immunogenicity + if self.organism == ORGANISM_HOMO_SAPIENS: + neoantigen.neofox_annotations.annotations.extend( + self.iedb_immunogenicity.get_annotations( + mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, + 
mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None + ) + ) + + # dissimilarity to self-proteome + neoantigen.neofox_annotations.annotations.extend( + self.dissimilarity_calculator.get_annotations( + mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, + mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None) + ) + + # vaxrank + if netmhcpan and netmhcpan.predictions: + neoantigen.neofox_annotations.annotations.extend(VaxRank().get_annotations( + epitope_predictions=netmhcpan.predictions, + expression_score=expression_annotation[0].value, + )) + + # hex + # TODO: hex is failing for mouse with the current IEDB fasta with only 2 entries + if self.organism == ORGANISM_HOMO_SAPIENS: + neoantigen.neofox_annotations.annotations.extend( + self.hex.get_annotation( + mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None, + mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None) + ) + + # annotate neoepitopes + if with_all_neoepitopes: + neoantigen.neoepitopes_mhc_i = [ + self.get_additional_annotations_neoepitope_mhci( + epitope=e, neoantigen=neoantigen) + for e in neoantigen.neoepitopes_mhc_i] + neoantigen.neoepitopes_mhc_i_i = [ + self.get_additional_annotations_neoepitope_mhcii(epitope=e) for e in neoantigen.neoepitopes_mhc_i_i] + + return neoantigen diff --git a/neofox/annotator/neoantigen_mhc_binding_annotator.py b/neofox/annotator/neoantigen_mhc_binding_annotator.py new file mode 100644 index 00000000..327bf382 --- /dev/null +++ b/neofox/annotator/neoantigen_mhc_binding_annotator.py @@ -0,0 +1,161 @@ +from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred +from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred +from neofox.MHC_predictors.netmhcpan.combine_netmhcIIpan_pred_multiple_binders import BestAndMultipleBinderMhcII +from 
neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import BestAndMultipleBinder +from neofox.MHC_predictors.prime import Prime +from neofox.annotation_resources.uniprot.uniprot import Uniprot +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.runner import Runner +from neofox.model.mhc_parser import MhcParser +from neofox.model.neoantigen import Neoantigen, Patient +from neofox.references.references import DependenciesConfiguration, AvailableAlleles, ReferenceFolder, \ + ORGANISM_HOMO_SAPIENS + + +class NeoantigenMhcBindingAnnotator: + + def __init__(self, references: ReferenceFolder, configuration: DependenciesConfiguration, + uniprot: Uniprot, proteome_blastp_runner: BlastpRunner): + """class to annotate neoantigens""" + self.runner = Runner() + self.configuration = configuration + self.proteome_db = references.proteome_db + self.available_alleles = references.get_available_alleles() + self.organism = references.organism + self.uniprot = uniprot + self.proteome_blastp_runner = proteome_blastp_runner + + self.mhc_database = references.get_mhc_database() + self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database) + + def get_mhc_binding_annotations(self, neoantigen: Neoantigen, patient: Patient): + + has_mhc1 = patient.mhc1 is not None and len(patient.mhc1) > 0 + has_mhc2 = patient.mhc2 is not None and len(patient.mhc2) > 0 + + netmhcpan = None + netmhc2pan = None + mixmhcpred = None + mixmhc2pred = None + prime = None + + if has_mhc1: + netmhcpan = self.run_netmhcpan( + self.runner, + self.configuration, + self.available_alleles, + self.mhc_parser, + neoantigen, + patient) + if has_mhc2: + netmhc2pan = self._run_netmhc2pan( + self.runner, + self.configuration, + self.available_alleles, + self.mhc_parser, + neoantigen, + patient + ) + # avoids running MixMHCpred and PRIME for non human organisms + if self.organism == ORGANISM_HOMO_SAPIENS: + if self.configuration.mix_mhc2_pred is not None and has_mhc2: + mixmhc2pred 
= self._run_mixmhc2pred( + self.runner, + self.configuration, + self.mhc_parser, + neoantigen, + patient, + ) + if self.configuration.mix_mhc_pred is not None and has_mhc1: + mixmhcpred = self._run_mixmhcpred( + self.runner, + self.configuration, + self.mhc_parser, + neoantigen, + patient, + ) + if self.configuration.mix_mhc_pred is not None and self.configuration.prime is not None and has_mhc1: + prime = self._run_prime( + self.runner, + self.configuration, + self.mhc_parser, + neoantigen, + patient, + ) + + return mixmhc2pred, mixmhcpred, netmhc2pan, netmhcpan, prime + + def run_netmhcpan( + self, + runner: Runner, + configuration: DependenciesConfiguration, + available_alleles: AvailableAlleles, + mhc_parser: MhcParser, + neoantigen: Neoantigen, + patient: Patient, + ): + netmhcpan = BestAndMultipleBinder(runner=runner, configuration=configuration, mhc_parser=mhc_parser, + blastp_runner=self.proteome_blastp_runner) + netmhcpan.run( + neoantigen=neoantigen, + mhc1_alleles_patient=patient.mhc1, + mhc1_alleles_available=available_alleles.get_available_mhc_i(), + uniprot=self.uniprot, + ) + return netmhcpan + + def _run_netmhc2pan( + self, + runner: Runner, + configuration: DependenciesConfiguration, + available_alleles: AvailableAlleles, + mhc_parser: MhcParser, + neoantigen: Neoantigen, + patient: Patient, + ): + netmhc2pan = BestAndMultipleBinderMhcII( + runner=runner, configuration=configuration, mhc_parser=mhc_parser, + blastp_runner=self.proteome_blastp_runner) + netmhc2pan.run( + neoantigen=neoantigen, + mhc2_alleles_patient=patient.mhc2, + mhc2_alleles_available=available_alleles.get_available_mhc_ii(), + uniprot=self.uniprot + ) + return netmhc2pan + + def _run_mixmhcpred( + self, + runner: Runner, + configuration: DependenciesConfiguration, + mhc_parser: MhcParser, + neoantigen: Neoantigen, + patient: Patient, + ): + mixmhc = MixMHCpred(runner, configuration, mhc_parser) + mixmhc.run(neoantigen=neoantigen, mhc=patient.mhc1, uniprot=self.uniprot) + return 
mixmhc + + def _run_prime( + self, + runner: Runner, + configuration: DependenciesConfiguration, + mhc_parser: MhcParser, + neoantigen: Neoantigen, + patient: Patient, + ): + prime = Prime(runner, configuration, mhc_parser) + prime.run(neoantigen=neoantigen, mhc=patient.mhc1, uniprot=self.uniprot) + return prime + + def _run_mixmhc2pred( + self, + runner: Runner, + configuration: DependenciesConfiguration, + mhc_parser: MhcParser, + neoantigen: Neoantigen, + patient: Patient, + ): + mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser) + mixmhc2.run(mhc=patient.mhc2, neoantigen=neoantigen, uniprot=self.uniprot) + return mixmhc2 \ No newline at end of file diff --git a/neofox/annotator/neoepitope_annotator.py b/neofox/annotator/neoepitope_annotator.py new file mode 100755 index 00000000..0fec9f51 --- /dev/null +++ b/neofox/annotator/neoepitope_annotator.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# +# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. +# +# This file is part of Neofox +# (see https://github.com/tron-bioinformatics/neofox). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see .# + +from logzero import logger +from datetime import datetime +import neofox +from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred +from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred +from neofox.MHC_predictors.prime import Prime +from neofox.annotator.abstract_annotator import AbstractAnnotator +from neofox.annotator.neoantigen_mhc_binding_annotator import NeoantigenMhcBindingAnnotator +from neofox.annotator.neoepitope_mhc_binding_annotator import NeoepitopeMhcBindingAnnotator +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import BestAndMultipleBinder +from neofox.model.factories import AnnotationFactory +from neofox.model.mhc_parser import MhcParser +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator +from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator +from neofox.published_features.expression import Expression +from neofox.model.neoantigen import Patient, Neoantigen, Annotations, PredictedEpitope +from neofox.published_features.vaxrank.vaxrank import VaxRank +from neofox.references.references import ( + ReferenceFolder, + DependenciesConfiguration, + ORGANISM_HOMO_SAPIENS +) + + +class NeoepitopeAnnotator(AbstractAnnotator): + def __init__(self, references: ReferenceFolder, configuration: DependenciesConfiguration, + tcell_predictor: TcellPrediction, self_similarity: SelfSimilarityCalculator): + """class to annotate neoantigens""" + + super().__init__(references, configuration, tcell_predictor, self_similarity) + self.proteome_db = references.proteome_db + self.available_alleles = references.get_available_alleles() + + # NOTE: these resources do not read any file thus can be initialised fast + self.neoag_calculator = NeoagCalculator(runner=self.runner, 
configuration=configuration) + self.mhc_database = references.get_mhc_database() + self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database) + + self.neoepitope_mhc_binding_annotator = NeoepitopeMhcBindingAnnotator( + references=references, configuration=configuration, proteome_blastp_runner=self.proteome_blastp_runner, + uniprot=self.uniprot) + + self.resources_versions = references.get_resources_versions() + + def get_annotated_neoepitope(self, neoepitope: PredictedEpitope) -> PredictedEpitope: + neoepitope.neofox_annotations = Annotations( + annotator="NeoFox", + annotator_version=neofox.VERSION, + timestamp="{:%Y%m%d%H%M%S%f}".format(datetime.now()), + resources=self.resources_versions, + annotations=[] + ) + + # if the WT is not provided it searches for the closest match in the proteome + if neoepitope.wild_type_peptide is None or neoepitope.wild_type_peptide == '': + neoepitope.wild_type_peptide = self.proteome_blastp_runner.get_most_similar_wt_epitope( + neoepitope.mutated_peptide) + + # Runs netmhcpan, netmhc2pan, mixmhcpred and mixmhc2prd in parallel + annotated_neoepitope = self.neoepitope_mhc_binding_annotator.get_mhc_binding_annotations(neoepitope=neoepitope) + + has_mhc1 = annotated_neoepitope.allele_mhc_i is not None and annotated_neoepitope.allele_mhc_i.name + + if has_mhc1: + annotated_neoepitope = self.get_additional_annotations_neoepitope_mhci(epitope=annotated_neoepitope) + else: + annotated_neoepitope = self.get_additional_annotations_neoepitope_mhcii(epitope=annotated_neoepitope) + + return annotated_neoepitope diff --git a/neofox/annotator/neoepitope_mhc_binding_annotator.py b/neofox/annotator/neoepitope_mhc_binding_annotator.py new file mode 100644 index 00000000..e5202822 --- /dev/null +++ b/neofox/annotator/neoepitope_mhc_binding_annotator.py @@ -0,0 +1,149 @@ +from typing import Tuple + +from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred +from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred +from 
neofox.MHC_predictors.netmhcpan.combine_netmhcIIpan_pred_multiple_binders import BestAndMultipleBinderMhcII +from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import BestAndMultipleBinder +from neofox.MHC_predictors.netmhcpan.netmhcIIpan_prediction import NetMhcIIPanPredictor +from neofox.MHC_predictors.netmhcpan.netmhcpan_prediction import NetMhcPanPredictor +from neofox.MHC_predictors.prime import Prime +from neofox.annotation_resources.uniprot.uniprot import Uniprot +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.helpers.runner import Runner +from neofox.model.factories import AnnotationFactory +from neofox.model.mhc_parser import MhcParser +from neofox.model.neoantigen import Neoantigen, Patient, PredictedEpitope +from neofox.references.references import DependenciesConfiguration, AvailableAlleles, ReferenceFolder, \ + ORGANISM_HOMO_SAPIENS + + +class NeoepitopeMhcBindingAnnotator: + + def __init__(self, references: ReferenceFolder, configuration: DependenciesConfiguration, + uniprot: Uniprot, proteome_blastp_runner: BlastpRunner): + """class to annotate neoantigens""" + self.runner = Runner() + self.configuration = configuration + self.proteome_db = references.proteome_db + self.available_alleles = references.get_available_alleles() + self.organism = references.organism + self.uniprot = uniprot + self.proteome_blastp_runner = proteome_blastp_runner + + self.mhc_database = references.get_mhc_database() + self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database) + self.netmhcpan = NetMhcPanPredictor( + runner=self.runner, configuration=configuration, mhc_parser=self.mhc_parser, + blastp_runner=self.proteome_blastp_runner) + self.netmhc2pan = NetMhcIIPanPredictor( + runner=self.runner, configuration=configuration, mhc_parser=self.mhc_parser, + blastp_runner=self.proteome_blastp_runner) + self.mixmhcpred = MixMHCpred(self.runner, self.configuration, 
self.mhc_parser) + self.mixmhc2pred = MixMHC2pred(self.runner, self.configuration, self.mhc_parser) + self.prime = Prime(self.runner, self.configuration, self.mhc_parser) + + def get_mhc_binding_annotations(self, neoepitope: PredictedEpitope) -> PredictedEpitope: + + has_mhc1 = neoepitope.allele_mhc_i is not None and neoepitope.allele_mhc_i.name != '' + has_mhc2 = neoepitope.isoform_mhc_i_i is not None and neoepitope.isoform_mhc_i_i.name != '' + + if has_mhc1: + # MHC I epitope + annotated_neoepitope = self._run_netmhcpan(neoepitope=neoepitope) + if annotated_neoepitope: + if self.configuration.mix_mhc_pred and self.organism == ORGANISM_HOMO_SAPIENS: + mixmhcpred_neoepitope, mixmhcpred_neoepitope_wt = self._run_mixmhcpred(neoepitope=neoepitope) + if mixmhcpred_neoepitope: + annotated_neoepitope = AnnotationFactory.annotate_epitope( + epitope=annotated_neoepitope, + paired_epitope=mixmhcpred_neoepitope, + annotation_name=MixMHCpred.ANNOTATION_PREFIX) + annotated_neoepitope = AnnotationFactory.annotate_epitope( + epitope=annotated_neoepitope, + paired_epitope=mixmhcpred_neoepitope_wt, + annotation_name=MixMHCpred.ANNOTATION_PREFIX_WT) + if self.configuration.prime: + prime_neoepitope, prime_neoepitope_wt = self._run_prime(neoepitope=neoepitope) + if prime_neoepitope: + annotated_neoepitope = AnnotationFactory.annotate_epitope( + epitope=annotated_neoepitope, + paired_epitope=prime_neoepitope, + annotation_name=Prime.ANNOTATION_PREFIX) + annotated_neoepitope = AnnotationFactory.annotate_epitope( + epitope=annotated_neoepitope, + paired_epitope=prime_neoepitope_wt, + annotation_name=Prime.ANNOTATION_PREFIX_WT) + elif has_mhc2: + # MHC II epitope + annotated_neoepitope = self._run_netmhc2pan(neoepitope=neoepitope) + if annotated_neoepitope: + if self.configuration.mix_mhc2_pred and self.organism == ORGANISM_HOMO_SAPIENS: + mixmhc2pred_neoepitope, mixmhc2pred_neoepitope_wt = self._run_mixmhc2pred(neoepitope=neoepitope) + if mixmhc2pred_neoepitope: + annotated_neoepitope 
= AnnotationFactory.annotate_epitope( + epitope=annotated_neoepitope, + paired_epitope=mixmhc2pred_neoepitope, + annotation_name=MixMHC2pred.ANNOTATION_PREFIX) + annotated_neoepitope = AnnotationFactory.annotate_epitope( + epitope=annotated_neoepitope, + paired_epitope=mixmhc2pred_neoepitope_wt, + annotation_name=MixMHC2pred.ANNOTATION_PREFIX_WT) + else: + raise ValueError("Neoepitope without neither MHC I allele or MHC II isoform") + + return annotated_neoepitope + + def _run_netmhcpan(self, neoepitope: PredictedEpitope) -> PredictedEpitope: + # runs NetMHCpan in peptide mode over the mutated and WT separately and merges it back in one + # predicted epitope + annotated_neoepitope = neoepitope + + netmhcpan_allele = self.mhc_parser.get_netmhcpan_representation(neoepitope.allele_mhc_i) + if netmhcpan_allele in self.available_alleles.get_available_mhc_i(): + mutated_epitope = self.netmhcpan.mhc_prediction_peptide( + sequence=neoepitope.mutated_peptide, alleles=netmhcpan_allele) + annotated_neoepitope.affinity_mutated = mutated_epitope.affinity_mutated + annotated_neoepitope.rank_mutated = mutated_epitope.rank_mutated + wt_epitope = self.netmhcpan.mhc_prediction_peptide( + sequence=neoepitope.wild_type_peptide, alleles=netmhcpan_allele) + annotated_neoepitope.affinity_wild_type = wt_epitope.affinity_mutated + annotated_neoepitope.rank_wild_type = wt_epitope.rank_mutated + return annotated_neoepitope + + def _run_netmhc2pan(self, neoepitope: PredictedEpitope) -> PredictedEpitope: + annotated_neoepitope = neoepitope + + netmhc2pan_allele = self.mhc_parser.get_netmhc2pan_representation(neoepitope.isoform_mhc_i_i) + if netmhc2pan_allele in self.available_alleles.get_available_mhc_ii(): + mutated_epitope = self.netmhc2pan.mhc2_prediction_peptide( + sequence=neoepitope.mutated_peptide, + mhc2_isoform=neoepitope.isoform_mhc_i_i) + annotated_neoepitope.affinity_mutated = mutated_epitope.affinity_mutated + annotated_neoepitope.rank_mutated = mutated_epitope.rank_mutated + 
wt_epitope = self.netmhc2pan.mhc2_prediction_peptide( + sequence=neoepitope.wild_type_peptide, + mhc2_isoform=neoepitope.isoform_mhc_i_i) + annotated_neoepitope.affinity_wild_type = wt_epitope.affinity_mutated + annotated_neoepitope.rank_wild_type = wt_epitope.rank_mutated + return annotated_neoepitope + + def _run_mixmhcpred(self, neoepitope: PredictedEpitope) -> Tuple[PredictedEpitope, PredictedEpitope]: + mutated_epitope = self.mixmhcpred.run_peptide( + peptide=neoepitope.mutated_peptide, allele=neoepitope.allele_mhc_i) + wt_epitope = self.mixmhcpred.run_peptide( + peptide=neoepitope.wild_type_peptide, allele=neoepitope.allele_mhc_i) + return mutated_epitope, wt_epitope + + def _run_prime(self, neoepitope: PredictedEpitope) -> Tuple[PredictedEpitope, PredictedEpitope]: + mutated_epitope = self.prime.run_peptide(peptide=neoepitope.mutated_peptide, allele=neoepitope.allele_mhc_i) + wt_epitope = self.prime.run_peptide( + peptide=neoepitope.wild_type_peptide, allele=neoepitope.allele_mhc_i) + return mutated_epitope, wt_epitope + + def _run_mixmhc2pred(self, neoepitope: PredictedEpitope) -> Tuple[PredictedEpitope, PredictedEpitope]: + mutated_epitope = self.mixmhc2pred.run_peptide( + isoform=neoepitope.isoform_mhc_i_i, peptide=neoepitope.mutated_peptide) + wt_epitope = self.mixmhc2pred.run_peptide( + peptide=neoepitope.wild_type_peptide, isoform=neoepitope.isoform_mhc_i_i) + return mutated_epitope, wt_epitope diff --git a/neofox/command_line.py b/neofox/command_line.py index b234bf17..d2728306 100755 --- a/neofox/command_line.py +++ b/neofox/command_line.py @@ -17,17 +17,18 @@ # You should have received a copy of the GNU General Public License # along with this program. 
If not, see .# from argparse import ArgumentParser -from typing import Tuple, List +from typing import Tuple, List, Dict import dotenv from logzero import logger import orjson as json import neofox -from neofox.model.neoantigen import Neoantigen, Patient -from neofox.exceptions import NeofoxInputParametersException +import neofox.neofox +from neofox.model.neoantigen import Neoantigen, Patient, PredictedEpitope +from neofox.model.validation import ModelValidator from neofox.neofox import NeoFox -from neofox import AFFINITY_THRESHOLD_DEFAULT import os from neofox.model.conversion import ModelConverter +from neofox.neofox_epitope import NeoFoxEpitope from neofox.references.installer import NeofoxReferenceInstaller from neofox.references.references import ReferenceFolder, ORGANISM_HOMO_SAPIENS, ORGANISM_MUS_MUSCULUS, MhcDatabase @@ -72,14 +73,11 @@ def neofox_cli(): epilog=epilog ) parser.add_argument( - "--candidate-file", - dest="candidate_file", - help="input file with neoantigens candidates represented by long mutated peptide sequences", - ) - parser.add_argument( - "--json-file", - dest="json_file", - help="input JSON file with neoantigens candidates represented by long mutated peptide sequences", + "--input-file", + dest="input_file", + help="Input file with neoantigens candidates represented by long mutated peptide sequences. 
" + "Supported formats: tab-separated columns (extensions: .txt or .tsv) or JSON (extension: .json)", + required=True, ) parser.add_argument( "--patient-data", @@ -98,31 +96,24 @@ def neofox_cli(): default="neofox", ) parser.add_argument( - "--with-table", - dest="with_table", - action="store_true", - help="output results in a short wide tab-separated table " - "(if no format is specified this is the default)", - ) - parser.add_argument( - "--with-json", - dest="with_json", + "--with-all-neoepitopes", + dest="with_all_neoepitopes", action="store_true", - help="output results in JSON format", + help="output annotations for all MHC-I and MHC-II neoepitopes on all HLA alleles", ) parser.add_argument( - "--patient-id", - dest="patient_id", - help="the patient id for the input file. This parameter is only required, " - 'if the column "patient" has not been added to the candidate file', + "--rank-mhci-threshold", + dest="rank_mhci_threshold", + help="MHC-I epitopes with a netMHCpan predicted rank greater than or equal than this threshold will be " + "filtered out (default: {})".format(neofox.RANK_MHCI_THRESHOLD_DEFAULT), + default=neofox.RANK_MHCI_THRESHOLD_DEFAULT ) parser.add_argument( - "--affinity-threshold", - dest="affinity_threshold", - help="neoantigen candidates with a best predicted affinity greater than or equal than this threshold will be " - "not annotated with features that specifically model neoepitope recognition. 
A threshold that is commonly " - "used is 500 nM", - default=AFFINITY_THRESHOLD_DEFAULT + "--rank-mhcii-threshold", + dest="rank_mhcii_threshold", + help="MHC-II epitopes with a netMHCIIpan predicted rank greater than or equal than this threshold will be " + "filtered out (default: {})".format(neofox.RANK_MHCII_THRESHOLD_DEFAULT), + default=neofox.RANK_MHCII_THRESHOLD_DEFAULT ) parser.add_argument( "--num-cpus", dest="num_cpus", default=1, help="number of CPUs for computation" @@ -139,39 +130,35 @@ def neofox_cli(): help="the organism to which the data corresponds", default="human" ) + parser.add_argument( + "--verbose", + dest="verbose", + action="store_true", + help="verbose logs", + ) args = parser.parse_args() - candidate_file = args.candidate_file - json_file = args.json_file - patient_id = args.patient_id + input_file = args.input_file patients_data = args.patients_data output_folder = args.output_folder output_prefix = args.output_prefix - with_table = args.with_table - with_json = args.with_json - affinity_threshold = int(args.affinity_threshold) + with_all_neoepitopes = args.with_all_neoepitopes + rank_mhci_threshold = float(args.rank_mhci_threshold) + rank_mhcii_threshold = float(args.rank_mhcii_threshold) num_cpus = int(args.num_cpus) config = args.config organism = args.organism - logger.info("NeoFox v{}".format(neofox.VERSION)) - try: - # check parameters - if bool(candidate_file) + bool(json_file) > 1: - raise NeofoxInputParametersException( - "Please, define either a candidate file, a standard input file or a JSON file as input. 
Not many of them" - ) - if not candidate_file and not json_file: - raise NeofoxInputParametersException( - "Please, define one input file, either a candidate file, a standard input file or a JSON file" - ) - if not with_table and not with_json: - with_table = True # if none specified short wide is the default - # makes sure that the output folder exists os.makedirs(output_folder, exist_ok=True) + # initialise logs + log_file_name = NeoFox.get_log_file_name(work_folder=output_folder, output_prefix=output_prefix) + neofox.neofox.initialise_logs(log_file_name, verbose=args.verbose) + + logger.info("NeoFox v{}".format(neofox.VERSION)) + # loads configuration if config: dotenv.load_dotenv(config, override=True) @@ -179,30 +166,27 @@ def neofox_cli(): # reads the input data neoantigens, patients = _read_data( - candidate_file, - json_file, + input_file, patients_data, - patient_id, reference_folder.get_mhc_database()) # run annotations annotated_neoantigens = NeoFox( neoantigens=neoantigens, patients=patients, - patient_id=patient_id, - work_folder=output_folder, - output_prefix=output_prefix, + log_file_name=log_file_name, num_cpus=num_cpus, reference_folder=reference_folder, - affinity_threshold=affinity_threshold + rank_mhci_threshold=rank_mhci_threshold, + rank_mhcii_threshold=rank_mhcii_threshold, + with_all_neoepitopes=with_all_neoepitopes ).get_annotations() _write_results( - annotated_neoantigens, - output_folder, - output_prefix, - with_json, - with_table, + neoantigens=annotated_neoantigens, + output_folder=output_folder, + output_prefix=output_prefix, + with_all_neoepitopes=with_all_neoepitopes ) except Exception as e: logger.exception(e) # logs every exception in the file @@ -211,36 +195,234 @@ def neofox_cli(): logger.info("Finished NeoFox") -def _read_data( - candidate_file, json_file, patients_data, patient_id, mhc_database: MhcDatabase -) -> Tuple[List[Neoantigen], List[Patient]]: +def _read_data(input_file, patients_data, mhc_database: MhcDatabase) -> 
Tuple[List[Neoantigen], List[Patient]]: # parse patient data + logger.info("Parsing patients data from: {}".format(patients_data)) patients = ModelConverter.parse_patients_file(patients_data, mhc_database) + logger.info("Loaded {} patients".format(len(patients))) + # parse the neoantigen candidate data - if candidate_file is not None: - neoantigens = ModelConverter.parse_candidate_file( - candidate_file, patient_id - ) + if input_file.endswith('.txt') or input_file.endswith('.tsv'): + logger.info("Parsing candidate neoantigens from: {}".format(input_file)) + neoantigens = ModelConverter.parse_candidate_file(input_file) + logger.info("Loaded {} candidate neoantigens".format(len(neoantigens))) + elif input_file.endswith('.json') : + logger.info("Parsing candidate neoantigens from: {}".format(input_file)) + neoantigens = ModelConverter.parse_neoantigens_json_file(input_file) + logger.info("Loaded {} candidate neoantigens".format(len(neoantigens))) else: - neoantigens = ModelConverter.parse_neoantigens_json_file(json_file) + raise ValueError('Not supported input file extension: {}'.format(input_file)) + + patients_dict : Dict[str, Patient] + patients_dict = {p.identifier: p for p in patients} + + for n in neoantigens: + patient = patients_dict.get(n.patient_identifier) + if not patient.is_rna_available: + # removes RNA vaf if indicated in patient that this information is no good + # iCam legacy + n.rna_variant_allele_frequency = None return neoantigens, patients -def _write_results(neoantigens, output_folder, output_prefix, with_json, with_table): +def _write_results(neoantigens, output_folder, output_prefix, with_all_neoepitopes): # NOTE: this import here is a compromise solution so the help of the command line responds faster from neofox.model.conversion import ModelConverter # writes the output - if with_table: - ModelConverter.annotations2table(neoantigens).to_csv( + ModelConverter.annotations2neoantigens_table(neoantigens).to_csv( + os.path.join( + output_folder, 
+ "{}_neoantigen_candidates_annotated.tsv".format(output_prefix), + ), + sep="\t", + index=False, + ) + + if with_all_neoepitopes: + ModelConverter.annotations2epitopes_table(neoantigens, mhc=neofox.MHC_I).to_csv( + os.path.join( + output_folder, + "{}_mhcI_epitope_candidates_annotated.tsv".format(output_prefix), + ), + sep="\t", + index=False, + ) + ModelConverter.annotations2epitopes_table(neoantigens, mhc=neofox.MHC_II).to_csv( os.path.join( output_folder, - "{}_neoantigen_candidates_annotated.tsv".format(output_prefix), + "{}_mhcII_epitope_candidates_annotated.tsv".format(output_prefix), ), sep="\t", index=False, ) - if with_json: - output_features = os.path.join(output_folder, "{}_neoantigen_candidates_annotated.json".format(output_prefix)) - with open(output_features, "wb") as f: - f.write(json.dumps(ModelConverter.objects2json(neoantigens))) + + output_features = os.path.join(output_folder, "{}_neoantigen_candidates_annotated.json".format(output_prefix)) + with open(output_features, "wb") as f: + f.write(json.dumps(ModelConverter.objects2json(neoantigens))) + + +def neofox_epitope_cli(): + parser = ArgumentParser( + description="NeoFox {} epitope annotates a given set of neoepitope candidates " + "derived from point mutation with relevant immunogenic features".format(neofox.VERSION), + epilog=epilog + ) + parser.add_argument( + "--input-file", + dest="input_file", + help="Input file with neoepitope candidates. 
" + "Supported formats: tab-separated columns (extensions: .txt or .tsv) or JSON (extension: .json)", + required=True, + ) + parser.add_argument( + "--patient-data", + dest="patients_data", + help="file with data for patients with columns: identifier, estimated_tumor_content, " + "mhc_i_alleles, mhc_ii_alleles, tissue", + ) + parser.add_argument( + "--output-folder", dest="output_folder", help="output folder", required=True, + ) + parser.add_argument( + "--output-prefix", + dest="output_prefix", + help="prefix to name output files in the output folder", + default="neofox", + ) + parser.add_argument( + "--num-cpus", dest="num_cpus", default=1, help="number of CPUs for computation" + ) + parser.add_argument( + "--config", + dest="config", + help="an optional configuration file with all the environment variables", + ) + parser.add_argument( + "--organism", + dest="organism", + choices=[ORGANISM_HOMO_SAPIENS, ORGANISM_MUS_MUSCULUS], + help="the organism to which the data corresponds", + default="human" + ) + parser.add_argument( + "--verbose", + dest="verbose", + action="store_true", + help="verbose logs", + ) + args = parser.parse_args() + + input_file = args.input_file + patients_data = args.patients_data + output_folder = args.output_folder + output_prefix = args.output_prefix + num_cpus = int(args.num_cpus) + config = args.config + organism = args.organism + + try: + # makes sure that the output folder exists + os.makedirs(output_folder, exist_ok=True) + + # initialise logs + log_file_name = NeoFox.get_log_file_name(work_folder=output_folder, output_prefix=output_prefix) + neofox.neofox.initialise_logs(log_file_name, verbose=args.verbose) + + logger.info("NeoFox v{}".format(neofox.VERSION)) + + # loads configuration + if config: + dotenv.load_dotenv(config, override=True) + reference_folder = ReferenceFolder(organism=organism) + + # reads the input data + neoepitopes, patients = _read_data_epitopes( + input_file, + patients_data, + 
reference_folder.get_mhc_database()) + + # run annotations + annotated_neoepitopes = NeoFoxEpitope( + neoepitopes=neoepitopes, + patients=patients, + log_file_name=log_file_name, + num_cpus=num_cpus, + reference_folder=reference_folder + ).get_annotations() + + _write_results_epitopes( + neoepitopes=annotated_neoepitopes, + output_folder=output_folder, + output_prefix=output_prefix + ) + except Exception as e: + logger.exception(e) # logs every exception in the file + raise e + + logger.info("Finished NeoFox epitopes") + + +def _read_data_epitopes( + input_file, patients_data, mhc_database: MhcDatabase) -> Tuple[List[PredictedEpitope], List[Patient]]: + + # parse patient data + patients = [] + if patients_data is not None: + logger.info("Parsing patients data from: {}".format(patients_data)) + patients = ModelConverter.parse_patients_file(patients_data, mhc_database) + logger.info("Loaded {} patients".format(len(patients))) + + # parse the neoantigen candidate data + if input_file.endswith('.txt') or input_file.endswith('.tsv'): + logger.info("Parsing candidate neoepitopes from: {}".format(input_file)) + neoepitopes = ModelConverter.parse_candidate_neoepitopes_file(input_file, mhc_database) + logger.info("Loaded {} candidate neoepitopes".format(len(neoepitopes))) + # TODO: add support for input in JSON format + #elif input_file.endswith('.json') : + # logger.info("Parsing candidate neoepitopes from: {}".format(input_file)) + # neoepitopes = ModelConverter.parse_neoepitopes_json_file(input_file) + # logger.info("Loaded {} candidate neoepitopes".format(len(neoepitopes))) + else: + raise ValueError('Not supported input file extension: {}'.format(input_file)) + + patients_dict : Dict[str, Patient] + patients_dict = {p.identifier: p for p in patients} + + for n in neoepitopes: + patient = patients_dict.get(n.patient_identifier) + if patient is not None and not patient.is_rna_available: + # removes RNA vaf if indicated in patient that this information is no good + # iCam 
legacy + n.rna_variant_allele_frequency = None + + return neoepitopes, patients + + +def _write_results_epitopes(neoepitopes: List[PredictedEpitope], output_folder, output_prefix): + # NOTE: this import here is a compromise solution so the help of the command line responds faster + from neofox.model.conversion import ModelConverter + + mhci_neoepitopes = [n for n in neoepitopes if ModelValidator.is_mhci_epitope(n)] + mhcii_neoepitopes = [n for n in neoepitopes if ModelValidator.is_mhcii_epitope(n)] + + ModelConverter.annotated_neoepitopes2epitopes_table(mhci_neoepitopes, mhc=neofox.MHC_I).to_csv( + os.path.join( + output_folder, + "{}_mhcI_epitope_candidates_annotated.tsv".format(output_prefix), + ), + sep="\t", + index=False, + ) + ModelConverter.annotated_neoepitopes2epitopes_table(mhcii_neoepitopes, mhc=neofox.MHC_II).to_csv( + os.path.join( + output_folder, + "{}_mhcII_epitope_candidates_annotated.tsv".format(output_prefix), + ), + sep="\t", + index=False, + ) + + output_features = os.path.join(output_folder, "{}_neoepitope_candidates_annotated.json".format(output_prefix)) + with open(output_features, "wb") as f: + f.write(json.dumps(ModelConverter.objects2json(neoepitopes))) diff --git a/neofox/helpers/blastp_runner.py b/neofox/helpers/blastp_runner.py index ac1bb05b..d015f424 100755 --- a/neofox/helpers/blastp_runner.py +++ b/neofox/helpers/blastp_runner.py @@ -33,6 +33,7 @@ def __init__(self, runner: Runner, configuration: DependenciesConfiguration, dat self.runner = runner self.configuration = configuration self.database = database + self.cache_homologous_epitopes = {} def calculate_similarity_database(self, peptide, a=26) -> float: """ @@ -71,26 +72,30 @@ def calculate_similarity_database(self, peptide, a=26) -> float: return similarity_score def get_most_similar_wt_epitope(self, peptide): - cmd = [ - self.configuration.blastp, - "-outfmt", - "15", - "-db", - self.database, - "-evalue", - "100000000", - "-qcov_hsp_perc", - "100", - "-comp_based_stats F", - 
"-num_alignments 1", - "-ungapped" - ] + if peptide not in self.cache_homologous_epitopes: + cmd = [ + self.configuration.blastp, + "-outfmt", + "15", + "-db", + self.database, + "-evalue", + "100000000", + "-qcov_hsp_perc", + "100", + "-comp_based_stats F", + "-num_alignments 1", + "-ungapped" + ] - hits = self._run_blastp(cmd=cmd, peptide=peptide, print_log=True) - wt_peptide = None - if hits is not None and len(hits) > 0: - best_hit = hits[0] - wt_peptide = best_hit.get("hsps")[0].get("hseq") + hits = self._run_blastp(cmd=cmd, peptide=peptide, print_log=True) + wt_peptide = None + if hits is not None and len(hits) > 0: + best_hit = hits[0] + wt_peptide = best_hit.get("hsps")[0].get("hseq") + self.cache_homologous_epitopes[peptide] = wt_peptide + else: + wt_peptide = self.cache_homologous_epitopes.get(peptide) return wt_peptide def _run_blastp(self, cmd, peptide, print_log=True): diff --git a/neofox/helpers/epitope_helper.py b/neofox/helpers/epitope_helper.py index 2fb431a4..ed38d991 100755 --- a/neofox/helpers/epitope_helper.py +++ b/neofox/helpers/epitope_helper.py @@ -20,65 +20,43 @@ from Bio.Data import IUPACData -from neofox.model.neoantigen import Mutation +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.model.neoantigen import PredictedEpitope, MhcAllele, Mhc2Isoform, Annotation, Annotations, \ + Neoantigen class EpitopeHelper(object): @staticmethod - def generate_nmers(mutation: Mutation, lengths, uniprot): + def generate_nmers(neoantigen: Neoantigen, lengths, uniprot): """ Generates peptides covering mutation of all lengths that are provided. 
Returns peptides as list No peptide is shorter than the minimun length provided There are no repetitions in the results """ - length_mut = len(mutation.mutated_xmer) + length_mut = len(neoantigen.mutated_xmer) list_peptides = set() for length in lengths: if length <= length_mut: starts = range(length_mut - length + 1) ends = [s + length for s in starts] for s, e in zip(starts, ends): - peptide = mutation.mutated_xmer[s:e] + peptide = neoantigen.mutated_xmer[s:e] if len(peptide) == length and uniprot.is_sequence_not_in_uniprot(peptide): list_peptides.add(peptide) return list(list_peptides) @staticmethod - def mut_position_xmer_seq(mutation: Mutation) -> List[int]: - """ - returns position (1-based) of mutation in xmer sequence. There can be more than one SNV within Xmer sequence. - """ - # TODO: this is not efficient. A solution using zip is 25% faster. There may be other alternatives - pos_mut = [] - if mutation.wild_type_xmer is not None and mutation.mutated_xmer is not None: - if len(mutation.wild_type_xmer) == len(mutation.mutated_xmer): - p1 = -1 - for i, aa in enumerate(mutation.mutated_xmer): - if aa != mutation.wild_type_xmer[i]: - p1 = i + 1 - pos_mut.append(p1) - else: - p1 = 0 - # in case sequences do not have same length - for a1, a2 in zip(mutation.wild_type_xmer, mutation.mutated_xmer): - if a1 == a2: - p1 += 1 - elif a1 != a2: - p1 += 1 - pos_mut.append(p1) - return pos_mut - - @staticmethod - def position_of_mutation_epitope(wild_type, mutation) -> int: + def position_of_mutation_epitope(epitope: PredictedEpitope) -> int: """ This function determines the position of the mutation within the epitope sequence. + When multiple mutations are present it returns the last position """ # TODO: is this efficient? 
No, a solution with zip is around 25% faster, maybe something else is even faster position = -1 try: - for i, aa in enumerate(mutation): - if aa != wild_type[i]: + for i, aa in enumerate(epitope.mutated_peptide): + if aa != epitope.wild_type_peptide[i]: position = i + 1 except Exception: position = None @@ -133,3 +111,127 @@ def contains_rare_amino_acid(peptide): found_rare_amino_acid = True return found_rare_amino_acid return found_rare_amino_acid + + @staticmethod + def pair_predictions(predictions, predictions_wt) -> List[PredictedEpitope]: + for prediction in predictions: + for prediction_wt in predictions_wt: + if len(prediction_wt.mutated_peptide) == len(prediction.mutated_peptide) and \ + prediction.position == prediction_wt.position and \ + prediction.allele_mhc_i.name == prediction_wt.allele_mhc_i.name: + prediction.wild_type_peptide = prediction_wt.mutated_peptide + prediction.rank_wild_type = prediction_wt.rank_mutated + prediction.affinity_wild_type = prediction_wt.affinity_mutated + break + return predictions + + @staticmethod + def pair_mhcii_predictions(predictions, predictions_wt) -> List[PredictedEpitope]: + for prediction in predictions: + for prediction_wt in predictions_wt: + if len(prediction_wt.mutated_peptide) == len(prediction.mutated_peptide) and \ + prediction.position == prediction_wt.position and \ + prediction.isoform_mhc_i_i.name == prediction_wt.isoform_mhc_i_i.name: + prediction.wild_type_peptide = prediction_wt.mutated_peptide + prediction.rank_wild_type = prediction_wt.rank_mutated + prediction.affinity_wild_type = prediction_wt.affinity_mutated + break + return predictions + + @staticmethod + def get_empty_epitope(): + return PredictedEpitope( + mutated_peptide=None, + position=None, + allele_mhc_i=MhcAllele(name=None), + isoform_mhc_i_i=Mhc2Isoform(name=None), + affinity_mutated=None, + rank_mutated=None, + ) + + @staticmethod + def select_best_by_rank(predictions: List[PredictedEpitope]) -> PredictedEpitope: + """ + Returns the 
peptide with the lowest (ie: meaning highest) rank and in case of tie first peptide on + alphabetical order to ensure determinism + """ + return max(predictions, key=lambda p: (-p.rank_mutated, p.mutated_peptide)) \ + if predictions is not None and len(predictions) > 0 else EpitopeHelper.get_empty_epitope() + + @staticmethod + def select_best_by_affinity(predictions: List[PredictedEpitope], maximum=False) -> PredictedEpitope: + """ + Returns the peptide with the highest affinity score and in case of tie first peptide on + alphabetical order to ensure determinism + By default the highest affinity score is the lowest (ie: netmhc family) if maximum=True then the highest + score is the highest (ie: mixmhcpred and PRIME) + """ + if maximum: + return max(predictions, key=lambda p: (p.affinity_mutated, p.mutated_peptide)) \ + if predictions is not None and len(predictions) > 0 else EpitopeHelper.get_empty_epitope() + else: + return max(predictions, key=lambda p: (-p.affinity_mutated, p.mutated_peptide)) \ + if predictions is not None and len(predictions) > 0 else EpitopeHelper.get_empty_epitope() + + @staticmethod + def remove_peptides_in_proteome(predictions: List[PredictedEpitope], uniprot + ) -> List[PredictedEpitope]: + """filters prediction file for predicted epitopes that cover mutations by searching for epitope + in uniprot proteome database with an exact match search""" + return list( + filter( + lambda p: uniprot.is_sequence_not_in_uniprot( + p.mutated_peptide + ), + predictions, + ) + ) + + @staticmethod + def filter_for_9mers(predictions: List[PredictedEpitope]) -> List[PredictedEpitope]: + """returns only predicted 9mers""" + return list(filter(lambda p: len(p.mutated_peptide) == 9, predictions)) + + @staticmethod + def filter_peptides_covering_snv( + position_of_mutation, predictions: List[PredictedEpitope]) -> List[PredictedEpitope]: + """filters prediction file for predicted epitopes that cover mutations""" + return list( + filter( + lambda p: 
EpitopeHelper.epitope_covers_mutation( + position_of_mutation, p.position, len(p.mutated_peptide) + ), + predictions, + ) + ) + + @staticmethod + def set_wt_epitope_by_homology(predictions: List[PredictedEpitope], blastp_runner: BlastpRunner) -> List[PredictedEpitope]: + """returns wt epitope for each neoepitope candidate of a neoantigen candidate from an alternative mutation + class by a BLAST search.""" + + for p in predictions: + p.wild_type_peptide = blastp_runner.get_most_similar_wt_epitope(p.mutated_peptide) + return predictions + + @staticmethod + def get_epitope_id(epitope): + if epitope.allele_mhc_i is not None: + return "{}-{}".format(epitope.allele_mhc_i.name, epitope.mutated_peptide) + elif epitope.isoform_mhc_i_i is not None: + return "{}-{}".format(epitope.isoform_mhc_i_i.name, epitope.mutated_peptide) + else: + raise ValueError('Cannot build id on an epitope without HLA allele or isoform') + + @staticmethod + def get_annotation_by_name(annotations: List[Annotation], name: str) -> str: + result = None + found = False + for a in annotations: + if a.name == name: + result = a.value + found = True + break + if not found: + raise ValueError("Expected annotation '{}' not found".format(name)) + return result diff --git a/neofox/helpers/mhc_helper.py b/neofox/helpers/mhc_helper.py new file mode 100644 index 00000000..edd1c090 --- /dev/null +++ b/neofox/helpers/mhc_helper.py @@ -0,0 +1,31 @@ +from typing import List +from neofox.model.neoantigen import Mhc1, Zygosity + + +class MhcHelper: + + @staticmethod + def get_homozygous_mhc1_alleles(mhc_isoforms: List[Mhc1]) -> List[str]: + """ + Returns alleles that occur more than one time in list of patient alleles and hence are homozygous alleles. 
+ Otherwise returns empty list + """ + return [ + a.name + for m in mhc_isoforms + for a in m.alleles + if m.zygosity == Zygosity.HOMOZYGOUS + ] + + @staticmethod + def get_heterozygous_or_hemizygous_mhc1_alleles(mhc_isoforms: List[Mhc1]) -> List[str]: + """ + Returns alleles of the MHC I genes whose zygosity is heterozygous or hemizygous. + Otherwise returns empty list + """ + return [ + a.name + for m in mhc_isoforms + for a in m.alleles + if m.zygosity in [Zygosity.HETEROZYGOUS, Zygosity.HEMIZYGOUS] + ] diff --git a/neofox/model/conversion.py b/neofox/model/conversion.py index da7c593c..f57f6c56 100755 --- a/neofox/model/conversion.py +++ b/neofox/model/conversion.py @@ -20,34 +20,26 @@ import pandas as pd import betterproto from betterproto import Casing -from neofox import NOT_AVAILABLE_VALUE +from neofox import NOT_AVAILABLE_VALUE, MHC_II, MHC_I from collections import defaultdict import orjson as json import numpy as np +from neofox.model.mhc_parser import MhcParser from neofox.model.neoantigen import ( Neoantigen, Patient, - Annotation, + PredictedEpitope, ) from neofox.model.factories import PatientFactory, NeoantigenFactory from neofox.references.references import MhcDatabase -FIELD_VAF_DNA = "VAF_in_tumor" -FIELD_VAF_RNA = "VAF_in_RNA" -FIELD_TRANSCRIPT_EXPRESSION = "transcript_expression" -FIELD_GENE = "gene" -FIELD_WILD_TYPE_XMER = "[WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)" -FIELD_MUTATED_XMER = "+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)" - class ModelConverter(object): @staticmethod - def parse_candidate_file(candidate_file: str, patient_id: str = None) -> List[Neoantigen]: + def parse_candidate_file(candidate_file: str) -> List[Neoantigen]: """ :param candidate_file: the path to an neoantigen candidate input file - :param patient_id: the patient identifier for all neoantigens in the input file, if not provided it is - expected as column named `patient.id` or `patient` :return neoantigens in model objects """ data = 
pd.read_csv( @@ -55,8 +47,8 @@ def parse_candidate_file(candidate_file: str, patient_id: str = None) -> List[Ne # NOTE: forces the types of every column to avoid pandas setting the wrong type for corner cases dtype={ "gene": str, - "mutation.wildTypeXmer": str, - "mutation.mutatedXmer": str, + "wildTypeXmer": str, + "mutatedXmer": str, "patientIdentifier": str, "dnaVariantAlleleFrequency": float, "rnaExpression": float, @@ -64,29 +56,36 @@ def parse_candidate_file(candidate_file: str, patient_id: str = None) -> List[Ne } ) - # check format of input file - if FIELD_MUTATED_XMER in data.columns.values.tolist(): - # NOTE: this is the support for the iCaM format - data = data.replace({np.nan: None}) - neoantigens = [] - for _, candidate_entry in data.iterrows(): - neoantigen = ModelConverter._candidate_entry2model( - candidate_entry, patient_id=patient_id - ) - neoantigen.external_annotations = [ - # NOTE: we need to exclude the field gene from the external annotations as it matches a field - # in the model and thus it causes a conflict when both are renamed to gene_x and gene_y when - # joining - Annotation(name=name, value=str(value)) for name, value - in candidate_entry.iteritems() if name != FIELD_GENE - ] - neoantigens.append(neoantigen) - else: - # NOTE: this is the support for the NeoFox format - data = data.replace({np.nan: None}) - neoantigens = ModelConverter._neoantigens_csv2objects(data) + # NOTE: this is the support for the NeoFox format + data = data.replace({np.nan: None}) + neoantigens = ModelConverter._neoantigens_csv2objects(data) + return neoantigens + @staticmethod + def parse_candidate_neoepitopes_file(candidate_file: str, mhc_database: MhcDatabase) -> List[PredictedEpitope]: + data = pd.read_csv( + candidate_file, sep="\t", + # NOTE: forces the types of every column to avoid pandas setting the wrong type for corner cases + dtype={ + "gene": str, + "mutatedPeptide": str, + "wildTypePeptide": str, + "dnaVariantAlleleFrequency": float, + 
"rnaExpression": float, + "rnaVariantAlleleFrequency": float, + "patientIdentifier": str, + "alleleMhcI": str, + "isoformMhcII": str, + } + ) + + # NOTE: this is the support for the NeoFox format + data = data.replace({np.nan: None}) + neoepitopes = ModelConverter._neoepitopes_csv2objects(data, mhc_database) + return neoepitopes + + @staticmethod def parse_patients_file(patients_file: str, mhc_database: MhcDatabase) -> List[Patient]: """ @@ -114,8 +113,8 @@ def parse_patients_file(patients_file: str, mhc_database: MhcDatabase) -> List[P identifier=patient_dict.get("identifier"), is_rna_available=patient_dict.get("isRnaAvailable", False), tumor_type=patient_dict.get("tumorType"), - mhc_alleles=patient_dict["mhcIAlleles"], - mhc2_alleles=patient_dict["mhcIIAlleles"], + mhc_alleles=patient_dict.get("mhcIAlleles", []), + mhc2_alleles=patient_dict.get("mhcIIAlleles", []), mhc_database=mhc_database ) patients.append(patient) @@ -143,7 +142,7 @@ def objects2json(model_objects: List[betterproto.Message]): return [o.to_dict(casing=Casing.SNAKE) for o in model_objects] @staticmethod - def annotations2table(neoantigens: List[Neoantigen]) -> pd.DataFrame: + def annotations2neoantigens_table(neoantigens: List[Neoantigen]) -> pd.DataFrame: dfs = [] neoantigens_df = ModelConverter._neoantigens2table(neoantigens) neoantigens_df.replace({None: NOT_AVAILABLE_VALUE}, inplace=True) @@ -151,9 +150,9 @@ def annotations2table(neoantigens: List[Neoantigen]) -> pd.DataFrame: neoantigens_df = neoantigens_df.loc[:, ["patientIdentifier", "gene", - "mutation.mutatedXmer", - "mutation.wildTypeXmer", - "mutation.position", + "mutatedXmer", + "wildTypeXmer", + "position", "dnaVariantAlleleFrequency", "rnaVariantAlleleFrequency", "rnaExpression", @@ -175,6 +174,85 @@ def annotations2table(neoantigens: List[Neoantigen]) -> pd.DataFrame: df.replace('None', NOT_AVAILABLE_VALUE, inplace=True) return df + @staticmethod + def annotations2epitopes_table(neoantigens: List[Neoantigen], mhc: str) -> 
pd.DataFrame: + + assert(mhc in [MHC_I, MHC_II], 'Bad MHC value') + + epitopes_dfs = [] + for n in neoantigens: + # parses epitopes from a neoantigen into a data frame + patient_identifier = n.patient_identifier + epitopes = n.neoepitopes_mhc_i if mhc == MHC_I else n.neoepitopes_mhc_i_i + epitopes_temp_df = ModelConverter._objects2dataframe(epitopes) + epitopes_temp_df['patient_identifier'] = patient_identifier + + # adapts output table depending on MHC type + if mhc == MHC_I: + epitopes_temp_df.drop(list(epitopes_temp_df.filter(regex='isoformMhcII.*')), axis=1, inplace=True) + else: + epitopes_temp_df.drop(list(epitopes_temp_df.filter(regex='alleleMhcI.*')), axis=1, inplace=True) + + # annotations need a custom parsing, thus we remove these columns + epitopes_temp_df.drop(list(epitopes_temp_df.filter(regex='neofoxAnnotations.*')), axis=1, inplace=True) + + # parses the annotations from each of the epitopes into a data frame + annotations_dfs = [] + for e in epitopes: + annotations = [a.to_dict() for a in e.neofox_annotations.annotations] + annotations_temp_df = (pd.DataFrame(annotations).set_index("name").transpose()) + annotations_dfs.append(annotations_temp_df) + if len(annotations_dfs) > 0: + annotations_df = pd.concat(annotations_dfs, sort=True).reset_index() + del annotations_df["index"] + + # puts together both data frames + epitopes_temp_df = pd.concat([epitopes_temp_df, annotations_df], axis=1) + + epitopes_temp_df.replace({None: NOT_AVAILABLE_VALUE}, inplace=True) + epitopes_dfs.append(epitopes_temp_df) + + # concatenates all together + epitopes_df = pd.concat(epitopes_dfs) + + return epitopes_df + + @staticmethod + def annotated_neoepitopes2epitopes_table(neoepitopes: List[PredictedEpitope], mhc: str) -> pd.DataFrame: + + assert (mhc in [MHC_I, MHC_II], 'Bad MHC value') + + epitopes_df = ModelConverter._objects2dataframe(neoepitopes) + + # adapts output table depending on MHC type + if mhc == MHC_I: + 
epitopes_df.drop(list(epitopes_df.filter(regex='isoformMhcII.*')), axis=1, inplace=True) + else: + epitopes_df.drop(list(epitopes_df.filter(regex='alleleMhcI.*')), axis=1, inplace=True) + + # formats annotation columns + epitopes_df.drop(list(epitopes_df.filter(regex='neofoxAnnotations.*')), axis=1, inplace=True) + # the position is used to pair neoepitopes coming out of netMHCpan in neoantigen mode, not of any use here + epitopes_df.drop(["position"], axis=1, inplace=True) + + # parses the annotations from each of the epitopes into a data frame + annotations_dfs = [] + for e in neoepitopes: + annotations = [a.to_dict() for a in e.neofox_annotations.annotations] + annotations_temp_df = (pd.DataFrame(annotations).set_index("name").transpose()) + annotations_dfs.append(annotations_temp_df) + if len(annotations_dfs) > 0: + annotations_df = pd.concat(annotations_dfs, sort=True).reset_index() + del annotations_df["index"] + + # puts together both data frames + epitopes_df = pd.concat([epitopes_df, annotations_df], axis=1) + + # replace None by NA + epitopes_df.replace({None: NOT_AVAILABLE_VALUE}, inplace=True) + + return epitopes_df + @staticmethod def patients2table(patients: List[Patient]) -> pd.DataFrame: @@ -198,31 +276,17 @@ def _objects2dataframe(model_objects: List[betterproto.Message]) -> pd.DataFrame data=[n.to_dict(include_default_values=True) for n in model_objects] ) - @staticmethod - def _candidate_entry2model(candidate_entry: dict, patient_id: str) -> Neoantigen: - """parses an row from a candidate file into a model object""" - - vaf_rna_raw = candidate_entry.get(FIELD_TRANSCRIPT_EXPRESSION) - return NeoantigenFactory.build_neoantigen( - wild_type_xmer=candidate_entry.get(FIELD_WILD_TYPE_XMER), - mutated_xmer=candidate_entry.get(FIELD_MUTATED_XMER), - patient_identifier=patient_id if patient_id else candidate_entry.get("patient"), - gene=candidate_entry.get(FIELD_GENE), - rna_expression=vaf_rna_raw if vaf_rna_raw is not None and vaf_rna_raw >= 0 else None, 
- rna_variant_allele_frequency=candidate_entry.get(FIELD_VAF_RNA), - dna_variant_allele_frequency=candidate_entry.get(FIELD_VAF_DNA) - ) - @staticmethod def _neoantigens_csv2objects(dataframe: pd.DataFrame) -> List[Neoantigen]: """transforms an patients CSV into a list of objects""" neoantigens = [] for _, row in dataframe.iterrows(): - nested_dict = ModelConverter._flat_dict2nested_dict(flat_dict=row.to_dict()) + neoantigen_dict = row.to_dict() # build the external annotations from anything not from the model - external_annotations = nested_dict.copy() - external_annotations.pop("mutation", None) + external_annotations = neoantigen_dict.copy() + external_annotations.pop("wildTypeXmer", None) + external_annotations.pop("mutatedXmer", None) external_annotations.pop("patientIdentifier", None) external_annotations.pop("gene", None) external_annotations.pop("rnaExpression", None) @@ -230,14 +294,14 @@ def _neoantigens_csv2objects(dataframe: pd.DataFrame) -> List[Neoantigen]: external_annotations.pop("dnaVariantAlleleFrequency", None) neoantigen = NeoantigenFactory.build_neoantigen( - wild_type_xmer=nested_dict.get("mutation", {}).get("wildTypeXmer"), - mutated_xmer=nested_dict.get("mutation", {}).get("mutatedXmer"), - patient_identifier=nested_dict.get("patientIdentifier"), - gene=nested_dict.get("gene"), - rna_expression=nested_dict.get("rnaExpression"), - rna_variant_allele_frequency=nested_dict.get("rnaVariantAlleleFrequency"), - dna_variant_allele_frequency=nested_dict.get("dnaVariantAlleleFrequency"), - imputed_gene_expression=nested_dict.get("imputedGeneExpression"), + wild_type_xmer=neoantigen_dict.get("wildTypeXmer"), + mutated_xmer=neoantigen_dict.get("mutatedXmer"), + patient_identifier=neoantigen_dict.get("patientIdentifier"), + gene=neoantigen_dict.get("gene"), + rna_expression=neoantigen_dict.get("rnaExpression"), + rna_variant_allele_frequency=neoantigen_dict.get("rnaVariantAlleleFrequency"), + 
dna_variant_allele_frequency=neoantigen_dict.get("dnaVariantAlleleFrequency"), + imputed_gene_expression=neoantigen_dict.get("imputedGeneExpression"), **external_annotations ) neoantigens.append(neoantigen) @@ -245,25 +309,78 @@ def _neoantigens_csv2objects(dataframe: pd.DataFrame) -> List[Neoantigen]: return neoantigens @staticmethod - def _neoantigens2table(neoantigens: List[Neoantigen]) -> pd.DataFrame: - df = ModelConverter._objects2dataframe(neoantigens) - df["mutation.position"] = df["mutation.position"].transform( - lambda x: ",".join([str(y) for y in x]) if x is not None else x) - return df + def _neoepitopes_csv2objects(dataframe: pd.DataFrame, mhc_database: MhcDatabase) -> List[PredictedEpitope]: + """transforms an patients CSV into a list of objects""" + neoepitopes = [] + mhc_parser = MhcParser.get_mhc_parser(mhc_database) + for _, row in dataframe.iterrows(): + neoepitope_dict = row.to_dict() - @staticmethod - def _flat_dict2nested_dict(flat_dict: dict) -> dict: - """transforms a flattened dict into a nested dict, assuming that the dot indicates a nested level""" - nested_dict = defaultdict(lambda: {}) - for k, v in flat_dict.items(): - splitted_k = k.split(".") - if len(splitted_k) > 2: - raise NotImplementedError( - "Support for dictionaries nested more than one level is not implemented" + # build the external annotations from anything not from the model + external_annotations = neoepitope_dict.copy() + external_annotations.pop("mutatedPeptide", None) + external_annotations.pop("wildTypePeptide", None) + external_annotations.pop("affinityMutated", None) + external_annotations.pop("rankMutated", None) + external_annotations.pop("affinityWildType", None) + external_annotations.pop("rankWildType", None) + external_annotations.pop("alleleMhcI", None) + external_annotations.pop("alleleMhcII", None) + external_annotations.pop("position", None) + external_annotations.pop("patientIdentifier", None) + external_annotations.pop("gene", None) + 
external_annotations.pop("rnaExpression", None) + external_annotations.pop("rnaVariantAlleleFrequency", None) + external_annotations.pop("dnaVariantAlleleFrequency", None) + + mhci_allele = neoepitope_dict.get("alleleMhcI") + mhcii_isoform = neoepitope_dict.get("isoformMhcII") + patient_id = neoepitope_dict.get("patientIdentifier") + if mhci_allele is not None and mhci_allele != '': + neoepitope = PredictedEpitope( + mutated_peptide=neoepitope_dict.get("mutatedPeptide"), + wild_type_peptide=neoepitope_dict.get("wildTypePeptide"), + patient_identifier=patient_id, + allele_mhc_i=mhc_parser.parse_mhc_allele(mhci_allele), + gene=neoepitope_dict.get("gene"), + rna_expression=neoepitope_dict.get("rnaExpression"), + rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"), + dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"), + imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"), + ) + elif mhcii_isoform is not None and mhcii_isoform != '': + neoepitope = PredictedEpitope( + mutated_peptide=neoepitope_dict.get("mutatedPeptide"), + wild_type_peptide=neoepitope_dict.get("wildTypePeptide"), + patient_identifier=patient_id, + isoform_mhc_i_i=mhc_parser.parse_mhc2_isoform(mhcii_isoform), + gene=neoepitope_dict.get("gene"), + rna_expression=neoepitope_dict.get("rnaExpression"), + rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"), + dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"), + imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"), + ) + elif patient_id is not None and patient_id != '': + neoepitope = PredictedEpitope( + mutated_peptide=neoepitope_dict.get("mutatedPeptide"), + wild_type_peptide=neoepitope_dict.get("wildTypePeptide"), + patient_identifier=patient_id, + gene=neoepitope_dict.get("gene"), + rna_expression=neoepitope_dict.get("rnaExpression"), + rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"), + 
dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"), + imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"), ) - if len(splitted_k) == 2: - nested_dict[splitted_k[0]][splitted_k[1]] = v else: - nested_dict[k] = v - return dict(nested_dict) + raise ValueError( + "Found an epitope without MHC-I allele, MHC-II isoform or patiend identifier: {}".format( + neoepitope_dict)) + neoepitopes.append(neoepitope) + + return neoepitopes + @staticmethod + def _neoantigens2table(neoantigens: List[Neoantigen]) -> pd.DataFrame: + df = ModelConverter._objects2dataframe(neoantigens) + df["position"] = df["position"].transform(lambda x: ",".join([str(y) for y in x]) if x is not None else x) + return df diff --git a/neofox/model/factories.py b/neofox/model/factories.py index b5fd9394..b1da0522 100755 --- a/neofox/model/factories.py +++ b/neofox/model/factories.py @@ -23,7 +23,7 @@ from neofox.helpers.epitope_helper import EpitopeHelper from neofox.model.mhc_parser import MhcParser, get_mhc2_isoform_name from neofox.model.neoantigen import Annotation, Patient, Mhc1, Zygosity, Mhc2, Mhc2Gene, Mhc2Name, Mhc2Isoform, \ - MhcAllele, Mhc2GeneName, Mhc1Name, Mutation, Neoantigen + MhcAllele, Mhc2GeneName, Neoantigen, PredictedEpitope, Annotations from neofox.model.validation import ModelValidator, GENES_BY_MOLECULE from neofox.references.references import MhcDatabase @@ -43,6 +43,48 @@ def build_annotation(name, value): value = NOT_AVAILABLE_VALUE return Annotation(name=name, value=value) + @staticmethod + def annotate_epitopes_with_other_scores( + epitopes: List[PredictedEpitope], + annotated_epitopes: List[PredictedEpitope], + annotation_name: str) -> List[PredictedEpitope]: + + merged_epitopes = [] + if annotated_epitopes is not None: + annotated_epitopes_dict = {EpitopeHelper.get_epitope_id(e): e for e in annotated_epitopes} + for e in epitopes: + + # intialise annotations for the epitope if not done already + if e.neofox_annotations is None: + 
e.neofox_annotations = Annotations(annotations=[]) + + # adds new annotations if any + paired_epitope = annotated_epitopes_dict.get(EpitopeHelper.get_epitope_id(e)) + AnnotationFactory.annotate_epitope( + annotation_name=annotation_name, epitope=e, paired_epitope=paired_epitope) + + # updates epitope + merged_epitopes.append(e) + else: + # if there are no results to annotate with it returns the input list as is + merged_epitopes = epitopes + + return merged_epitopes + + @staticmethod + def annotate_epitope(annotation_name: str, epitope: PredictedEpitope, paired_epitope: PredictedEpitope) -> \ + PredictedEpitope: + if paired_epitope is not None: + if paired_epitope.affinity_mutated is not None: + epitope.neofox_annotations.annotations.append( + AnnotationFactory.build_annotation( + name=annotation_name + '_score', value=paired_epitope.affinity_mutated)) + if paired_epitope.rank_mutated is not None: + epitope.neofox_annotations.annotations.append( + AnnotationFactory.build_annotation( + name=annotation_name + '_rank', value=paired_epitope.rank_mutated)) + return epitope + class NeoantigenFactory(object): @staticmethod @@ -57,12 +99,9 @@ def build_neoantigen(wild_type_xmer=None, mutated_xmer=None, patient_identifier= neoantigen.rna_variant_allele_frequency = rna_variant_allele_frequency neoantigen.dna_variant_allele_frequency = dna_variant_allele_frequency neoantigen.imputed_gene_expression = imputed_gene_expression - - mutation = Mutation() - mutation.wild_type_xmer = wild_type_xmer - mutation.mutated_xmer = mutated_xmer - mutation.position = EpitopeHelper.mut_position_xmer_seq(mutation) - neoantigen.mutation = mutation + neoantigen.wild_type_xmer = wild_type_xmer.strip().upper() if wild_type_xmer else wild_type_xmer + neoantigen.mutated_xmer = mutated_xmer.strip().upper() if mutated_xmer else mutated_xmer + neoantigen.position = NeoantigenFactory.mut_position_xmer_seq(neoantigen) external_annotation_names = dict.fromkeys( nam for nam in kw.keys() if 
stringcase.snakecase(nam) not in set(Neoantigen.__annotations__.keys())) @@ -73,11 +112,69 @@ def build_neoantigen(wild_type_xmer=None, mutated_xmer=None, patient_identifier= return neoantigen + @staticmethod + def mut_position_xmer_seq(neoantigen: Neoantigen) -> List[int]: + """ + returns position (1-based) of mutation in xmer sequence. There can be more than one SNV within Xmer sequence. + """ + # TODO: this is not efficient. A solution using zip is 25% faster. There may be other alternatives + pos_mut = [] + if neoantigen.wild_type_xmer is not None and neoantigen.mutated_xmer is not None: + if len(neoantigen.wild_type_xmer) == len(neoantigen.mutated_xmer): + p1 = -1 + for i, aa in enumerate(neoantigen.mutated_xmer): + if aa != neoantigen.wild_type_xmer[i]: + p1 = i + 1 + pos_mut.append(p1) + else: + p1 = 0 + # in case sequences do not have same length + for a1, a2 in zip(neoantigen.wild_type_xmer, neoantigen.mutated_xmer): + if a1 == a2: + p1 += 1 + elif a1 != a2: + p1 += 1 + pos_mut.append(p1) + return pos_mut + + +class NeoepitopeFactory(object): + + @staticmethod + def build_neoepitope(mutated_peptide=None, wild_type_peptide=None, patient_identifier=None, gene=None, + rna_expression=None, rna_variant_allele_frequency=None, dna_variant_allele_frequency=None, + imputed_gene_expression=None, allele_mhc_i=None, isoform_mhc_i_i=None, organism=None, + mhc_database: MhcDatabase = None, **kw): + + neoepitope = PredictedEpitope() + neoepitope.patient_identifier = patient_identifier + neoepitope.gene = gene + neoepitope.rna_expression = rna_expression + neoepitope.rna_variant_allele_frequency = rna_variant_allele_frequency + neoepitope.dna_variant_allele_frequency = dna_variant_allele_frequency + neoepitope.imputed_gene_expression = imputed_gene_expression + neoepitope.mutated_peptide = mutated_peptide.strip().upper() if mutated_peptide else mutated_peptide + neoepitope.wild_type_peptide = wild_type_peptide.strip().upper() if wild_type_peptide else wild_type_peptide + 
+ # parse MHC alleles and isoforms + mhc_parser = MhcParser.get_mhc_parser(mhc_database) + neoepitope.allele_mhc_i = mhc_parser.parse_mhc_allele(allele_mhc_i) if allele_mhc_i else None + neoepitope.isoform_mhc_i_i = mhc_parser.parse_mhc2_isoform(isoform_mhc_i_i) if isoform_mhc_i_i else None + + external_annotation_names = dict.fromkeys( + nam for nam in kw.keys() if stringcase.snakecase(nam) not in set(Neoantigen.__annotations__.keys())) + neoepitope.external_annotations = [ + Annotation(name=name, value=str(kw.get(name))) for name in external_annotation_names] + + ModelValidator.validate_neoepitope(neoepitope, organism=organism) + + return neoepitope + class PatientFactory(object): @staticmethod - def build_patient(identifier, is_rna_available=False, tumor_type=None, mhc_alleles: List = [], - mhc2_alleles: List = [], mhc_database: MhcDatabase =None): + def build_patient(identifier, is_rna_available=False, tumor_type=None, mhc_alleles: List[str] = [], + mhc2_alleles: List[str] = [], mhc_database: MhcDatabase =None): patient = Patient( identifier=identifier, is_rna_available=is_rna_available, diff --git a/neofox/model/models.md b/neofox/model/models.md new file mode 100644 index 00000000..6f257bc6 --- /dev/null +++ b/neofox/model/models.md @@ -0,0 +1,353 @@ +# Data models + + +Protocol Buffers is employed to model Neofox's input and output data: neoantigens, Major Histocompatibility Complex (MHC) alleles, patients and output annotations. 
+ +![Neofox model](../figures/neofox_model.png) + +## Table of Contents + +- [neoantigen.proto](#neoantigen.proto) + - [Annotation](#neoantigen.Annotation) + - [Annotations](#neoantigen.Annotations) + - [Mhc1](#neoantigen.Mhc1) + - [Mhc2](#neoantigen.Mhc2) + - [Mhc2Gene](#neoantigen.Mhc2Gene) + - [Mhc2Isoform](#neoantigen.Mhc2Isoform) + - [MhcAllele](#neoantigen.MhcAllele) + - [Neoantigen](#neoantigen.Neoantigen) + - [Patient](#neoantigen.Patient) + - [PredictedEpitope](#neoantigen.PredictedEpitope) + - [Resource](#neoantigen.Resource) + + - [Mhc1Name](#neoantigen.Mhc1Name) + - [Mhc2GeneName](#neoantigen.Mhc2GeneName) + - [Mhc2Name](#neoantigen.Mhc2Name) + - [Zygosity](#neoantigen.Zygosity) + +- [Scalar Value Types](#scalar-value-types) + + + + +

Top

+ +## neoantigen.proto + + + + + +### Annotation +This is a generic class to hold annotations from Neofox + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| name | [string](#string) | | The name of the annotation | +| value | [string](#string) | | The value of the annotation | + + + + + + + + +### Annotations +A set of annotations for a neoantigen candidate + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| annotations | [Annotation](#neoantigen.Annotation) | repeated | List of annotations | +| annotator | [string](#string) | | The annotator | +| annotatorVersion | [string](#string) | | The version of the annotator | +| timestamp | [string](#string) | | A timestamp determined when the annotation was created | +| resources | [Resource](#neoantigen.Resource) | repeated | List of resources | + + + + + + + + +### Mhc1 +Models MHC I alleles related to the same MHC I gene, i.e. 2 alleles/2 isoforms per gene + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| name | [Mhc1Name](#neoantigen.Mhc1Name) | | MHC I gene name | +| zygosity | [Zygosity](#neoantigen.Zygosity) | | Zygosity of the gene | +| alleles | [MhcAllele](#neoantigen.MhcAllele) | repeated | The alleles of the gene (0, 1 or 2) | + + + + + + + + +### Mhc2 +Models MHC II alleles related to the same MHC II protein, i.e. 
4 isoforms related to 2 genes with 2 alleles each + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| name | [Mhc2Name](#neoantigen.Mhc2Name) | | MHC II molecule name | +| genes | [Mhc2Gene](#neoantigen.Mhc2Gene) | repeated | List of MHC II genes | +| isoforms | [Mhc2Isoform](#neoantigen.Mhc2Isoform) | repeated | Different combinations of MHC II alleles building different isoforms | + + + + + + + + +### Mhc2Gene +MHC II gene + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| name | [Mhc2GeneName](#neoantigen.Mhc2GeneName) | | MHC II gene name | +| zygosity | [Zygosity](#neoantigen.Zygosity) | | Zygosity of the gene | +| alleles | [MhcAllele](#neoantigen.MhcAllele) | repeated | The alleles of the gene (0, 1 or 2) | + + + + + + + + +### Mhc2Isoform +MHC II isoform + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| name | [string](#string) | | Name to refer to the MHC II isoform | +| alphaChain | [MhcAllele](#neoantigen.MhcAllele) | | The alpha chain of the isoform | +| betaChain | [MhcAllele](#neoantigen.MhcAllele) | | The beta chain of the isoform | + + + + + + + + +### MhcAllele +MHC allele representation. It does not include non synonymous changes to the sequence, changes in the non coding region +or changes in expression. See http://hla.alleles.org/nomenclature/naming.html for details + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| fullName | [string](#string) | | HLA full name as provided by the user (e.g.: HLA-DRB1*13:01:02:03N). This will be parsed into name, gene and group. Any digit format is allowed for this field (ie: 4, 6 or 8 digits), 2 digits names are not specific enough for our purpose and thus invalid | +| name | [string](#string) | | A specific HLA protein (e.g. HLA-DRB1*13:01). 
Alleles whose numbers differ in group and protein must differ in one or more nucleotide substitutions that change the amino acid sequence of the encoded protein. This name is normalized to avoid different representations of the same allele. For instance both HLA-DRB113:01 and HLA-DRB1*13:01:02:03N will be transformed into their normalised version HLA-DRB1*13:01. This name is also truncated to 4 digits. 2 digits names are not specific enough for our purpose and thus invalid | +| gene | [string](#string) | | The gene from either MHC I or II (e.g. DRB1, A) (this information is redundant with the Mhc1Gene.name and Mhc2Gene.name but it is convenient to have this at this level too, code will check for data coherence) | +| group | [string](#string) | | A group of alleles defined by a common serotype ie: Serological antigen carried by an allotype (e.g. 13 from HLA-DRB1*13) | +| protein | [string](#string) | | A specific protein (e.g.: 02 from HLA-DRB1*13:02) | + + + + + + + + +### Neoantigen +A neoantigen minimal definition + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| patientIdentifier | [string](#string) | | Patient identifier | +| gene | [string](#string) | | The HGNC gene symbol or gene identifier | +| position | [int32](#int32) | repeated | The amino acid position within the neoantigen candidate sequence. 1-based, starting in the N-terminus | +| wildTypeXmer | [string](#string) | | Amino acid sequence of the WT corresponding to the neoantigen candidate sequence (IUPAC 1 letter codes) | +| mutatedXmer | [string](#string) | | Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) | +| rnaExpression | [float](#float) | | Expression value of the transcript from RNA data. Range [0, +inf]. | +| imputedGeneExpression | [float](#float) | | Expression value of the transcript from TCGA data. Range [0, +inf]. | +| dnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the DNA. 
Range [0.0, 1.0] | +| rnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the RNA. Range [0.0, 1.0] | +| neofoxAnnotations | [Annotations](#neoantigen.Annotations) | | The NeoFox neoantigen annotations | +| externalAnnotations | [Annotation](#neoantigen.Annotation) | repeated | List of external annotations | +| neoepitopesMhcI | [PredictedEpitope](#neoantigen.PredictedEpitope) | repeated | List of predicted neoepitopes for MHC-I with feature annotation (optional) | +| neoepitopesMhcII | [PredictedEpitope](#neoantigen.PredictedEpitope) | repeated | List of predicted neoepitopes for MHC-II with feature annotation (optional) | + + + + + + + + +### Patient +The metadata required for analysis for a given patient + its patient identifier + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| identifier | [string](#string) | | Patient identifier | +| isRnaAvailable | [bool](#bool) | | Is RNA expression available? | +| tumorType | [string](#string) | | Tumor entity in TCGA study abbreviation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations | +| mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules | +| mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules | + + + + + + + + +### PredictedEpitope + + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| position | [int32](#int32) | | Not sure that we need this... this is in the old PredictedEpitope model | +| mutatedPeptide | [string](#string) | | The mutated peptide | +| wildTypePeptide | [string](#string) | | Closest wild type peptide | +| alleleMhcI | [MhcAllele](#neoantigen.MhcAllele) | | MHC I allele | +| isoformMhcII | [Mhc2Isoform](#neoantigen.Mhc2Isoform) | | MHC II isoform | +| affinityMutated | [float](#float) | | MHC binding affinity for the mutated peptide. 
This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in case of MHC-II peptides | +| rankMutated | [float](#float) | | MHC binding rank for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in case of MHC-II peptides | +| affinityWildType | [float](#float) | | MHC binding affinity for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in case of MHC-II peptides | +| rankWildType | [float](#float) | | MHC binding rank for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in case of MHC-II peptides | +| neofoxAnnotations | [Annotations](#neoantigen.Annotations) | | The NeoFox neoantigen annotations | +| patientIdentifier | [string](#string) | | Patient identifier | +| gene | [string](#string) | | The HGNC gene symbol or gene identifier | +| rnaExpression | [float](#float) | | Expression value of the transcript from RNA data. Range [0, +inf]. | +| imputedGeneExpression | [float](#float) | | Expression value of the transcript from TCGA data. Range [0, +inf]. | +| dnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the DNA. Range [0.0, 1.0] | +| rnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the RNA. Range [0.0, 1.0] | + + + + + + + + +### Resource +This is a class to track the version of an annotation resource + + +| Field | Type | Label | Description | +| ----- | ---- | ----- | ----------- | +| name | [string](#string) | | The name of the resource | +| version | [string](#string) | | The version of the resource | +| url | [string](#string) | | The URL of the resource if applicable | +| hash | [string](#string) | | The MD5 hash of the resource if applicable. 
This may be used when version is not available | +| download_timestamp | [string](#string) | | The timestamp when the download happened | + + + + + + + + + + +### Mhc1Name +Valid names for MHC I classic genes +Mus musculus gene names are preceded by the prefix H2 to avoid naming collisions. + +| Name | Number | Description | +| ---- | ------ | ----------- | +| A | 0 | Homo sapiens | +| B | 1 | | +| C | 2 | | +| H2K | 3 | Mus musculus | +| H2D | 4 | | +| H2L | 5 | | + + + + + +### Mhc2GeneName +Valid names for MHC II classic genes. +DRA is not included in this list as it does not have much variability in the population and for our purpose is +considered constant. +For Mus musculus we do not represent alpha and beta chains as they are homozygotes at all their MHC loci. +Hence, they can be treated as a single gene, like DR is for HLA. +See http://www.imgt.org/IMGTrepertoireMH/Polymorphism/haplotypes/mouse/MHC/Mu_haplotypes.html +Mus musculus gene names are preceded by the prefix H2 to avoid naming collisions. 
+ +| Name | Number | Description | +| ---- | ------ | ----------- | +| DRB1 | 0 | Homo sapiens | +| DPA1 | 1 | | +| DPB1 | 2 | | +| DQA1 | 3 | | +| DQB1 | 4 | | +| H2A | 5 | Mus musculus | +| H2E | 6 | | + + + + + +### Mhc2Name +Valid names for MHC II classic molecules + +| Name | Number | Description | +| ---- | ------ | ----------- | +| DR | 0 | | +| DP | 1 | | +| DQ | 2 | | +| H2A_molecule | 3 | | +| H2E_molecule | 4 | | + + + + + +### Zygosity +The zygosity of a given gene + +| Name | Number | Description | +| ---- | ------ | ----------- | +| HOMOZYGOUS | 0 | Two equal copies of the gene | +| HETEROZYGOUS | 1 | Two different copies of the gene | +| HEMIZYGOUS | 2 | Only one copy of the gene | +| LOSS | 3 | No copy of the gene | + + + + + + + + + + +## Scalar Value Types + +| .proto Type | Notes | C++ | Java | Python | Go | C# | PHP | Ruby | +| ----------- | ----- | --- | ---- | ------ | -- | -- | --- | ---- | +| double | | double | double | float | float64 | double | float | Float | +| float | | float | float | float | float32 | float | float | Float | +| int32 | Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint32 instead. | int32 | int | int | int32 | int | integer | Bignum or Fixnum (as required) | +| int64 | Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint64 instead. | int64 | long | int/long | int64 | long | integer/string | Bignum | +| uint32 | Uses variable-length encoding. | uint32 | int | int/long | uint32 | uint | integer | Bignum or Fixnum (as required) | +| uint64 | Uses variable-length encoding. | uint64 | long | int/long | uint64 | ulong | integer/string | Bignum or Fixnum (as required) | +| sint32 | Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int32s. 
| int32 | int | int | int32 | int | integer | Bignum or Fixnum (as required) | +| sint64 | Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int64s. | int64 | long | int/long | int64 | long | integer/string | Bignum | +| fixed32 | Always four bytes. More efficient than uint32 if values are often greater than 2^28. | uint32 | int | int | uint32 | uint | integer | Bignum or Fixnum (as required) | +| fixed64 | Always eight bytes. More efficient than uint64 if values are often greater than 2^56. | uint64 | long | int/long | uint64 | ulong | integer/string | Bignum | +| sfixed32 | Always four bytes. | int32 | int | int | int32 | int | integer | Bignum or Fixnum (as required) | +| sfixed64 | Always eight bytes. | int64 | long | int/long | int64 | long | integer/string | Bignum | +| bool | | bool | boolean | boolean | bool | bool | boolean | TrueClass/FalseClass | +| string | A string must always contain UTF-8 encoded or 7-bit ASCII text. | string | String | str/unicode | string | string | string | String (UTF-8) | +| bytes | May contain any arbitrary sequence of bytes. | string | ByteString | str | []byte | ByteString | string | String (ASCII-8BIT) | diff --git a/neofox/model/neoantigen.proto b/neofox/model/neoantigen.proto index 4b024f84..757c62c2 100755 --- a/neofox/model/neoantigen.proto +++ b/neofox/model/neoantigen.proto @@ -3,21 +3,6 @@ syntax = "proto3"; package neoantigen; -message Mutation { - /** - The amino acid position within the neoantigen candidate sequence. 
1-based, starting in the N-terminus - */ - repeated int32 position = 1; - /** - Amino acid sequence of the WT corresponding to the neoantigen candidate sequence (IUPAC 1 letter codes) - */ - string wildTypeXmer = 2; - /** - Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) - */ - string mutatedXmer = 3; -} - /** This is a generic class to hold annotations from Neofox */ @@ -59,9 +44,9 @@ message Resource { } /** -A set of annotations for a neoantigen +A set of annotations for a neoantigen candidate */ -message NeoantigenAnnotations { +message Annotations { /** List of annotations */ @@ -84,48 +69,6 @@ message NeoantigenAnnotations { repeated Resource resources = 5; } -/** -A neoantigen minimal definition -*/ -message Neoantigen { - /** - Patient identifier - */ - string patientIdentifier = 1; - /** - The HGNC gene symbol or gene identifier - */ - string gene = 2; - /** - The mutation - */ - Mutation mutation = 3; - /** - Expression value of the transcript from RNA data. Range [0, +inf]. - */ - float rnaExpression = 4; - /** - Expression value of the transcript from TCGA data. Range [0, +inf]. - */ - float imputedGeneExpression = 5; - /** - Variant allele frequency from the DNA. Range [0.0, 1.0] - */ - float dnaVariantAlleleFrequency = 6; - /** - Variant allele frequency from the RNA. Range [0.0, 1.0] - */ - float rnaVariantAlleleFrequency = 7; - /** - The NeoFox neoantigen annotations - */ - NeoantigenAnnotations neofoxAnnotations = 8; - /** - List of external annotations - */ - repeated Annotation externalAnnotations = 9; -} - /** The metadata required for analysis for a given patient + its patient identifier */ @@ -326,3 +269,132 @@ message MhcAllele { */ string protein = 5; } + +message PredictedEpitope { + /** + Not sure that we need this... 
this is in the old PredictedEpitope model + */ + int32 position = 1; + /** + The mutated peptide + */ + string mutatedPeptide = 2; + /** + Closest wild type peptide + */ + string wildTypePeptide = 3; + /** + MHC I allele + */ + MhcAllele alleleMhcI = 4; + /** + MHC II isoform + */ + Mhc2Isoform isoformMhcII = 5; + /** + MHC binding affinity for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides + and NetMHCIIpan in cas of MHC-II peptides + */ + float affinityMutated = 6; + /** + MHC binding rank for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides + and NetMHCIIpan in cas of MHC-II peptides + */ + float rankMutated = 7; + /** + MHC binding affinity for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides + and NetMHCIIpan in cas of MHC-II peptides + */ + float affinityWildType = 8; + /** + MHC binding rank for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides + and NetMHCIIpan in cas of MHC-II peptides + */ + float rankWildType = 9; + /** + The NeoFox neoantigen annotations + */ + Annotations neofoxAnnotations = 10; + /** + Patient identifier + */ + string patientIdentifier = 11; + /** + The HGNC gene symbol or gene identifier + */ + string gene = 12; + /** + Expression value of the transcript from RNA data. Range [0, +inf]. + */ + float rnaExpression = 13; + /** + Expression value of the transcript from TCGA data. Range [0, +inf]. + */ + float imputedGeneExpression = 14; + /** + Variant allele frequency from the DNA. Range [0.0, 1.0] + */ + float dnaVariantAlleleFrequency = 15; + /** + Variant allele frequency from the RNA. 
Range [0.0, 1.0] + */ + float rnaVariantAlleleFrequency = 16; +} + +/** +A neoantigen minimal definition +*/ +message Neoantigen { + /** + Patient identifier + */ + string patientIdentifier = 1; + /** + The HGNC gene symbol or gene identifier + */ + string gene = 2; + /** + The amino acid position within the neoantigen candidate sequence. 1-based, starting in the N-terminus + */ + repeated int32 position = 3; + /** + Amino acid sequence of the WT corresponding to the neoantigen candidate sequence (IUPAC 1 letter codes) + */ + string wildTypeXmer = 4; + /** + Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) + */ + string mutatedXmer = 5; + /** + Expression value of the transcript from RNA data. Range [0, +inf]. + */ + float rnaExpression = 6; + /** + Expression value of the transcript from TCGA data. Range [0, +inf]. + */ + float imputedGeneExpression = 7; + /** + Variant allele frequency from the DNA. Range [0.0, 1.0] + */ + float dnaVariantAlleleFrequency = 8; + /** + Variant allele frequency from the RNA. Range [0.0, 1.0] + */ + float rnaVariantAlleleFrequency = 9; + /** + The NeoFox neoantigen annotations + */ + Annotations neofoxAnnotations = 10; + /** + List of external annotations + */ + repeated Annotation externalAnnotations = 11; + /** + List of predicted neoepitopes for MHC-I with feature annotation (optional) + */ + repeated PredictedEpitope neoepitopesMhcI = 12; + /** + List of predicted neoepitopes for MHC-II with feature annotation (optional) + */ + repeated PredictedEpitope neoepitopesMhcII = 13; +} diff --git a/neofox/model/neoantigen.py b/neofox/model/neoantigen.py index ff3db4d3..a0b253d3 100755 --- a/neofox/model/neoantigen.py +++ b/neofox/model/neoantigen.py @@ -68,18 +68,6 @@ class Mhc2Name(betterproto.Enum): H2E_molecule = 4 -@dataclass -class Mutation(betterproto.Message): - # *The amino acid position within the neoantigen candidate sequence. 
1-based, - # starting in the N-terminus - position: List[int] = betterproto.int32_field(1) - # *Amino acid sequence of the WT corresponding to the neoantigen candidate - # sequence (IUPAC 1 letter codes) - wild_type_xmer: str = betterproto.string_field(2) - # *Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) - mutated_xmer: str = betterproto.string_field(3) - - @dataclass class Annotation(betterproto.Message): """*This is a generic class to hold annotations from Neofox""" @@ -108,8 +96,8 @@ class Resource(betterproto.Message): @dataclass -class NeoantigenAnnotations(betterproto.Message): - """*A set of annotations for a neoantigen""" +class Annotations(betterproto.Message): + """*A set of annotations for a neoantigen candidate""" # *List of annotations annotations: List["Annotation"] = betterproto.message_field(1) @@ -123,30 +111,6 @@ class NeoantigenAnnotations(betterproto.Message): resources: List["Resource"] = betterproto.message_field(5) -@dataclass -class Neoantigen(betterproto.Message): - """*A neoantigen minimal definition""" - - # *Patient identifier - patient_identifier: str = betterproto.string_field(1) - # *The HGNC gene symbol or gene identifier - gene: str = betterproto.string_field(2) - # *The mutation - mutation: "Mutation" = betterproto.message_field(3) - # *Expression value of the transcript from RNA data. Range [0, +inf]. - rna_expression: float = betterproto.float_field(4) - # *Expression value of the transcript from TCGA data. Range [0, +inf]. - imputed_gene_expression: float = betterproto.float_field(5) - # *Variant allele frequency from the DNA. Range [0.0, 1.0] - dna_variant_allele_frequency: float = betterproto.float_field(6) - # *Variant allele frequency from the RNA. 
Range [0.0, 1.0] - rna_variant_allele_frequency: float = betterproto.float_field(7) - # *The NeoFox neoantigen annotations - neofox_annotations: "NeoantigenAnnotations" = betterproto.message_field(8) - # *List of external annotations - external_annotations: List["Annotation"] = betterproto.message_field(9) - - @dataclass class Patient(betterproto.Message): """ @@ -253,3 +217,82 @@ class MhcAllele(betterproto.Message): group: str = betterproto.string_field(4) # *A specific protein (e.g.: 02 from HLA-DRB1*13:02) protein: str = betterproto.string_field(5) + + +@dataclass +class PredictedEpitope(betterproto.Message): + # *Not sure that we need this... this is in the old PredictedEpitope model + position: int = betterproto.int32_field(1) + # *The mutated peptide + mutated_peptide: str = betterproto.string_field(2) + # *Closest wild type peptide + wild_type_peptide: str = betterproto.string_field(3) + # *MHC I allele + allele_mhc_i: "MhcAllele" = betterproto.message_field(4) + # *MHC II isoform + isoform_mhc_i_i: "Mhc2Isoform" = betterproto.message_field(5) + # *MHC binding affinity for the mutated peptide. This value is estimated with + # NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II + # peptides + affinity_mutated: float = betterproto.float_field(6) + # *MHC binding rank for the mutated peptide. This value is estimated with + # NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II + # peptides + rank_mutated: float = betterproto.float_field(7) + # *MHC binding affinity for the wild type peptide. This value is estimated + # with NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II + # peptides + affinity_wild_type: float = betterproto.float_field(8) + # *MHC binding rank for the wild type peptide. 
This value is estimated with + # NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II + # peptides + rank_wild_type: float = betterproto.float_field(9) + # *The NeoFox neoantigen annotations + neofox_annotations: "Annotations" = betterproto.message_field(10) + # *Patient identifier + patient_identifier: str = betterproto.string_field(11) + # *The HGNC gene symbol or gene identifier + gene: str = betterproto.string_field(12) + # *Expression value of the transcript from RNA data. Range [0, +inf]. + rna_expression: float = betterproto.float_field(13) + # *Expression value of the transcript from TCGA data. Range [0, +inf]. + imputed_gene_expression: float = betterproto.float_field(14) + # *Variant allele frequency from the DNA. Range [0.0, 1.0] + dna_variant_allele_frequency: float = betterproto.float_field(15) + # *Variant allele frequency from the RNA. Range [0.0, 1.0] + rna_variant_allele_frequency: float = betterproto.float_field(16) + + +@dataclass +class Neoantigen(betterproto.Message): + """*A neoantigen minimal definition""" + + # *Patient identifier + patient_identifier: str = betterproto.string_field(1) + # *The HGNC gene symbol or gene identifier + gene: str = betterproto.string_field(2) + # *The amino acid position within the neoantigen candidate sequence. 1-based, + # starting in the N-terminus + position: List[int] = betterproto.int32_field(3) + # *Amino acid sequence of the WT corresponding to the neoantigen candidate + # sequence (IUPAC 1 letter codes) + wild_type_xmer: str = betterproto.string_field(4) + # *Amino acid sequence of the neoantigen candidate (IUPAC 1 letter codes) + mutated_xmer: str = betterproto.string_field(5) + # *Expression value of the transcript from RNA data. Range [0, +inf]. + rna_expression: float = betterproto.float_field(6) + # *Expression value of the transcript from TCGA data. Range [0, +inf]. + imputed_gene_expression: float = betterproto.float_field(7) + # *Variant allele frequency from the DNA. 
Range [0.0, 1.0] + dna_variant_allele_frequency: float = betterproto.float_field(8) + # *Variant allele frequency from the RNA. Range [0.0, 1.0] + rna_variant_allele_frequency: float = betterproto.float_field(9) + # *The NeoFox neoantigen annotations + neofox_annotations: "Annotations" = betterproto.message_field(10) + # *List of external annotations + external_annotations: List["Annotation"] = betterproto.message_field(11) + # *List of predicted neoepitopes for MHC-I with feature annotation (optional) + neoepitopes_mhc_i: List["PredictedEpitope"] = betterproto.message_field(12) + # *List of predicted neoepitopes for MHC-II with feature annotation + # (optional) + neoepitopes_mhc_i_i: List["PredictedEpitope"] = betterproto.message_field(13) diff --git a/neofox/model/validation.py b/neofox/model/validation.py index 95536843..a53e16f1 100755 --- a/neofox/model/validation.py +++ b/neofox/model/validation.py @@ -19,14 +19,12 @@ import betterproto from Bio.Alphabet.IUPAC import ExtendedIUPACProtein from Bio.Data import IUPACData -from neofox.helpers.epitope_helper import EpitopeHelper from neofox.exceptions import NeofoxDataValidationException from logzero import logger from neofox.model.mhc_parser import HLA_MOLECULE_PATTERN, HLA_DR_MOLECULE_PATTERN, \ ALLELE_PATTERN_BY_ORGANISM, H2_MOLECULE_PATTERN from neofox.model.neoantigen import ( Neoantigen, - Mutation, Patient, Mhc2Name, Mhc2GeneName, @@ -34,7 +32,7 @@ Mhc2, Mhc2Isoform, MhcAllele, - Mhc1, Mhc1Name + Mhc1, Mhc1Name, PredictedEpitope ) from neofox.references.references import ORGANISM_HOMO_SAPIENS, MHC_I_GENES_BY_ORGANISM, MHC_II_GENES_BY_ORGANISM, \ ORGANISM_MUS_MUSCULUS @@ -78,7 +76,19 @@ def validate_neoantigen(neoantigen: Neoantigen): "A patient identifier is missing. 
Please provide patientIdentifier in the input file" # checks mutation - ModelValidator._validate_mutation(neoantigen.mutation) + assert neoantigen.mutated_xmer is not None and len(neoantigen.mutated_xmer) > 0, \ + "Missing mutated peptide sequence in input (mutatedXmer) " + + for aa in neoantigen.mutated_xmer: + ModelValidator._validate_aminoacid(aa) + + # avoids this validation when there is no wild type + if neoantigen.wild_type_xmer: + for aa in neoantigen.wild_type_xmer: + ModelValidator._validate_aminoacid(aa) + + assert neoantigen.position is not None and neoantigen.position != "", \ + "The position of the mutation is empty, please use EpitopeHelper.mut_position_xmer_seq() to fill it" # check the expression values ModelValidator._validate_expression_values(neoantigen) @@ -86,6 +96,81 @@ def validate_neoantigen(neoantigen: Neoantigen): logger.error(neoantigen.to_json(indent=3)) raise NeofoxDataValidationException(e) + @staticmethod + def validate_neoepitope(neoepitope: PredictedEpitope, organism: str): + + # checks format consistency first + ModelValidator.validate(neoepitope) + + try: + has_patient_id = neoepitope.patient_identifier is not None and len(neoepitope.patient_identifier) > 0 + has_mhc_i = ModelValidator.is_mhci_epitope(neoepitope) + has_mhc_ii = ModelValidator.is_mhcii_epitope(neoepitope) + + assert has_patient_id or has_mhc_i or has_mhc_ii, \ + "A patient identifier is missing for a neoepitope without MHC-I allele or MHC-II isoform." 
+ + assert not (has_mhc_i and has_mhc_ii), \ + "Neoepitopes can only be associated to either an MHC-I allele or MHC-II isoform" + + # checks peptides + for aa in neoepitope.mutated_peptide: + ModelValidator._validate_aminoacid(aa) + has_wt_peptide = neoepitope.wild_type_peptide is not None and neoepitope.wild_type_peptide != "" + if has_wt_peptide: + for aa in neoepitope.wild_type_peptide: + ModelValidator._validate_aminoacid(aa) + + # check lengths according to MHC I or II + length_mutated_peptide = len(neoepitope.mutated_peptide) + if has_mhc_i: + ModelValidator.validate_mhc_allele_representation(neoepitope.allele_mhc_i, organism=organism) + assert ModelValidator.is_mhci_peptide_length_valid(length_mutated_peptide), \ + "Mutated MHC-I peptide has a non supported length of {}".format(length_mutated_peptide) + elif has_mhc_ii: + ModelValidator.validate_mhc2_isoform_representation(neoepitope.isoform_mhc_i_i, organism=organism) + assert ModelValidator.is_mhcii_peptide_length_valid(length_mutated_peptide), \ + "Mutated MHC-II peptide has a non supported length of {}".format(length_mutated_peptide) + else: + assert ModelValidator.is_mhci_peptide_length_valid(length_mutated_peptide) or \ + ModelValidator.is_mhcii_peptide_length_valid(length_mutated_peptide), \ + "Mutated peptide has a non supported length of {}".format(length_mutated_peptide) + + if has_wt_peptide: + length_wt_peptide = len(neoepitope.wild_type_peptide) + if has_mhc_i: + assert ModelValidator.is_mhci_peptide_length_valid(length_wt_peptide), \ + "Mutated MHC-I peptide has a non supported length of {}".format(length_wt_peptide) + elif has_mhc_ii: + assert ModelValidator.is_mhcii_peptide_length_valid(length_wt_peptide), \ + "Mutated MHC-II peptide has a non supported length of {}".format(length_wt_peptide) + else: + assert ModelValidator.is_mhci_peptide_length_valid(length_wt_peptide) or \ + ModelValidator.is_mhcii_peptide_length_valid(length_wt_peptide), \ + "Mutated peptide has a non supported length of 
{}".format(length_wt_peptide) + + # check the expression values + ModelValidator._validate_expression_values(neoepitope) + except AssertionError as e: + logger.error(neoepitope.to_json(indent=3)) + raise NeofoxDataValidationException(e) + + @staticmethod + def is_mhcii_peptide_length_valid(length_mutated_peptide): + return 9 <= length_mutated_peptide <= 20000 + + @staticmethod + def is_mhci_peptide_length_valid(length_mutated_peptide): + return 8 <= length_mutated_peptide <= 14 + + @staticmethod + def is_mhcii_epitope(neoepitope): + return neoepitope.isoform_mhc_i_i is not None and neoepitope.isoform_mhc_i_i.name != '' + + @staticmethod + def is_mhci_epitope(neoepitope): + return neoepitope.allele_mhc_i is not None and neoepitope.allele_mhc_i.name != '' + @staticmethod def validate_patient(patient: Patient, organism=ORGANISM_HOMO_SAPIENS): @@ -184,32 +269,14 @@ def _validate_mhc2(mhc2: Mhc2, organism: str): return mhc2 @staticmethod - def _validate_expression_values(neoantigen): + def _validate_expression_values(object: Neoantigen or PredictedEpitope): assert ( - neoantigen.rna_expression is None or neoantigen.rna_expression >= 0 + object.rna_expression is None or object.rna_expression >= 0 ), "RNA expression should be a positive integer or zero {}".format( - neoantigen.rna_expression - ) - ModelValidator._validate_vaf(neoantigen.dna_variant_allele_frequency) - ModelValidator._validate_vaf(neoantigen.rna_variant_allele_frequency) - - @staticmethod - def _validate_mutation(mutation: Mutation): - assert mutation.mutated_xmer is not None and len(mutation.mutated_xmer) > 0, \ - "Missing mutated peptide sequence in input (mutation.mutatedXmer) " - mutation.mutated_xmer = "".join( - [ModelValidator._validate_aminoacid(aa) for aa in mutation.mutated_xmer] + object.rna_expression ) - # avoids this validation when there is no wild type - if mutation.wild_type_xmer: - mutation.wild_type_xmer = "".join( - [ - ModelValidator._validate_aminoacid(aa) - for aa in 
mutation.wild_type_xmer - ] - ) - assert mutation.position is not None and mutation.position != "", \ - "The position of the mutation is empty, please use EpitopeHelper.mut_position_xmer_seq() to fill it" + ModelValidator._validate_vaf(object.dna_variant_allele_frequency) + ModelValidator._validate_vaf(object.rna_variant_allele_frequency) @staticmethod def _validate_vaf(vaf): @@ -221,7 +288,6 @@ def _validate_vaf(vaf): @staticmethod def _validate_aminoacid(aminoacid): assert aminoacid is not None, "Amino acid field cannot be empty" - aminoacid = aminoacid.strip() assert isinstance(aminoacid, str), "Amino acid has to be a string" # this chunk is unused but let's leave in case it is handy in the future if len(aminoacid) == 3: @@ -231,13 +297,11 @@ def _validate_aminoacid(aminoacid): assert aminoacid != "X", "Unknown amino acid X is not supported. Please, remove neoantigens containing an X." aminoacid = IUPACData.protein_letters_3to1_extended.get(aminoacid) if len(aminoacid) == 1: - aminoacid = aminoacid.upper() assert ( aminoacid in ExtendedIUPACProtein.letters ), "Non existing aminoacid {}".format(aminoacid) else: assert False, "Invalid aminoacid {}".format(aminoacid) - return aminoacid @staticmethod diff --git a/neofox/neofox.py b/neofox/neofox.py index edf96717..c805393b 100755 --- a/neofox/neofox.py +++ b/neofox/neofox.py @@ -24,21 +24,16 @@ import logzero from logzero import logger from dask.distributed import Client + +import neofox from neofox.expression_imputation.expression_imputation import ExpressionAnnotator -from neofox.helpers.epitope_helper import EpitopeHelper -from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import ( - TcellPrediction, -) -from neofox.published_features.self_similarity.self_similarity import ( - SelfSimilarityCalculator, -) +from neofox.model.factories import NeoantigenFactory +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from 
neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator from neofox.references.references import ReferenceFolder, DependenciesConfiguration, ORGANISM_HOMO_SAPIENS -from neofox import NEOFOX_LOG_FILE_ENV, AFFINITY_THRESHOLD_DEFAULT -from neofox.annotator import NeoantigenAnnotator -from neofox.exceptions import ( - NeofoxConfigurationException, - NeofoxDataValidationException, -) +from neofox import NEOFOX_LOG_FILE_ENV +from neofox.annotator.neoantigen_annotator import NeoantigenAnnotator +from neofox.exceptions import NeofoxConfigurationException, NeofoxDataValidationException from neofox.model.neoantigen import Neoantigen, Patient from neofox.model.validation import ModelValidator import dotenv @@ -47,28 +42,29 @@ class NeoFox: def __init__( - self, - neoantigens: List[Neoantigen], - patients: List[Patient], - num_cpus: int = 1, - patient_id: str = None, - work_folder=None, - output_prefix=None, - reference_folder: ReferenceFolder = None, - configuration: DependenciesConfiguration = None, - verbose=True, - configuration_file=None, - affinity_threshold=AFFINITY_THRESHOLD_DEFAULT - ): + self, + neoantigens: List[Neoantigen], + patients: List[Patient], + num_cpus: int = 1, + log_file_name=None, + reference_folder: ReferenceFolder = None, + configuration: DependenciesConfiguration = None, + verbose=True, + configuration_file=None, + rank_mhci_threshold=neofox.RANK_MHCI_THRESHOLD_DEFAULT, + rank_mhcii_threshold=neofox.RANK_MHCII_THRESHOLD_DEFAULT, + with_all_neoepitopes=False): + + initialise_logs(logfile=log_file_name, verbose=verbose) + logger.info("Loading reference data...") - self.affinity_threshold = affinity_threshold + self.rank_mhci_threshold = rank_mhci_threshold + self.rank_mhcii_threshold = rank_mhcii_threshold if configuration_file: dotenv.load_dotenv(configuration_file, override=True) - # initialise logs - self.log_file_name = self._get_log_file_name(output_prefix, work_folder) - self._initialise_logs(self.log_file_name, 
verbose) + self.log_file_name = log_file_name # intialize references folder and configuration # NOTE: uses the reference folder and config passed as a parameter if exists, this is here to make it @@ -81,7 +77,7 @@ def __init__( self.configuration = ( configuration if configuration else DependenciesConfiguration() ) - self.tcell_predictor = TcellPrediction(affinity_threshold=self.affinity_threshold) + self.tcell_predictor = TcellPrediction() self.self_similarity = SelfSimilarityCalculator() self.num_cpus = num_cpus @@ -96,10 +92,8 @@ def __init__( # validates neoantigens self.neoantigens = neoantigens for n in self.neoantigens: - if n.patient_identifier is None: - n.patient_identifier = patient_id # NOTE: the position of the mutations is not expected from the user and if provide the value is ignored - n.mutation.position = EpitopeHelper.mut_position_xmer_seq(mutation=n.mutation) + n.position = NeoantigenFactory.mut_position_xmer_seq(neoantigen=n) ModelValidator.validate_neoantigen(n) # validates patients @@ -117,7 +111,8 @@ def __init__( expression_per_patient[neoantigen.patient_identifier].append(neoantigen.rna_expression) for patient in self.patients: - self.patients[patient].is_rna_available = all(e is not None for e in expression_per_patient[self.patients[patient].identifier]) + self.patients[patient].is_rna_available = all(e is not None for e in + expression_per_patient[self.patients[patient].identifier]) # only performs the expression imputation for humans if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS: @@ -127,7 +122,9 @@ def __init__( # NOTE: add gene expression to neoantigen candidate model self.neoantigens = self._conditional_expression_imputation() - logger.info("Data loaded") + self.with_all_neoepitopes = with_all_neoepitopes + + logger.info("Reference data loaded") def _conditional_expression_imputation(self) -> List[Neoantigen]: expression_annotator = ExpressionAnnotator() @@ -148,16 +145,7 @@ def _conditional_expression_imputation(self) -> 
List[Neoantigen]: return neoantigens_transformed @staticmethod - def _initialise_logs(logfile, verbose=False): - if logfile is not None: - logzero.logfile(logfile) - # TODO: this does not work - if verbose: - logzero.loglevel(logging.INFO) - else: - logzero.loglevel(logging.WARN) - - def _get_log_file_name(self, output_prefix, work_folder): + def get_log_file_name(output_prefix, work_folder): if work_folder and os.path.exists(work_folder): logfile = os.path.join(work_folder, "{}.log".format(output_prefix)) else: @@ -204,7 +192,8 @@ def get_annotations(self) -> List[Neoantigen]: # see reference on using threads versus CPUs here https://docs.dask.org/en/latest/setup/single-machine.html dask_client = Client(n_workers=self.num_cpus, threads_per_worker=1) annotations = self.send_to_client(dask_client) - dask_client.close() + dask_client.shutdown() # terminates schedulers and workers + dask_client.close(timeout=10) # waits 10 seconds for the client to close before killing return annotations @@ -216,12 +205,8 @@ def send_to_client(self, dask_client): future_tcell_predictor = dask_client.scatter( self.tcell_predictor, broadcast=True ) - future_self_similarity = dask_client.scatter( - self.self_similarity, broadcast=True - ) - future_reference_folder = dask_client.scatter( - self.reference_folder, broadcast=True - ) + future_self_similarity = dask_client.scatter(self.self_similarity, broadcast=True) + future_reference_folder = dask_client.scatter(self.reference_folder, broadcast=True) future_configuration = dask_client.scatter(self.configuration, broadcast=True) for neoantigen in self.neoantigens: patient = self.patients.get(neoantigen.patient_identifier) @@ -237,7 +222,9 @@ def send_to_client(self, dask_client): future_tcell_predictor, future_self_similarity, self.log_file_name, - self.affinity_threshold + self.rank_mhci_threshold, + self.rank_mhcii_threshold, + self.with_all_neoepitopes ) ) annotated_neoantigens = dask_client.gather(futures) @@ -258,11 +245,13 @@ def 
annotate_neoantigen( tcell_predictor: TcellPrediction, self_similarity: SelfSimilarityCalculator, log_file_name: str, - affinity_threshold = AFFINITY_THRESHOLD_DEFAULT + rank_mhci_threshold = neofox.RANK_MHCI_THRESHOLD_DEFAULT, + rank_mhcii_threshold=neofox.RANK_MHCII_THRESHOLD_DEFAULT, + with_all_neoepitopes=False ): # the logs need to be initialised inside every dask job - NeoFox._initialise_logs(log_file_name) - logger.info("Starting neoantigen annotation with peptide={}".format(neoantigen.mutation.mutated_xmer)) + initialise_logs(log_file_name) + logger.info("Starting neoantigen annotation with peptide={}".format(neoantigen.mutated_xmer)) start = time.time() try: annotated_neoantigen = NeoantigenAnnotator( @@ -270,8 +259,9 @@ def annotate_neoantigen( configuration, tcell_predictor=tcell_predictor, self_similarity=self_similarity, - affinity_threshold=affinity_threshold - ).get_annotation(neoantigen, patient) + rank_mhci_threshold=rank_mhci_threshold, + rank_mhcii_threshold=rank_mhcii_threshold + ).get_annotated_neoantigen(neoantigen, patient, with_all_neoepitopes=with_all_neoepitopes) except Exception as e: logger.error("Error processing neoantigen {}".format(neoantigen.to_dict())) logger.error("Error processing patient {}".format(patient.to_dict())) @@ -279,6 +269,16 @@ def annotate_neoantigen( end = time.time() logger.info( "Elapsed time for annotating neoantigen for peptide={}: {} seconds".format( - neoantigen.mutation.mutated_xmer, int(end - start)) + neoantigen.mutated_xmer, int(end - start)) ) return annotated_neoantigen + + +def initialise_logs(logfile, verbose=False): + if logfile is not None: + logzero.logfile(logfile) + # TODO: this does not work + if verbose: + logzero.loglevel(logging.INFO) + else: + logzero.loglevel(logging.WARN) diff --git a/neofox/neofox_epitope.py b/neofox/neofox_epitope.py new file mode 100755 index 00000000..26433cb2 --- /dev/null +++ b/neofox/neofox_epitope.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python +# +# Copyright (c) 
2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. +# +# This file is part of Neofox +# (see https://github.com/tron-bioinformatics/neofox). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .# +import logging +import time +from copy import copy +from typing import List +import logzero +from logzero import logger +from dask.distributed import Client + +from neofox.annotator.neoepitope_annotator import NeoepitopeAnnotator +from neofox.expression_imputation.expression_imputation import ExpressionAnnotator +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator +from neofox.references.references import ReferenceFolder, DependenciesConfiguration, ORGANISM_HOMO_SAPIENS +from neofox.exceptions import NeofoxConfigurationException, NeofoxDataValidationException +from neofox.model.neoantigen import Patient, PredictedEpitope +from neofox.model.validation import ModelValidator +import dotenv + + +class NeoFoxEpitope: + + def __init__( + self, + neoepitopes: List[PredictedEpitope], + patients: List[Patient] = [], + num_cpus: int = 1, + log_file_name=None, + reference_folder: ReferenceFolder = None, + configuration: DependenciesConfiguration = None, + verbose=True, + configuration_file=None): + + initialise_logs(logfile=log_file_name, 
verbose=verbose) + logger.info("Loading reference data...") + + if configuration_file: + dotenv.load_dotenv(configuration_file, override=True) + + self.log_file_name = log_file_name + + # intialize references folder and configuration + # NOTE: uses the reference folder and config passed as a parameter if exists, this is here to make it + # testable with fake objects + self.reference_folder = ( + reference_folder if reference_folder else ReferenceFolder(verbose=verbose) + ) + # NOTE: makes this call to force the loading of the available alleles here + self.reference_folder.get_available_alleles() + self.configuration = ( + configuration if configuration else DependenciesConfiguration() + ) + self.tcell_predictor = TcellPrediction() + self.self_similarity = SelfSimilarityCalculator() + self.num_cpus = num_cpus + + # validates patients + self.patients = {} + for patient in patients: + ModelValidator.validate_patient(patient, organism=self.reference_folder.organism) + self.patients[patient.identifier] = patient + + if neoepitopes is None or len(neoepitopes) == 0: + raise NeofoxConfigurationException("Missing input data to run Neofox") + + # validates neoepitopes and combines neoepitopes according to patient alleles + self.neoepitopes = [] + for n in neoepitopes: + ModelValidator.validate_neoepitope(n, organism=self.reference_folder.organism) + if ModelValidator.is_mhci_epitope(n) or ModelValidator.is_mhcii_epitope(n): + self.neoepitopes.append(n) + else: + if n.patient_identifier in self.patients: + patient = self.patients.get(n.patient_identifier) + if ModelValidator.is_mhci_peptide_length_valid(len(n.mutated_peptide)): + for m in patient.mhc1: + for a in m.alleles: + mhci_neoepitope = copy(n) + mhci_neoepitope.allele_mhc_i = a + self.neoepitopes.append(mhci_neoepitope) + for m in patient.mhc2: + for i in m.isoforms: + mhcii_neoepitope = copy(n) + mhcii_neoepitope.isoform_mhc_i_i = i + self.neoepitopes.append(mhcii_neoepitope) + else: + raise 
NeofoxDataValidationException( + 'A neoepitope is linked to patient {} for which there is no data'.format(n.patient_identifier)) + + # only performs the expression imputation for humans + if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS: + # impute expresssion from TCGA, ONLY if isRNAavailable = False for given patient, + # otherwise original values is reported + # NOTE: this must happen after validation to avoid uncaptured errors due to missing patients + # NOTE: add gene expression to neoantigen candidate model + self.neoepitopes = self._conditional_expression_imputation() + + logger.info("Reference data loaded") + + def get_annotations(self) -> List[PredictedEpitope]: + """ + Loads epitope data (if file has been not imported to R; colnames need to be changed), adds data to class that are needed to calculate, + calls epitope class --> determination of epitope properties, + write to txt file + """ + logger.info("Starting NeoFox annotations...") + # initialise dask + # see reference on using threads versus CPUs here https://docs.dask.org/en/latest/setup/single-machine.html + dask_client = Client(n_workers=self.num_cpus, threads_per_worker=1) + annotations = self.send_to_client(dask_client) + dask_client.close(timeout=10) # waits 10 seconds for the client to close before killing + + return annotations + + def send_to_client(self, dask_client): + # feature calculation for each epitope + futures = [] + start = time.time() + # NOTE: sets those heavy resources distributed to all workers in the cluster + future_tcell_predictor = dask_client.scatter( + self.tcell_predictor, broadcast=True + ) + future_self_similarity = dask_client.scatter(self.self_similarity, broadcast=True) + future_reference_folder = dask_client.scatter(self.reference_folder, broadcast=True) + future_configuration = dask_client.scatter(self.configuration, broadcast=True) + + for neoepitope in self.neoepitopes: + logger.debug("Neoantigen: {}".format(neoepitope.to_json(indent=3))) + 
futures.append( + dask_client.submit( + NeoFoxEpitope.annotate_neoepitope, + neoepitope, + future_reference_folder, + future_configuration, + future_tcell_predictor, + future_self_similarity, + self.log_file_name, + ) + ) + annotated_neoantigens = dask_client.gather(futures) + end = time.time() + logger.info( + "Elapsed time for annotating {} neoepitopes {} seconds".format( + len(self.neoepitopes), int(end - start) + ) + ) + + # close distributed resources + del future_tcell_predictor + del future_self_similarity + del future_reference_folder + del future_configuration + + return annotated_neoantigens + + @staticmethod + def annotate_neoepitope( + neoepitope: PredictedEpitope, + reference_folder: ReferenceFolder, + configuration: DependenciesConfiguration, + tcell_predictor: TcellPrediction, + self_similarity: SelfSimilarityCalculator, + log_file_name: str, + ): + # the logs need to be initialised inside every dask job + initialise_logs(log_file_name) + logger.info("Starting neoepitope annotation with peptide={}".format(neoepitope.mutated_peptide)) + start = time.time() + try: + annotated_neoantigen = NeoepitopeAnnotator( + reference_folder, + configuration, + tcell_predictor=tcell_predictor, + self_similarity=self_similarity, + ).get_annotated_neoepitope(neoepitope) + except Exception as e: + logger.error("Error processing neoantigen {}".format(neoepitope.to_dict())) + raise e + end = time.time() + logger.info( + "Elapsed time for annotating neoantigen for peptide={}: {} seconds".format( + neoepitope.mutated_peptide, int(end - start)) + ) + return annotated_neoantigen + + def _conditional_expression_imputation(self) -> List[PredictedEpitope]: + + expression_annotator = ExpressionAnnotator() + neoepitopes_transformed = [] + for neoepitope in self.neoepitopes: + if neoepitope.patient_identifier is not None and neoepitope.patient_identifier != '': + patient = self.patients[neoepitope.patient_identifier] + neoepitope_transformed = neoepitope + gene_expression = 
expression_annotator.get_gene_expression_annotation( + gene_name=neoepitope.gene, tcga_cohort=patient.tumor_type) + if not patient.is_rna_available and patient.tumor_type is not None and patient.tumor_type != "": + neoepitope_transformed.rna_expression = gene_expression + neoepitope.imputed_gene_expression = gene_expression + neoepitopes_transformed.append(neoepitope_transformed) + else: + neoepitopes_transformed.append(neoepitope) + return neoepitopes_transformed + + +def initialise_logs(logfile, verbose=False): + if logfile is not None: + logzero.logfile(logfile) + # TODO: this does not work + if verbose: + logzero.loglevel(logging.INFO) + else: + logzero.loglevel(logging.WARN) diff --git a/neofox/published_features/Tcell_predictor/preprocess.py b/neofox/published_features/Tcell_predictor/preprocess.py index 1fb0baaf..32759c35 100755 --- a/neofox/published_features/Tcell_predictor/preprocess.py +++ b/neofox/published_features/Tcell_predictor/preprocess.py @@ -24,6 +24,11 @@ from Bio.Data import IUPACData +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.model.neoantigen import PredictedEpitope + +from functools import lru_cache + MAT = "SIRdata.mat" GENES_EXPRESSION_PICKLE = "genes-expression.pickle" ACIDS_FEATURES_PICKLE = "amino-acids-features.pickle" @@ -121,26 +126,26 @@ def get_properties(self, amino_substitution): ] ) - def main(self, f_name): - lst_data = [] - with open(f_name, "r") as f: - for row in f: - gene_name, sequence, aa_subs = row.split() - seq_arr = self.seq2bin(sequence) - # tap score - tap_mat = self.load_data.get("tap") - tap_score = tap_mat.dot(seq_arr.T).ravel() - # cleavge score - clv_mat = self.load_data.get("clv") - clv_mat = clv_mat[0, 20:200] - clv_score = clv_mat.dot(seq_arr.T).ravel() - - features_aa = self.get_properties(aa_subs) - # expresion - expression_value = self.get_gene_expression(gene_name) - - lst_data.append( - np.hstack((expression_value, features_aa, clv_score, tap_score)) - ) - mat_features = 
np.asarray(lst_data) + def main(self, gene: str, epitope: PredictedEpitope): + + seq_arr = self.seq2bin(epitope.mutated_peptide) + # tap score + tap_mat = self.load_data.get("tap") + tap_score = tap_mat.dot(seq_arr.T).ravel() + # cleavge score + clv_mat = self.load_data.get("clv") + clv_mat = clv_mat[0, 20:200] + clv_score = clv_mat.dot(seq_arr.T).ravel() + + position_of_mutation = EpitopeHelper.position_of_mutation_epitope(epitope=epitope) + wild_type_aminoacid = epitope.wild_type_peptide[position_of_mutation - 1] # it is 1-based + mutated_aminoacid = epitope.mutated_peptide[position_of_mutation - 1] + features_aa = self.get_properties(wild_type_aminoacid + mutated_aminoacid) + # expresion + expression_value = self.get_gene_expression(gene) + + result = np.hstack((expression_value, features_aa, clv_score, tap_score)) + + mat_features = np.asarray([result]) + return mat_features diff --git a/neofox/published_features/Tcell_predictor/tcellpredictor_wrapper.py b/neofox/published_features/Tcell_predictor/tcellpredictor_wrapper.py index 6fe48260..f380ce84 100755 --- a/neofox/published_features/Tcell_predictor/tcellpredictor_wrapper.py +++ b/neofox/published_features/Tcell_predictor/tcellpredictor_wrapper.py @@ -21,11 +21,9 @@ import pickle import warnings from typing import List -from neofox.helpers import intermediate_files from neofox.model.validation import ModelValidator -from neofox.model.neoantigen import Annotation, Neoantigen +from neofox.model.neoantigen import Annotation, Neoantigen, PredictedEpitope from neofox.model.factories import AnnotationFactory -from neofox import AFFINITY_THRESHOLD_DEFAULT from neofox.published_features.Tcell_predictor.preprocess import Preprocessor from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import ( BestAndMultipleBinder, @@ -36,8 +34,7 @@ class TcellPrediction: - def __init__(self, affinity_threshold=AFFINITY_THRESHOLD_DEFAULT): - self.affinity_threshold = affinity_threshold + def __init__(self): # 
UserWarning: Trying to unpickle estimator DecisionTreeClassifier from version 0.19.0 when using version # 0.20.3. This might lead to breaking code or invalid results. Use at your own risk. with warnings.catch_warnings(): @@ -51,70 +48,35 @@ def __init__(self, affinity_threshold=AFFINITY_THRESHOLD_DEFAULT): # this sets the n_jobs parameter otherwise inherited from the pickle file self.classifier = pickle.load(f).set_params(n_jobs=1) - def _triple_gen_seq_subst(self, gene, substitution, epitope, score): + self.preprocessor = Preprocessor() + + def _wrapper_tcellpredictor(self, gene, epitope: PredictedEpitope): """ - extracts gene id, epitope sequence and substitution from epitope dictionary - Tcell predictor works with 9mers only! --> extract for 9mers only + wrapper function to determine """ result = None has_gene = gene is not None and gene.strip() != "" - if has_gene and len(epitope) == 9: - if self.affinity_threshold is None or float(score) < self.affinity_threshold: - result = [gene.replace(" ", ""), epitope, substitution] - return result - - def _write_triple_to_file(self, triple, tmpfile_in): - """ - writes triple (gene id, epitope sequence, substitution) to temporary file - """ - with open(tmpfile_in, "w") as f: - tripleString = " ".join(triple) - f.write(tripleString + "\n") - - def _run_prediction(self, f_name): - input_file = f_name - mat = Preprocessor().main(input_file) - scores = self.classifier.predict_proba(mat) - result = "indefinable_by_TcellPredictor" - if ( - scores is not None - and len(scores) > 0 - and scores[-1] is not None - and len(scores[-1]) > 0 - ): - # it returns the last number from the latest entry in the list - result = str(scores[-1][-1]) + if has_gene and len(epitope.mutated_peptide) == 9: + mat = self.preprocessor.main(gene, epitope=epitope) + scores = self.classifier.predict_proba(mat) + result = "indefinable_by_TcellPredictor" + if ( + scores is not None + and len(scores) > 0 + and scores[-1] is not None + and len(scores[-1]) > 0 
+ ): + # it returns the last number from the latest entry in the list + result = str(scores[-1][-1]) return result - def _wrapper_tcellpredictor( - self, gene, substitution, epitope, score, tmpfile_in - ): - """ - wrapper function to determine - """ - trp = self._triple_gen_seq_subst( - gene=gene, - substitution=substitution, - epitope=epitope, - score=score - ) - pred_out = None - if trp is not None: - self._write_triple_to_file(trp, tmpfile_in) - pred_out = self._run_prediction(tmpfile_in) - return pred_out - - def _calculate_tcell_predictor_score( - self, gene, substitution, epitope, score + def calculate_tcell_predictor_score( + self, gene: str, epitope: PredictedEpitope ): """returns Tcell_predictor score given mps in dictionary format""" - tmp_tcellPredIN = intermediate_files.create_temp_file( - prefix="tmp_TcellPredicIN_", suffix=".txt" - ) tcell_predictor_score = None - if not ModelValidator.has_peptide_rare_amino_acids(epitope): - tcell_predictor_score = self._wrapper_tcellpredictor( - gene=gene, substitution=substitution, epitope=epitope, score=score, tmpfile_in=tmp_tcellPredIN, ) + if not ModelValidator.has_peptide_rare_amino_acids(epitope.mutated_peptide): + tcell_predictor_score = self._wrapper_tcellpredictor(gene=gene, epitope=epitope) return tcell_predictor_score def get_annotations( @@ -123,20 +85,21 @@ def get_annotations( # TODO: this is difficult to extend to more complex mutations (eg: MNVs, indels) as only considers first mutated # position tcell_predictor_score = None - if neoantigen.mutation.wild_type_xmer and netmhcpan.best_ninemer_epitope_by_affinity.peptide: - mutation_position = neoantigen.mutation.position[0] - wild_type_aminoacid = neoantigen.mutation.wild_type_xmer[ - mutation_position - 1 - ] # it is 1-based - mutated_aminoacid = neoantigen.mutation.mutated_xmer[mutation_position - 1] - tcell_predictor_score = self._calculate_tcell_predictor_score(gene=neoantigen.gene, - substitution=wild_type_aminoacid + mutated_aminoacid, - 
epitope=netmhcpan.best_ninemer_epitope_by_affinity.peptide, - score=netmhcpan.best_ninemer_epitope_by_affinity.affinity_score) + if neoantigen.wild_type_xmer and netmhcpan.best_ninemer_epitope_by_affinity.mutated_peptide: + tcell_predictor_score = self.calculate_tcell_predictor_score( + gene=neoantigen.gene, + epitope=netmhcpan.best_ninemer_epitope_by_affinity) annotations = [ AnnotationFactory.build_annotation( value=tcell_predictor_score, - name="Tcell_predictor_score", + name="Tcell_predictor", ) ] return annotations + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope, gene: str) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calculate_tcell_predictor_score(epitope=epitope, gene=gene), + name='Tcell_predictor') + ] diff --git a/neofox/published_features/differential_binding/amplitude.py b/neofox/published_features/differential_binding/amplitude.py index 9e012557..837ac75d 100755 --- a/neofox/published_features/differential_binding/amplitude.py +++ b/neofox/published_features/differential_binding/amplitude.py @@ -20,7 +20,7 @@ from typing import List -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory from neofox.MHC_predictors.netmhcpan.combine_netmhcIIpan_pred_multiple_binders import ( BestAndMultipleBinderMhcII, @@ -64,40 +64,57 @@ def run( ): # MHC I if netmhcpan: - if netmhcpan.best_epitope_by_affinity.peptide and netmhcpan.best_wt_epitope_by_affinity.peptide: + if netmhcpan.best_epitope_by_affinity.mutated_peptide and netmhcpan.best_epitope_by_affinity.wild_type_peptide: self.amplitude_mhci_affinity = self.calculate_amplitude_mhc( - score_mutation=netmhcpan.best_epitope_by_affinity.affinity_score, - score_wild_type=netmhcpan.best_wt_epitope_by_affinity.affinity_score, + score_mutation=netmhcpan.best_epitope_by_affinity.affinity_mutated, + 
score_wild_type=netmhcpan.best_epitope_by_affinity.affinity_wild_type, apply_correction=True, ) - if netmhcpan.best_ninemer_epitope_by_affinity.peptide and netmhcpan.best_ninemer_wt_epitope_by_affinity.peptide: + if netmhcpan.best_ninemer_epitope_by_affinity.mutated_peptide and netmhcpan.best_ninemer_epitope_by_affinity.wild_type_peptide: self.amplitude_mhci_affinity_9mer = self.calculate_amplitude_mhc( - score_mutation=netmhcpan.best_ninemer_epitope_by_affinity.affinity_score, - score_wild_type=netmhcpan.best_ninemer_wt_epitope_by_affinity.affinity_score, + score_mutation=netmhcpan.best_ninemer_epitope_by_affinity.affinity_mutated, + score_wild_type=netmhcpan.best_ninemer_epitope_by_affinity.affinity_wild_type, apply_correction=True, ) # MHC II if netmhc2pan: - if netmhc2pan.best_predicted_epitope_rank.peptide and netmhc2pan.best_predicted_epitope_rank_wt.peptide: + if netmhc2pan.best_predicted_epitope_rank.mutated_peptide and netmhc2pan.best_predicted_epitope_rank.wild_type_peptide: self.amplitude_mhcii_rank = self.calculate_amplitude_mhc( - score_mutation=netmhc2pan.best_predicted_epitope_rank.rank, - score_wild_type=netmhc2pan.best_predicted_epitope_rank_wt.rank, + score_mutation=netmhc2pan.best_predicted_epitope_rank.rank_mutated, + score_wild_type=netmhc2pan.best_predicted_epitope_rank.rank_wild_type, ) def get_annotations(self) -> List[Annotation]: return [ AnnotationFactory.build_annotation( value=self.amplitude_mhci_affinity_9mer, - name="Amplitude_MHCI_affinity_9mer", + name="Amplitude_MHCI_bestAffinity9mer", ), AnnotationFactory.build_annotation( - value=self.amplitude_mhci_affinity, name="Amplitude_MHCI_affinity" + value=self.amplitude_mhci_affinity, name="Amplitude_MHCI_bestAffinity" ), ] def get_annotations_mhc2(self) -> List[Annotation]: return [ AnnotationFactory.build_annotation( - value=self.amplitude_mhcii_rank, name="Amplitude_MHCII_rank" + value=self.amplitude_mhcii_rank, name="Amplitude_MHCII_bestRank" ) ] + + def 
get_annotations_epitope_mhcii(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calculate_amplitude_mhc( + score_mutation=epitope.rank_mutated, score_wild_type=epitope.rank_wild_type), + name='amplitude') + ] + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calculate_amplitude_mhc( + score_mutation=epitope.affinity_mutated, score_wild_type=epitope.affinity_wild_type, + apply_correction=True), + name='amplitude') + ] diff --git a/neofox/published_features/differential_binding/differential_binding.py b/neofox/published_features/differential_binding/differential_binding.py index 076a419a..c4ba2567 100755 --- a/neofox/published_features/differential_binding/differential_binding.py +++ b/neofox/published_features/differential_binding/differential_binding.py @@ -20,29 +20,20 @@ from typing import List -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory -from neofox import AFFINITY_THRESHOLD_DEFAULT from neofox.published_features.differential_binding.amplitude import Amplitude -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope class DifferentialBinding: - def __init__(self, affinity_threshold=AFFINITY_THRESHOLD_DEFAULT): - self.affinity_threshold = affinity_threshold - - def dai(self, score_mutation, score_wild_type, affin_filtering=False): + def dai(self, score_mutation, score_wild_type): """ Calculates DAI: Returns difference between wt and mut MHC binding score. 
""" score = None try: - if affin_filtering: - if score_mutation < self.affinity_threshold: - score = score_wild_type - score_mutation - else: - score = score_wild_type - score_mutation + score = score_wild_type - score_mutation except TypeError: pass return score @@ -74,17 +65,16 @@ def classify_adn_cdn( pass return group - def get_annotations_dai(self, mutated_peptide_mhci: PredictedEpitope, wt_peptide_mhcii: PredictedEpitope) -> List[Annotation]: + def get_annotations_dai(self, epitope: PredictedEpitope) -> List[Annotation]: dai = None - if mutated_peptide_mhci.peptide and wt_peptide_mhcii.peptide: + if epitope.mutated_peptide and epitope.wild_type_peptide: dai = self.dai( - score_mutation=mutated_peptide_mhci.affinity_score, - score_wild_type=wt_peptide_mhcii.affinity_score, - affin_filtering=True, + score_mutation=epitope.affinity_mutated, + score_wild_type=epitope.affinity_wild_type ) annotations = [ AnnotationFactory.build_annotation( - name="DAI_MHCI_affinity", + name="DAI_MHCI_bestAffinity", value=dai ), ] @@ -100,9 +90,9 @@ def get_annotations( cdn = None adn = None - if mutated_peptide_mhci.peptide: + if mutated_peptide_mhci.mutated_peptide: cdn = self.classify_adn_cdn( - score_mutation=mutated_peptide_mhci.affinity_score, + score_mutation=mutated_peptide_mhci.affinity_mutated, amplitude=amplitude.amplitude_mhci_affinity, bdg_cutoff_classical=bdg_cutoff_classical_mhci, bdg_cutoff_alternative=bdg_cutoff_alternative_mhci, @@ -110,7 +100,7 @@ def get_annotations( category="CDN", ) adn = self.classify_adn_cdn( - score_mutation=mutated_peptide_mhci.affinity_score, + score_mutation=mutated_peptide_mhci.affinity_mutated, amplitude=amplitude.amplitude_mhci_affinity, bdg_cutoff_classical=bdg_cutoff_classical_mhci, bdg_cutoff_alternative=bdg_cutoff_alternative_mhci, @@ -119,11 +109,11 @@ def get_annotations( ) annotations = [ AnnotationFactory.build_annotation( - name="CDN_MHCI", + name="Classically_defined_neopeptide_MHCI", value=cdn ), 
AnnotationFactory.build_annotation( - name="ADN_MHCI", + name="Alternatively_defined_neopeptide_MHCI", value=adn ), ] @@ -138,9 +128,9 @@ def get_annotations_mhc2( amplitude_cutoff_mhcii = 4 cdn = None adn = None - if mutated_peptide_mhcii.peptide: + if mutated_peptide_mhcii.mutated_peptide: cdn = self.classify_adn_cdn( - score_mutation=mutated_peptide_mhcii.rank, + score_mutation=mutated_peptide_mhcii.rank_mutated, amplitude=amplitude.amplitude_mhcii_rank, bdg_cutoff_classical=bdg_cutoff_classical_mhcii, bdg_cutoff_alternative=bdg_cutoff_alternative_mhcii, @@ -148,7 +138,7 @@ def get_annotations_mhc2( category="CDN", ) adn = self.classify_adn_cdn( - score_mutation=mutated_peptide_mhcii.rank, + score_mutation=mutated_peptide_mhcii.rank_mutated, amplitude=amplitude.amplitude_mhcii_rank, bdg_cutoff_classical=bdg_cutoff_classical_mhcii, bdg_cutoff_alternative=bdg_cutoff_alternative_mhcii, @@ -158,11 +148,18 @@ def get_annotations_mhc2( annotations = [ AnnotationFactory.build_annotation( value=cdn, - name="CDN_MHCII", + name="Classically_defined_neopeptide_MHCII", ), AnnotationFactory.build_annotation( value=adn, - name="ADN_MHCII", + name="Alternatively_defined_neopeptide_MHCII", ), ] return annotations + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.dai(score_mutation=epitope.affinity_mutated, score_wild_type=epitope.affinity_wild_type), + name='DAI') + ] diff --git a/neofox/published_features/dissimilarity_garnish/dissimilaritycalculator.py b/neofox/published_features/dissimilarity_garnish/dissimilaritycalculator.py index effd8835..f4ea43bb 100755 --- a/neofox/published_features/dissimilarity_garnish/dissimilaritycalculator.py +++ b/neofox/published_features/dissimilarity_garnish/dissimilaritycalculator.py @@ -18,29 +18,24 @@ # You should have received a copy of the GNU General Public License # along with this program. 
If not, see .# from typing import List -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope from neofox.helpers.blastp_runner import BlastpRunner -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory -from neofox import AFFINITY_THRESHOLD_DEFAULT class DissimilarityCalculator: - def __init__(self, proteome_blastp_runner: BlastpRunner, affinity_threshold=AFFINITY_THRESHOLD_DEFAULT): - self.affinity_threshold = affinity_threshold + def __init__(self, proteome_blastp_runner: BlastpRunner): self.proteome_blastp_runner = proteome_blastp_runner - def calculate_dissimilarity(self, mutated_peptide, mhc_affinity): + def calculate_dissimilarity(self, epitope: PredictedEpitope): """ wrapper for dissimilarity calculation """ dissimilarity = None - if mutated_peptide != "-" and not mhc_affinity >= self.affinity_threshold: + if epitope.mutated_peptide != "-": similarity = self.proteome_blastp_runner.calculate_similarity_database( - peptide=mutated_peptide, - a=32, - ) + peptide=epitope.mutated_peptide, a=32) if similarity is not None: dissimilarity = 1 - similarity return dissimilarity @@ -52,14 +47,10 @@ def get_annotations( """ dissimilarity_mhci = None dissimilarity_mhcii = None - if mutated_peptide_mhci and mutated_peptide_mhci.peptide: - dissimilarity_mhci = self.calculate_dissimilarity( - mutated_peptide=mutated_peptide_mhci.peptide, - mhc_affinity=mutated_peptide_mhci.affinity_score ) - if mutated_peptide_mhcii and mutated_peptide_mhcii.peptide: - dissimilarity_mhcii = self.calculate_dissimilarity( - mutated_peptide=mutated_peptide_mhcii.peptide, - mhc_affinity=mutated_peptide_mhcii.affinity_score ) + if mutated_peptide_mhci and mutated_peptide_mhci.mutated_peptide: + dissimilarity_mhci = self.calculate_dissimilarity(epitope=mutated_peptide_mhci) + if mutated_peptide_mhcii and mutated_peptide_mhcii.mutated_peptide: + 
dissimilarity_mhcii = self.calculate_dissimilarity(epitope=mutated_peptide_mhcii) annotations = [ AnnotationFactory.build_annotation( value=dissimilarity_mhci, @@ -71,3 +62,10 @@ def get_annotations( ) ] return annotations + + def get_annotations_epitope(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calculate_dissimilarity(epitope=epitope), + name='dissimilarity_score') + ] diff --git a/neofox/published_features/expression.py b/neofox/published_features/expression.py index 4100264b..9045384d 100755 --- a/neofox/published_features/expression.py +++ b/neofox/published_features/expression.py @@ -18,16 +18,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see .# from typing import List - -from neofox.model.neoantigen import Annotation +from logzero import logger +from neofox.model.neoantigen import Annotation, Neoantigen, Patient from neofox.model.factories import AnnotationFactory class Expression: - def __init__(self, transcript_expression, vaf_rna): - self.expression = self._get_expression_annotation( - transcript_expression, vaf_rna - ) @staticmethod def _get_expression_annotation( @@ -48,9 +44,13 @@ def _get_expression_annotation( pass return expression_mut - def get_annotations(self) -> List[Annotation]: + def get_annotations(self, neoantigen: Neoantigen) -> List[Annotation]: + + vaf = neoantigen.dna_variant_allele_frequency + if vaf is None or vaf == -1: + vaf = neoantigen.rna_variant_allele_frequency + return [ AnnotationFactory.build_annotation( - name="Expression_mutated_transcript", value=self.expression - ), - ] + name="Expression_mutated_transcript", value=self._get_expression_annotation( + transcript_expression=neoantigen.rna_expression, vaf_rna=vaf))] diff --git a/neofox/published_features/hex/hex.py b/neofox/published_features/hex/hex.py index af1ca7ca..116dfb5c 100755 --- a/neofox/published_features/hex/hex.py +++ 
b/neofox/published_features/hex/hex.py @@ -19,9 +19,7 @@ # along with this program. If not, see .# from typing import List import os - -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory from neofox.references.references import ReferenceFolder @@ -57,18 +55,25 @@ def get_annotation( hex_aln_score_mhci = None hex_aln_score_mhcii = None # hex_b_score = None - if mutated_peptide_mhci and mutated_peptide_mhci.peptide: + if mutated_peptide_mhci and mutated_peptide_mhci.mutated_peptide: # hex_aln_score, hex_b_score = self.apply_hex(netmhcpan.best_epitope_by_affinity.peptide).split(" ") - hex_aln_score_mhci = self.apply_hex(mutated_peptide_mhci.peptide) - if mutated_peptide_mhcii and mutated_peptide_mhcii.peptide: - hex_aln_score_mhcii = self.apply_hex(mutated_peptide_mhcii.peptide) + hex_aln_score_mhci = self.apply_hex(mutated_peptide_mhci.mutated_peptide) + if mutated_peptide_mhcii and mutated_peptide_mhcii.mutated_peptide: + hex_aln_score_mhcii = self.apply_hex(mutated_peptide_mhcii.mutated_peptide) annotations = [ AnnotationFactory.build_annotation( - value=hex_aln_score_mhci, name="Hex_alignment_score_MHCI"), + value=hex_aln_score_mhci, name="HexAlignmentScore_MHCI"), AnnotationFactory.build_annotation( - value=hex_aln_score_mhcii, name="Hex_alignment_score_MHCII") + value=hex_aln_score_mhcii, name="HexAlignmentScore_MHCII") # AnnotationFactory.build_annotation( # value=hex_b_score, name="hex_B_score" #) ] return annotations + + def get_annotations_epitope(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.apply_hex(mut_peptide=epitope.mutated_peptide), + name='hex_alignment_score') + ] diff --git a/neofox/published_features/iedb_immunogenicity/iedb.py 
b/neofox/published_features/iedb_immunogenicity/iedb.py index 21351153..aaebabb3 100755 --- a/neofox/published_features/iedb_immunogenicity/iedb.py +++ b/neofox/published_features/iedb_immunogenicity/iedb.py @@ -16,12 +16,10 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see .# -from typing import List +from typing import List, Union from logzero import logger -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope -from neofox.model.neoantigen import Annotation, MhcAllele +from neofox.model.neoantigen import Annotation, MhcAllele, PredictedEpitope from neofox.model.factories import AnnotationFactory -from neofox import AFFINITY_THRESHOLD_DEFAULT immunoweight = [0.00, 0.00, 0.10, 0.31, 0.30, 0.29, 0.26, 0.18, 0.00] @@ -99,9 +97,6 @@ class IEDBimmunogenicity: - def __init__(self, affinity_threshold=AFFINITY_THRESHOLD_DEFAULT): - self.affinity_threshold = affinity_threshold - def predict_immunogenicity(self, pep, allele): custom_mask = allele_dict.get(allele, False) @@ -138,13 +133,13 @@ def predict_immunogenicity(self, pep, allele): return score def calculate_iedb_immunogenicity( - self, peptide, mhc_allele: MhcAllele, mhc_score + self, peptide, mhc_allele: Union[MhcAllele, None], mhc_score ): """This function determines the IEDB immunogenicity score""" score = None try: - if peptide != "-" and float(mhc_score) < self.affinity_threshold: - score = self.predict_immunogenicity(peptide, mhc_allele.name) + if peptide != "-": + score = self.predict_immunogenicity(peptide, mhc_allele.name if mhc_allele else None) logger.info(score) except (ValueError, AttributeError): pass @@ -157,17 +152,17 @@ def get_annotations( """ iedb = None iedb_mhcii = None - if mutated_peptide_mhci and mutated_peptide_mhci.peptide: + if mutated_peptide_mhci and mutated_peptide_mhci.mutated_peptide: iedb = self.calculate_iedb_immunogenicity( - peptide=mutated_peptide_mhci.peptide, - 
mhc_allele=mutated_peptide_mhci.hla, - mhc_score=mutated_peptide_mhci.affinity_score, + peptide=mutated_peptide_mhci.mutated_peptide, + mhc_allele=mutated_peptide_mhci.allele_mhc_i, + mhc_score=mutated_peptide_mhci.affinity_mutated, ) - if mutated_peptide_mhcii and mutated_peptide_mhcii.peptide: + if mutated_peptide_mhcii and mutated_peptide_mhcii.mutated_peptide: iedb_mhcii = self.calculate_iedb_immunogenicity( - peptide=mutated_peptide_mhcii.peptide, - mhc_allele=mutated_peptide_mhcii.hla, - mhc_score=mutated_peptide_mhcii.affinity_score, + peptide=mutated_peptide_mhcii.mutated_peptide, + mhc_allele=None, + mhc_score=mutated_peptide_mhcii.affinity_mutated, ) annotations = [ AnnotationFactory.build_annotation( @@ -180,3 +175,20 @@ def get_annotations( ) ] return annotations + + def get_annotations_epitope_mhcii(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calculate_iedb_immunogenicity( + peptide=epitope.mutated_peptide, mhc_allele=None, mhc_score=epitope.affinity_mutated), + name='IEDB_Immunogenicity') + ] + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calculate_iedb_immunogenicity( + peptide=epitope.mutated_peptide, mhc_allele=epitope.allele_mhc_i, + mhc_score=epitope.affinity_mutated), + name='IEDB_Immunogenicity') + ] diff --git a/neofox/published_features/neoantigen_fitness/neoantigen_fitness.py b/neofox/published_features/neoantigen_fitness/neoantigen_fitness.py index f97881a1..8d604c45 100755 --- a/neofox/published_features/neoantigen_fitness/neoantigen_fitness.py +++ b/neofox/published_features/neoantigen_fitness/neoantigen_fitness.py @@ -20,20 +20,18 @@ from typing import List from logzero import logger -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope from neofox.helpers.blastp_runner import BlastpRunner -from neofox.model.neoantigen import 
Annotation +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory -from neofox import AFFINITY_THRESHOLD_DEFAULT class NeoantigenFitnessCalculator: - def __init__(self, iedb_blastp_runner: BlastpRunner, affinity_threshold=AFFINITY_THRESHOLD_DEFAULT): - self.affinity_threshold = affinity_threshold + def __init__(self, iedb_blastp_runner: BlastpRunner): self.iedb_blastp_runner = iedb_blastp_runner - def get_pathogen_similarity(self, peptide): + def get_pathogen_similarity(self, peptide: str): pathsim = self.iedb_blastp_runner.calculate_similarity_database(peptide=peptide) logger.info( "Peptide {} has a pathogen similarity of {}".format(peptide, pathsim) @@ -63,13 +61,7 @@ def calculate_amplitude_mhc( def _calculate_correction(self, score_wild_type): return 1 / (1 + 0.0003 * float(score_wild_type)) - def calculate_recognition_potential( - self, - amplitude: float, - pathogen_similarity: float, - mutation_in_anchor: bool, - mhc_affinity_mut: float = None, - ): + def calculate_recognition_potential(self, amplitude: float, pathogen_similarity: float): """ This function calculates the recognition potential, defined by the product of amplitude and pathogensimiliarity of an epitope according to Balachandran et al. 
@@ -79,48 +71,96 @@ def calculate_recognition_potential( """ recognition_potential = None try: - candidate_recognition_potential = amplitude * pathogen_similarity - if mhc_affinity_mut: - if not mutation_in_anchor and mhc_affinity_mut < self.affinity_threshold: - recognition_potential = candidate_recognition_potential - else: - if not mutation_in_anchor: - recognition_potential = candidate_recognition_potential + recognition_potential = amplitude * pathogen_similarity except (ValueError, TypeError): pass return recognition_potential def get_annotations( - self, mutated_peptide_mhci: PredictedEpitope, mutated_peptide_mhcii: PredictedEpitope, - amplitude, mutation_in_anchor + self, mutated_peptide_mhci: PredictedEpitope, mutated_peptide_mhcii: PredictedEpitope, amplitude ) -> List[Annotation]: pathogen_similarity_9mer = None pathogen_similarity_mhcii = None recognition_potential = None - if mutated_peptide_mhci and mutated_peptide_mhci.peptide: - pathogen_similarity_9mer = self.get_pathogen_similarity(peptide=mutated_peptide_mhci.peptide) + if mutated_peptide_mhci and mutated_peptide_mhci.mutated_peptide: + pathogen_similarity_9mer = self.get_pathogen_similarity(peptide=mutated_peptide_mhci.mutated_peptide) if pathogen_similarity_9mer is not None: recognition_potential = self.calculate_recognition_potential( amplitude=amplitude, - pathogen_similarity=pathogen_similarity_9mer, - mutation_in_anchor=mutation_in_anchor, - mhc_affinity_mut=mutated_peptide_mhci.affinity_score, + pathogen_similarity=pathogen_similarity_9mer ) - if mutated_peptide_mhcii and mutated_peptide_mhcii.peptide: - pathogen_similarity_mhcii = self.get_pathogen_similarity(peptide=mutated_peptide_mhcii.peptide) + if mutated_peptide_mhcii and mutated_peptide_mhcii.mutated_peptide: + pathogen_similarity_mhcii = self.get_pathogen_similarity(peptide=mutated_peptide_mhcii.mutated_peptide) annotations = [ AnnotationFactory.build_annotation( - name="Pathogensimiliarity_MHCI_9mer", + 
name="Pathogensimiliarity_MHCI_bestAffinity9mer", value=pathogen_similarity_9mer, ), AnnotationFactory.build_annotation( - name="Recognition_Potential_MHCI_9mer", + name="RecognitionPotential_MHCI_bestAffinity9mer", value=recognition_potential ), AnnotationFactory.build_annotation( - name="Pathogensimiliarity_MHCII", + name="Pathogensimiliarity_MHCII_bestAffinity", value=pathogen_similarity_mhcii ), ] return annotations + + def get_annotations_extended( + self, mutated_peptide_mhci: PredictedEpitope, amplitude + ) -> List[Annotation]: + pathogen_similarity = None + recognition_potential = None + + if mutated_peptide_mhci and mutated_peptide_mhci.mutated_peptide: + pathogen_similarity = self.get_pathogen_similarity(peptide=mutated_peptide_mhci.mutated_peptide) + if pathogen_similarity is not None: + recognition_potential = self.calculate_recognition_potential( + amplitude=amplitude, + pathogen_similarity=pathogen_similarity + ) + + annotations = [ + AnnotationFactory.build_annotation( + name="Pathogensimiliarity_MHCI_bestAffinity", + value=pathogen_similarity, + ), + AnnotationFactory.build_annotation( + name="RecognitionPotential_MHCI_bestAffinity", + value=recognition_potential + ), + ] + return annotations + + + + def get_annotations_epitope_mhcii(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.get_pathogen_similarity(peptide=epitope.mutated_peptide), + name='pathogen_similarity') + ] + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope) -> List[Annotation]: + # NOTE: this expects the annotations "amplitude" and "anchor_mutated" in the epitope annotations + pathogen_similarity = self.get_pathogen_similarity(peptide=epitope.mutated_peptide) + pathogen_similarity_annotation = AnnotationFactory.build_annotation( + value=pathogen_similarity, + name='pathogen_similarity') + try: + amplitude = float(EpitopeHelper.get_annotation_by_name( + epitope.neofox_annotations.annotations, 
name='amplitude')) + except ValueError: + return [ + pathogen_similarity_annotation + ] + return [ + pathogen_similarity_annotation, + AnnotationFactory.build_annotation( + value=self.calculate_recognition_potential( + amplitude=amplitude, pathogen_similarity=pathogen_similarity, + ), + name='recognition_potential') + ] diff --git a/neofox/published_features/priority_score.py b/neofox/published_features/priority_score.py index a3419ce1..832b3354 100755 --- a/neofox/published_features/priority_score.py +++ b/neofox/published_features/priority_score.py @@ -23,7 +23,7 @@ from typing import List from neofox.helpers.epitope_helper import EpitopeHelper -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope, Neoantigen, Patient from neofox.model.factories import AnnotationFactory from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import ( BestAndMultipleBinder, @@ -43,7 +43,7 @@ def calc_logistic_function(self, mhc_score): def calc_priority_score( self, - vaf_tumor, + vaf_dna, vaf_rna, transcript_expr, no_mismatch, @@ -54,22 +54,23 @@ def calc_priority_score( """ This function calculates the Priority Score using parameters for mhc I. 
""" - l_mut = self.calc_logistic_function(score_mut) - l_wt = self.calc_logistic_function(score_wt) priority_score = None + vaf = None try: - if vaf_tumor is not None and vaf_tumor != -1: - priority_score = self.mupexi( - l_mut, - l_wt, - mut_not_in_prot, - no_mismatch, - transcript_expr, - vaf_tumor, - ) + if vaf_dna is not None and vaf_dna != -1: + vaf = vaf_dna elif vaf_rna is not None and vaf_rna != -1: + vaf = vaf_rna + if vaf: + l_mut = self.calc_logistic_function(score_mut) + l_wt = self.calc_logistic_function(score_wt) priority_score = self.mupexi( - l_mut, l_wt, mut_not_in_prot, no_mismatch, transcript_expr, vaf_rna + l_mut=l_mut, + l_wt=l_wt, + mut_not_in_prot=mut_not_in_prot, + no_mismatch=no_mismatch, + transcript_expr=transcript_expr, + vaf_tumor=vaf ) except (TypeError, ValueError): pass @@ -87,27 +88,29 @@ def get_annotations( self, netmhcpan: BestAndMultipleBinder, mut_not_in_prot, - expr, - vaf_tum, - vaf_transcr, + neoantigen: Neoantigen ) -> List[Annotation]: """ returns number of mismatches between best MHCI / MHC II epitopes (rank) and their corresponding WTs """ num_mismatches_mhc1 = None priority_score = None - if netmhcpan.best_wt_epitope_by_rank.peptide and netmhcpan.best_epitope_by_rank.peptide: + if netmhcpan.best_epitope_by_rank.wild_type_peptide and netmhcpan.best_epitope_by_rank.mutated_peptide: num_mismatches_mhc1 = EpitopeHelper.number_of_mismatches( - epitope_wild_type=netmhcpan.best_wt_epitope_by_rank.peptide, - epitope_mutation=netmhcpan.best_epitope_by_rank.peptide, + epitope_wild_type=netmhcpan.best_epitope_by_rank.wild_type_peptide, + epitope_mutation=netmhcpan.best_epitope_by_rank.mutated_peptide, ) + vaf_rna = neoantigen.dna_variant_allele_frequency + if vaf_rna is None: + vaf_rna = neoantigen.rna_variant_allele_frequency + priority_score = self.calc_priority_score( - vaf_tumor=vaf_tum, - vaf_rna=vaf_transcr, - transcript_expr=expr, + vaf_dna=neoantigen.dna_variant_allele_frequency, + vaf_rna=vaf_rna, + 
transcript_expr=neoantigen.rna_expression, no_mismatch=num_mismatches_mhc1, - score_mut=netmhcpan.best_epitope_by_rank.rank, - score_wt=netmhcpan.best_wt_epitope_by_rank.rank, + score_mut=netmhcpan.best_epitope_by_rank.rank_mutated, + score_wt=netmhcpan.best_epitope_by_rank.rank_wild_type, mut_not_in_prot=mut_not_in_prot, ) annotations = [ @@ -121,3 +124,20 @@ def get_annotations( ), ] return annotations + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope, vaf_tumor, transcript_exp, vaf_rna) -> \ + List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.calc_priority_score( + vaf_dna=vaf_tumor, + vaf_rna=vaf_rna, + transcript_expr=transcript_exp, + no_mismatch=int(EpitopeHelper.get_annotation_by_name( + epitope.neofox_annotations.annotations, name='number_of_mismatches')), + score_mut=epitope.rank_mutated, + score_wt=epitope.rank_wild_type, + mut_not_in_prot=bool(EpitopeHelper.get_annotation_by_name( + epitope.neofox_annotations.annotations, name='mutation_not_found_in_proteome'))), + name='Priority_score') + ] diff --git a/neofox/published_features/self_similarity/self_similarity.py b/neofox/published_features/self_similarity/self_similarity.py index 2b26e87c..2358a508 100755 --- a/neofox/published_features/self_similarity/self_similarity.py +++ b/neofox/published_features/self_similarity/self_similarity.py @@ -20,9 +20,8 @@ from typing import List import math import os -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope from neofox.model.validation import ModelValidator -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory @@ -137,27 +136,24 @@ def self_similarity_of_conserved_binder_only( return result def get_annnotations( - self, mutated_peptide_mhci: PredictedEpitope, wt_peptide_mhci: PredictedEpitope, - mutated_peptide_mhcii: PredictedEpitope, wt_peptide_mhcii: 
PredictedEpitope) -> List[Annotation]: + self, epitope_mhci: PredictedEpitope, epitope_mhcii: PredictedEpitope) -> List[Annotation]: improved_binding_mhci = None self_similarity_mhci = None self_similarity_mhcii = None - if mutated_peptide_mhci and wt_peptide_mhci and \ - mutated_peptide_mhci.peptide and wt_peptide_mhci.peptide: + if epitope_mhci and epitope_mhci.mutated_peptide and epitope_mhci.wild_type_peptide: improved_binding_mhci = self.is_improved_binder( - score_mutation=mutated_peptide_mhci.rank, - score_wild_type=wt_peptide_mhci.rank, + score_mutation=epitope_mhci.rank_mutated, + score_wild_type=epitope_mhci.rank_wild_type, ) self_similarity_mhci = self.get_self_similarity( - mutated_peptide=mutated_peptide_mhci.peptide, - wt_peptide=wt_peptide_mhci.peptide, + mutated_peptide=epitope_mhci.mutated_peptide, + wt_peptide=epitope_mhci.wild_type_peptide, ) - if mutated_peptide_mhcii and wt_peptide_mhcii and \ - mutated_peptide_mhcii.peptide and wt_peptide_mhcii.peptide: + if epitope_mhcii and epitope_mhcii.mutated_peptide and epitope_mhcii.wild_type_peptide: self_similarity_mhcii = self.get_self_similarity( - mutated_peptide=mutated_peptide_mhcii.peptide, - wt_peptide=wt_peptide_mhcii.peptide, + mutated_peptide=epitope_mhcii.mutated_peptide, + wt_peptide=epitope_mhcii.wild_type_peptide, ) annotations = [ AnnotationFactory.build_annotation( @@ -178,3 +174,26 @@ def get_annnotations( ), ] return annotations + + def get_annotations_epitope_mhcii(self, epitope: PredictedEpitope) -> List[Annotation]: + return [ + AnnotationFactory.build_annotation( + value=self.get_self_similarity( + mutated_peptide=epitope.mutated_peptide, wt_peptide=epitope.wild_type_peptide), + name='Selfsimilarity') + ] + + def get_annotations_epitope_mhci(self, epitope: PredictedEpitope) -> List[Annotation]: + is_improved_binder = self.is_improved_binder( + score_mutation=epitope.rank_mutated, score_wild_type=epitope.rank_wild_type) + self_similarity = self.get_self_similarity( + 
mutated_peptide=epitope.mutated_peptide, wt_peptide=epitope.wild_type_peptide) + + return [ + AnnotationFactory.build_annotation(value=is_improved_binder, name='Improved_Binder_MHCI'), + AnnotationFactory.build_annotation(value=self_similarity, name='Selfsimilarity'), + AnnotationFactory.build_annotation( + value=self.self_similarity_of_conserved_binder_only( + similarity=self_similarity, is_improved_binder=is_improved_binder), + name='Selfsimilarity_conserved_binder') + ] \ No newline at end of file diff --git a/neofox/published_features/vaxrank/vaxrank.py b/neofox/published_features/vaxrank/vaxrank.py index 13d6ecc0..21d317e4 100755 --- a/neofox/published_features/vaxrank/vaxrank.py +++ b/neofox/published_features/vaxrank/vaxrank.py @@ -27,15 +27,11 @@ import math from typing import List -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory class VaxRank: - def __init__(self): - self.total_binding_score = None - self.ranking_score = None - self.expression_score = None def logistic_epitope_score( self, ic50, midpoint=350.0, width=150.0, ic50_cutoff=5000.0 @@ -61,49 +57,40 @@ def logistic_epitope_score( return logistic / normalizer - def total_binding(self, mut_scores): + def total_binding(self, epitope_predictions: List[PredictedEpitope]): """ adapted from: https://github.com/openvax/vaxrank/blob/master/vaxrank/epitope_prediction.py sums up MHC binding scores of all possible neoepitope candidates, transformed with logistic function into values between 0 and 1 """ - mut_scores_logistic = [] - mut_scores_list = mut_scores.split("/") - # print mut_scores_list + mut_scores_logistic = 0 # logistic transformation and sum over all epitopes deriving from mutations - [ - mut_scores_logistic.append( - self.logistic_epitope_score(ic50=float(mhc_affinity)) - ) - for mhc_affinity in mut_scores_list - ] - # print mut_scores_logistic - return sum(mut_scores_logistic) 
+ for p in epitope_predictions: + mut_scores_logistic += self.logistic_epitope_score(ic50=float(p.affinity_mutated)) + + return mut_scores_logistic - def combined_score(self): + def combined_score(self, expression_score, total_binding_score): """ adapted from: https://github.com/openvax/vaxrank/blob/master/vaxrank/epitope_prediction.py final ranking score implemented in VaxRank """ - # print "rank score: " + str(float(self.expression_score) * float(self.total_binding_score)) combined_score = None try: - combined_score = self.expression_score * self.total_binding_score + combined_score = float(expression_score) * total_binding_score except (ValueError, TypeError): pass return combined_score - def run(self, mutation_scores, expression_score): - self.expression_score = expression_score - self.total_binding_score = self.total_binding(mutation_scores) - self.ranking_score = self.combined_score() - - def get_annotations(self) -> List[Annotation]: + def get_annotations(self, epitope_predictions: List[PredictedEpitope], expression_score) -> List[Annotation]: + expression_score = expression_score + total_binding_score = self.total_binding(epitope_predictions) + ranking_score = self.combined_score(expression_score=expression_score, total_binding_score=total_binding_score) return [ AnnotationFactory.build_annotation( - value=self.total_binding_score, name="vaxrank_binding_score" + value=total_binding_score, name="Vaxrank_bindingScore" ), AnnotationFactory.build_annotation( - value=self.ranking_score, name="vaxrank_total_score" + value=ranking_score, name="Vaxrank_totalScore" ), ] diff --git a/neofox/references/references.py b/neofox/references/references.py index bb9470e4..94e09e04 100755 --- a/neofox/references/references.py +++ b/neofox/references/references.py @@ -28,6 +28,15 @@ from neofox.model.neoantigen import Mhc1Name, Mhc2GeneName, MhcAllele, Mhc2Name, Resource +DEFAULT_MAKEBLASTDB = 'makeblastdb' +DEFAULT_PRIME = 'PRIME' +DEFAULT_NETMHCPAN = 'netMHCpan' 
+DEFAULT_NETMHC2PAN = 'netMHCIIpan' +DEFAULT_RSCRIPT = 'Rscript' +DEFAULT_MIXMHCPRED = "MixMHCpred" +DEFAULT_MIXMHC2PRED = "MixMHC2pred_unix" +DEFAULT_BLASTP = "blastp" + ORGANISM_HOMO_SAPIENS = 'human' HOMO_SAPIENS_MHC_I_GENES = [Mhc1Name.A, Mhc1Name.B, Mhc1Name.C] HOMO_SAPIENS_MHC_II_GENES = [Mhc2GeneName.DPA1, Mhc2GeneName.DPB1, Mhc2GeneName.DQA1, Mhc2GeneName.DQB1, @@ -75,70 +84,78 @@ class AbstractDependenciesConfiguration: - def _check_and_load_binary(self, variable_name, optional=False): - variable_value = os.environ.get(variable_name) - if not optional and variable_value is None: + def _check_and_load_binary(self, variable_name, default_value=None, optional=False, path_search=True): + """ + Fetches the binary from the provided environment variable, if not available uses the default. + If the binary is not a path to an executable, it searches for it in the PATH + """ + + program = None + variable_value = os.environ.get(variable_name, default=default_value) + + if variable_value != '': + fpath, _ = os.path.split(variable_value) + if fpath: + # makes sure that the provided path is absolute + if not os.path.isabs(variable_value): + raise NeofoxConfigurationException( + "Please, use an absolute path in the environment variable ${}!".format(variable_name)) + # checks that it is executable + if os.path.isfile(variable_value) and os.access(variable_value, os.X_OK): + program = variable_value + elif path_search: + # if no path searches for this command in the path + for path in os.environ.get("PATH", "").split(os.pathsep): + exe_file = os.path.join(path, variable_value) + if os.path.isfile(exe_file) and os.access(exe_file, os.X_OK): + program = variable_value + break + + if not optional and program is None: raise NeofoxConfigurationException( - "Please, set the environment variable ${} pointing to the right binary!".format( + "Please, set the environment variable ${} pointing to the right binary and make sure to have " + "execution permissions!".format( 
variable_name ) ) - # checks that the file exists - if ( - variable_value is not None - ): # only optional variables can be None at this stage - if not os.path.exists(variable_value): - raise NeofoxConfigurationException( - "The provided binary '{}' in ${} does not exist!".format( - variable_value, variable_name - ) - ) - # checks that it is executable - if not os.access(variable_value, os.X_OK): - raise NeofoxConfigurationException( - "The provided binary '{}' in ${} is not executable!".format( - variable_value, variable_name - ) - ) - return variable_value + + return program class DependenciesConfiguration(AbstractDependenciesConfiguration): def __init__(self): - self.blastp = self._check_and_load_binary(neofox.NEOFOX_BLASTP_ENV) + self.blastp = self._check_and_load_binary(neofox.NEOFOX_BLASTP_ENV, default_value=DEFAULT_BLASTP) self.mix_mhc2_pred = self._check_and_load_binary( - neofox.NEOFOX_MIXMHC2PRED_ENV, optional=True - ) + neofox.NEOFOX_MIXMHC2PRED_ENV, default_value=DEFAULT_MIXMHC2PRED, optional=True, path_search=False) if self.mix_mhc2_pred is not None: self.mix_mhc2_pred_alleles_list = os.path.join( - os.path.dirname(self.mix_mhc2_pred), MIXMHC2PRED_AVAILABLE_ALLELES_FILE - ) + os.path.dirname(self.mix_mhc2_pred), MIXMHC2PRED_AVAILABLE_ALLELES_FILE) else: self.mix_mhc2_pred_alleles_list = None self.mix_mhc_pred = self._check_and_load_binary( - neofox.NEOFOX_MIXMHCPRED_ENV, optional=True - ) + neofox.NEOFOX_MIXMHCPRED_ENV, default_value=DEFAULT_MIXMHCPRED, optional=True, path_search=False) if self.mix_mhc_pred is not None: self.mix_mhc_pred_alleles_list = os.path.join( - os.path.dirname(self.mix_mhc_pred), "lib", MIXMHCPRED_AVAILABLE_ALLELES_FILE - ) + os.path.dirname(self.mix_mhc_pred), "lib", MIXMHCPRED_AVAILABLE_ALLELES_FILE) else: self.mix_mhc_pred_alleles_list = None - self.rscript = self._check_and_load_binary(neofox.NEOFOX_RSCRIPT_ENV) - self.net_mhc2_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHC2PAN_ENV) - self.net_mhc_pan = 
self._check_and_load_binary(neofox.NEOFOX_NETMHCPAN_ENV) - self.prime = self._check_and_load_binary(neofox.NEOFOX_PRIME_ENV) - self.prime_alleles_list = os.path.join( - os.path.dirname(self.prime), "lib", PRIME_AVAILABLE_ALLELES_FILE - ) + self.rscript = self._check_and_load_binary(neofox.NEOFOX_RSCRIPT_ENV, default_value=DEFAULT_RSCRIPT) + self.net_mhc2_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHC2PAN_ENV, default_value=DEFAULT_NETMHC2PAN) + self.net_mhc_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHCPAN_ENV, default_value=DEFAULT_NETMHCPAN) + self.prime = self._check_and_load_binary(neofox.NEOFOX_PRIME_ENV, default_value=DEFAULT_PRIME, optional=True, + path_search=False) + if self.prime: + self.prime_alleles_list = os.path.join( + os.path.dirname(self.prime), "lib", PRIME_AVAILABLE_ALLELES_FILE + ) class DependenciesConfigurationForInstaller(AbstractDependenciesConfiguration): def __init__(self): - self.net_mhc2_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHC2PAN_ENV) - self.net_mhc_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHCPAN_ENV) - self.make_blastdb = self._check_and_load_binary(neofox.NEOFOX_MAKEBLASTDB_ENV) - self.rscript = self._check_and_load_binary(neofox.NEOFOX_RSCRIPT_ENV) + self.net_mhc2_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHC2PAN_ENV, default_value=DEFAULT_NETMHC2PAN) + self.net_mhc_pan = self._check_and_load_binary(neofox.NEOFOX_NETMHCPAN_ENV, default_value=DEFAULT_NETMHCPAN) + self.make_blastdb = self._check_and_load_binary(neofox.NEOFOX_MAKEBLASTDB_ENV, default_value=DEFAULT_MAKEBLASTDB) + self.rscript = self._check_and_load_binary(neofox.NEOFOX_RSCRIPT_ENV, default_value=DEFAULT_RSCRIPT) class MhcDatabase(ABC): diff --git a/neofox/tests/fake_classes.py b/neofox/tests/fake_classes.py index 8d287231..3f26d331 100755 --- a/neofox/tests/fake_classes.py +++ b/neofox/tests/fake_classes.py @@ -22,10 +22,7 @@ from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import ( 
BestAndMultipleBinder, ) -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import ( - PredictedEpitope, -) -from neofox.model.neoantigen import Resource +from neofox.model.neoantigen import Resource, PredictedEpitope from neofox.references.references import ( ReferenceFolder, AvailableAlleles, @@ -57,7 +54,7 @@ def get_available_alleles(self): class FakeDependenciesConfiguration(DependenciesConfiguration): - def _check_and_load_binary(self, variable_name, optional=False): + def _check_and_load_binary(self, variable_name, default_value=None, optional=False, path_search=False): return os.environ.get(variable_name, "some_non_empty_fake_value") diff --git a/neofox/tests/integration_tests/integration_test_tools.py b/neofox/tests/integration_tests/integration_test_tools.py index e8fb1896..3d44baf5 100755 --- a/neofox/tests/integration_tests/integration_test_tools.py +++ b/neofox/tests/integration_tests/integration_test_tools.py @@ -18,12 +18,16 @@ # along with this program. If not, see .# import random import tempfile +from unittest import TestCase import dotenv from Bio.Alphabet.IUPAC import IUPACData +from neofox.helpers.epitope_helper import EpitopeHelper from neofox.model.factories import MhcFactory -from neofox.references.references import ReferenceFolder, DependenciesConfiguration, ORGANISM_HOMO_SAPIENS +from neofox.model.neoantigen import PredictedEpitope, MhcAllele, Mhc2Isoform +from neofox.references.references import ReferenceFolder, DependenciesConfiguration, ORGANISM_HOMO_SAPIENS, \ + ORGANISM_MUS_MUSCULUS def load_references(organism=ORGANISM_HOMO_SAPIENS): @@ -118,3 +122,109 @@ def get_h2_two_test(h2_database): ("TTTDSDGKF", "OTTDSWGKF"), ("TTTDSDGKF", "ZTTDSWGKF") ] + + +class BaseIntegrationTest(TestCase): + + def setUp(self): + self.references, self.configuration = load_references() + self.references_mouse, self.configuration_mouse = load_references( + organism=ORGANISM_MUS_MUSCULUS) + + def _get_test_mhci_allele(self, allele) -> 
MhcAllele: + mhci = MhcFactory.build_mhc1_alleles([allele], mhc_database=self.references.get_mhc_database()) + return mhci[0].alleles[0] + + def _get_test_mhcii_isoform(self, isoform) -> Mhc2Isoform: + mhcii = MhcFactory.build_mhc2_alleles([isoform], mhc_database=self.references.get_mhc_database()) + return mhcii[0].isoforms[0] + + def assert_float_annotation(self, annotated_neoepitope, annotation_name): + annotation_value = EpitopeHelper.get_annotation_by_name( + annotated_neoepitope.neofox_annotations.annotations, annotation_name) + self.assertIsInstance(annotation_value, str) + self.assertIsInstance(float(annotation_value), float) + + def assert_annotation(self, annotated_neoepitope, annotation_name): + annotation_value = EpitopeHelper.get_annotation_by_name( + annotated_neoepitope.neofox_annotations.annotations, annotation_name) + self.assertIsInstance(annotation_value, str) + + def assert_neoepitope_mhci(self, original_neoepitope: PredictedEpitope, annotated_neoepitope: PredictedEpitope): + self.assertIsNotNone(annotated_neoepitope) + # input data remains the same + self.assertEqual(annotated_neoepitope.mutated_peptide, original_neoepitope.mutated_peptide) + if original_neoepitope.wild_type_peptide is not None and original_neoepitope.wild_type_peptide != '': + self.assertEqual(annotated_neoepitope.wild_type_peptide, original_neoepitope.wild_type_peptide) + else: + self.assertIsNotNone(annotated_neoepitope.wild_type_peptide) + self.assertNotEqual(annotated_neoepitope.wild_type_peptide, '') + self.assertEqual(annotated_neoepitope.allele_mhc_i.name, original_neoepitope.allele_mhc_i.name) + + # netMHCpan annotations + self.assertIsInstance(annotated_neoepitope.rank_mutated, float) + self.assertIsInstance(annotated_neoepitope.rank_wild_type, float) + self.assertIsInstance(annotated_neoepitope.affinity_mutated, float) + self.assertIsInstance(annotated_neoepitope.affinity_wild_type, float) + + # MixMHCpred annotations + 
self.assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_score") + self.assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_rank") + self.assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_WT_score") + self.assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_WT_rank") + + # PRIME annotations + self.assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_score") + self.assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_rank") + self.assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_WT_score") + self.assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_WT_rank") + + # additional annotations + self.assert_annotation(annotated_neoepitope, annotation_name="position_mutation") + self.assert_annotation(annotated_neoepitope, annotation_name="anchor_mutated") + self.assert_annotation(annotated_neoepitope, annotation_name="amplitude") + self.assert_annotation(annotated_neoepitope, annotation_name="pathogen_similarity") + self.assert_annotation(annotated_neoepitope, annotation_name="recognition_potential") + self.assert_annotation(annotated_neoepitope, annotation_name="DAI") + self.assert_annotation(annotated_neoepitope, annotation_name="Improved_Binder_MHCI") + self.assert_annotation(annotated_neoepitope, annotation_name="Selfsimilarity") + self.assert_annotation(annotated_neoepitope, annotation_name="Selfsimilarity_conserved_binder") + self.assert_annotation(annotated_neoepitope, annotation_name="mutation_not_found_in_proteome") + self.assert_annotation(annotated_neoepitope, annotation_name="dissimilarity_score") + self.assert_annotation(annotated_neoepitope, annotation_name="number_of_mismatches") + self.assert_annotation(annotated_neoepitope, annotation_name="IEDB_Immunogenicity") + self.assert_annotation(annotated_neoepitope, annotation_name="hex_alignment_score") + + # others to comes + 
self.assert_annotation(annotated_neoepitope, annotation_name="Priority_score") + self.assert_annotation(annotated_neoepitope, annotation_name="Tcell_predictor") + + def assert_neoepitope_mhcii(self, original_neoepitope: PredictedEpitope, annotated_neoepitope: PredictedEpitope): + self.assertIsNotNone(annotated_neoepitope) + # input data remains the same + self.assertEqual(annotated_neoepitope.mutated_peptide, original_neoepitope.mutated_peptide) + if original_neoepitope.wild_type_peptide is not None and original_neoepitope.wild_type_peptide != '': + self.assertEqual(annotated_neoepitope.wild_type_peptide, original_neoepitope.wild_type_peptide) + else: + self.assertIsNotNone(annotated_neoepitope.wild_type_peptide) + self.assertNotEqual(annotated_neoepitope.wild_type_peptide, '') + self.assertEqual(annotated_neoepitope.isoform_mhc_i_i.name, original_neoepitope.isoform_mhc_i_i.name) + + # netMHCpan annotations + self.assertIsInstance(annotated_neoepitope.rank_mutated, float) + self.assertIsInstance(annotated_neoepitope.rank_wild_type, float) + self.assertIsInstance(annotated_neoepitope.affinity_mutated, float) + self.assertIsInstance(annotated_neoepitope.affinity_wild_type, float) + + # MixMHCpred annotations + self.assert_float_annotation(annotated_neoepitope, annotation_name="MixMHC2pred_rank") + self.assert_float_annotation(annotated_neoepitope, annotation_name="MixMHC2pred_WT_rank") + + # additional annotations + self.assert_annotation(annotated_neoepitope, annotation_name="amplitude") + self.assert_annotation(annotated_neoepitope, annotation_name="pathogen_similarity") + self.assert_annotation(annotated_neoepitope, annotation_name="Selfsimilarity") + self.assert_annotation(annotated_neoepitope, annotation_name="mutation_not_found_in_proteome") + self.assert_annotation(annotated_neoepitope, annotation_name="dissimilarity_score") + self.assert_annotation(annotated_neoepitope, annotation_name="IEDB_Immunogenicity") + self.assert_annotation(annotated_neoepitope, 
annotation_name="hex_alignment_score") diff --git a/neofox/tests/integration_tests/test_alternative_mutation.py b/neofox/tests/integration_tests/test_alternative_mutation.py index 1db9f0c4..3193d5e1 100755 --- a/neofox/tests/integration_tests/test_alternative_mutation.py +++ b/neofox/tests/integration_tests/test_alternative_mutation.py @@ -31,7 +31,7 @@ BestAndMultipleBinderMhcII, ) from neofox.annotation_resources.uniprot.uniprot import Uniprot -from neofox.tests.tools import get_mutation +from neofox.tests.tools import get_neoantigen class TestBestMultipleBinder(TestCase): @@ -60,38 +60,34 @@ def test_best_multiple_mhc2_run(self): blastp_runner=self.proteome_blastp_runner ) # this is some valid example neoantigen candidate sequence - mutation = get_mutation( + mutation = get_neoantigen( # mutated_xmer="VVKWKFMVSTADPGSFTSRPACSSSAAPLGISQPRSSCTLPEPPLWSVPCPSCRKIYTACPSQEKNLKKPVPKSYLIHAGLEPLTFTNMFPSWEHRDDTAEITEMDMEVSNQITLVEDVLAKLCKTIYLLANLL", mutated_xmer="VVKWKFMVSTADPGSFTSRPACSSSAAPLGISQPRSSCTLPEPPLWSVPCPSCRKIYTA", wild_type_xmer=None, ) best_multiple.run( - mutation=mutation, + neoantigen=mutation, mhc2_alleles_patient=self.test_mhc_two, mhc2_alleles_available=self.available_alleles_mhc2, uniprot=self.uniprot ) - logger.info(best_multiple.best_predicted_epitope_rank.rank) - logger.info(best_multiple.best_predicted_epitope_affinity.affinity_score) - logger.info(best_multiple.best_predicted_epitope_rank.peptide) - logger.info(best_multiple.best_predicted_epitope_rank_wt.peptide) + logger.info(best_multiple.best_predicted_epitope_rank.rank_mutated) + logger.info(best_multiple.best_predicted_epitope_affinity.affinity_mutated) + logger.info(best_multiple.best_predicted_epitope_rank.mutated_peptide) + logger.info(best_multiple.best_predicted_epitope_rank.wild_type_peptide) logger.info(best_multiple.phbr_ii) - self.assertEqual(0.8, best_multiple.best_predicted_epitope_rank.rank) + self.assertEqual(0.8, best_multiple.best_predicted_epitope_rank.rank_mutated) 
self.assertEqual( - 185.02, best_multiple.best_predicted_epitope_affinity.affinity_score + 185.02, best_multiple.best_predicted_epitope_affinity.affinity_mutated ) self.assertEqual( - "VVKWKFMVSTADPGS", best_multiple.best_predicted_epitope_rank.peptide + "VVKWKFMVSTADPGS", best_multiple.best_predicted_epitope_rank.mutated_peptide ) self.assertEqual( - "ITPWRFKLSCMPPNS", best_multiple.best_predicted_epitope_rank_wt.peptide + "ITPWRFKLSCMPPNS", best_multiple.best_predicted_epitope_rank.wild_type_peptide ) self.assertIsNotNone(best_multiple.phbr_ii) self.assertAlmostEqual(2.9386450524753664, best_multiple.phbr_ii) - self.assertEqual( - best_multiple.best_predicted_epitope_rank.hla, - best_multiple.best_predicted_epitope_rank_wt.hla, - ) def test_best_multiple_run(self): best_multiple = BestAndMultipleBinder( @@ -99,26 +95,21 @@ def test_best_multiple_run(self): blastp_runner=self.proteome_blastp_runner ) # this is some valid example neoantigen candidate sequence - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="VVKWKFMVSTADPGSFTSRPACSSSAAPLGISQPRSSCTLPEPPLWSVPCPSCRKIYTACPSQEKNLKKPVPKSYLIHAGLEPLTFTNMFPSWEHRDDTAEITEMDMEVSNQITLVEDVLAKLCKTIYLLANLL", wild_type_xmer=None, ) best_multiple.run( - mutation=mutation, + neoantigen=mutation, mhc1_alleles_patient=self.test_mhc_one, mhc1_alleles_available=self.available_alleles_mhc1, uniprot=self.uniprot, ) - self.assertEqual(17.79, best_multiple.best_epitope_by_affinity.affinity_score) - self.assertEqual('HLA-A*02:01', best_multiple.best_epitope_by_affinity.hla.name) - self.assertEqual(0.081, best_multiple.best_epitope_by_rank.rank) - self.assertEqual("HLA-A*02:01", best_multiple.best_epitope_by_rank.hla.name) - self.assertEqual("TLPEPPLWSV", best_multiple.best_epitope_by_rank.peptide) - self.assertEqual("SLPQPPITEV", best_multiple.best_wt_epitope_by_rank.peptide) - self.assertEqual( - best_multiple.best_ninemer_epitope_by_rank.hla.name, - best_multiple.best_ninemer_wt_epitope_by_rank.hla.name, - ) + 
self.assertEqual(17.79, best_multiple.best_epitope_by_affinity.affinity_mutated) + self.assertEqual('HLA-A*02:01', best_multiple.best_epitope_by_affinity.allele_mhc_i.name) + self.assertEqual(0.081, best_multiple.best_epitope_by_rank.rank_mutated) + self.assertEqual("HLA-A*02:01", best_multiple.best_epitope_by_rank.allele_mhc_i.name) + self.assertEqual("TLPEPPLWSV", best_multiple.best_epitope_by_rank.mutated_peptide) self.assertEqual(3, best_multiple.generator_rate_cdn) - self.assertAlmostEqual(0.23085258129451622, best_multiple.phbr_i) + self.assertAlmostEqual(0.22940380188017157, best_multiple.phbr_i) diff --git a/neofox/tests/integration_tests/test_api.py b/neofox/tests/integration_tests/test_api.py old mode 100644 new mode 100755 index e763a21c..69685ccc --- a/neofox/tests/integration_tests/test_api.py +++ b/neofox/tests/integration_tests/test_api.py @@ -1,9 +1,9 @@ from typing import List from unittest import TestCase - -from neofox.model.factories import NeoantigenFactory, PatientFactory +from neofox.model.factories import NeoantigenFactory, PatientFactory, NeoepitopeFactory from neofox.model.neoantigen import Neoantigen from neofox.neofox import NeoFox +from neofox.references.references import ORGANISM_HOMO_SAPIENS from neofox.tests.integration_tests import integration_test_tools @@ -47,3 +47,35 @@ def test_no_mhc2(self): self.assertEqual(len(annotated_neoantigens), 1) self.assertIsInstance(annotated_neoantigens[0], Neoantigen) self.assertGreater(len(annotated_neoantigens[0].neofox_annotations.annotations), 0) + + def test_build_neoepitope_mhc_i(self): + + neoepitope = NeoepitopeFactory.build_neoepitope( + mutated_peptide="AAAAFAAAA", + wild_type_peptide="AAAALAAAA", + allele_mhc_i='HLA-A*01:01', + organism=ORGANISM_HOMO_SAPIENS, + mhc_database=self.hla_database + ) + self.assertIsNotNone(neoepitope) + + def test_build_neoepitope_mhc_i_i(self): + + neoepitope = NeoepitopeFactory.build_neoepitope( + mutated_peptide="AAAAFAAAA", + 
wild_type_peptide="AAAALAAAA", + isoform_mhc_i_i='DRB1*01:01', + organism=ORGANISM_HOMO_SAPIENS, + mhc_database=self.hla_database + ) + self.assertIsNotNone(neoepitope) + + def test_build_neoepitope_without_mhc(self): + + neoepitope = NeoepitopeFactory.build_neoepitope( + mutated_peptide="AAAAFAAAA", + wild_type_peptide="AAAALAAAA", + patient_identifier='123', + mhc_database=self.hla_database + ) + self.assertIsNotNone(neoepitope) diff --git a/neofox/tests/integration_tests/test_best_multiple_binder.py b/neofox/tests/integration_tests/test_best_multiple_binder.py index b95b0f5e..d9e6693b 100755 --- a/neofox/tests/integration_tests/test_best_multiple_binder.py +++ b/neofox/tests/integration_tests/test_best_multiple_binder.py @@ -19,6 +19,7 @@ from logzero import logger from unittest import TestCase from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper from neofox.model.factories import MhcFactory from neofox.model.mhc_parser import MhcParser import neofox.tests.integration_tests.integration_test_tools as integration_test_tools @@ -32,7 +33,7 @@ ) from neofox.MHC_predictors.netmhcpan.netmhcIIpan_prediction import NetMhcIIPanPredictor from neofox.annotation_resources.uniprot.uniprot import Uniprot -from neofox.tests.tools import get_mutation +from neofox.tests.tools import get_neoantigen class TestBestMultipleBinder(TestCase): @@ -60,26 +61,21 @@ def test_best_multiple_run(self): blastp_runner=self.proteome_blastp_runner ) # this is some valid example neoantigen candidate sequence - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", ) best_multiple.run( - mutation=mutation, + neoantigen=mutation, mhc1_alleles_patient=self.test_mhc_one, mhc1_alleles_available=self.available_alleles_mhc1, uniprot=self.uniprot, ) - self.assertEqual(602.12, best_multiple.best_epitope_by_affinity.affinity_score) - 
self.assertEqual('HLA-A*02:01', best_multiple.best_epitope_by_affinity.hla.name) - self.assertEqual(0.492, best_multiple.best_epitope_by_rank.rank) - self.assertEqual('HLA-A*02:01', best_multiple.best_epitope_by_rank.hla.name) - self.assertEqual("ILVTDQTRL", best_multiple.best_epitope_by_rank.peptide) - self.assertEqual( - best_multiple.best_ninemer_epitope_by_rank.hla.name, - best_multiple.best_ninemer_wt_epitope_by_rank.hla.name, - ) - + self.assertEqual(602.12, best_multiple.best_epitope_by_affinity.affinity_mutated) + self.assertEqual('HLA-A*02:01', best_multiple.best_epitope_by_affinity.allele_mhc_i.name) + self.assertEqual(0.492, best_multiple.best_epitope_by_rank.rank_mutated) + self.assertEqual('HLA-A*02:01', best_multiple.best_epitope_by_rank.allele_mhc_i.name) + self.assertEqual("ILVTDQTRL", best_multiple.best_epitope_by_rank.mutated_peptide) def test_phbr1(self): best_multiple = BestAndMultipleBinder( @@ -90,16 +86,15 @@ def test_phbr1(self): runner=self.runner, configuration=self.configuration, mhc_parser=self.mhc_parser, blastp_runner=self.proteome_blastp_runner ) - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", ) # all alleles = heterozygous - predictions = netmhcpan.mhc_prediction( - self.test_mhc_one, self.available_alleles_mhc1, mutation.mutated_xmer - ) + available_alleles = netmhcpan.get_only_available_alleles(self.test_mhc_one, self.available_alleles_mhc1) + predictions = netmhcpan.mhc_prediction(available_alleles, mutation.mutated_xmer) - predicted_neoepitopes = netmhcpan.remove_peptides_in_proteome( + predicted_neoepitopes = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions, uniprot=self.uniprot ) best_epitopes_per_allele = ( @@ -121,11 +116,10 @@ def test_phbr1(self): "HLA-C*05:01", ], self.hla_database ) - predictions = netmhcpan.mhc_prediction( - self.test_mhc_one, self.available_alleles_mhc1, mutation.mutated_xmer - ) - 
predicted_neoepitopes = netmhcpan.remove_peptides_in_proteome( + predictions = netmhcpan.mhc_prediction(available_alleles, mutation.mutated_xmer) + + predicted_neoepitopes = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions,uniprot=self.uniprot ) best_epitopes_per_allele = ( @@ -141,10 +135,8 @@ def test_phbr1(self): ["HLA-A*24:02", "HLA-A*02:01", "HLA-B*15:01", "HLA-B*44:02", "HLA-C*05:01"], self.hla_database ) - predictions = netmhcpan.mhc_prediction( - self.test_mhc_one, self.available_alleles_mhc1, mutation.mutated_xmer - ) - predicted_neoepitopes = netmhcpan.remove_peptides_in_proteome( + predictions = netmhcpan.mhc_prediction(available_alleles, mutation.mutated_xmer) + predicted_neoepitopes = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions, uniprot=self.uniprot ) best_epitopes_per_allele = ( @@ -161,12 +153,12 @@ def test_best_multiple_mhc2_run(self): blastp_runner=self.proteome_blastp_runner ) # this is some valid example neoantigen candidate sequence - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", ) best_multiple.run( - mutation=mutation, + neoantigen=mutation, mhc2_alleles_patient=self.test_mhc_two, mhc2_alleles_available=self.available_alleles_mhc2, uniprot=self.uniprot @@ -174,12 +166,12 @@ def test_best_multiple_mhc2_run(self): logger.info(best_multiple.best_predicted_epitope_rank) logger.info(best_multiple.best_predicted_epitope_affinity) logger.info(best_multiple.phbr_ii) - self.assertEqual(3.26, best_multiple.best_predicted_epitope_rank.rank) + self.assertEqual(3.26, best_multiple.best_predicted_epitope_rank.rank_mutated) self.assertEqual( - 1103.46, best_multiple.best_predicted_epitope_affinity.affinity_score + 1103.46, best_multiple.best_predicted_epitope_affinity.affinity_mutated ) self.assertEqual( - "SQDILVTDQTRLEAT", best_multiple.best_predicted_epitope_rank.peptide + "SQDILVTDQTRLEAT", 
best_multiple.best_predicted_epitope_rank.mutated_peptide ) def test_phbr2(self): @@ -191,7 +183,7 @@ def test_phbr2(self): runner=self.runner, configuration=self.configuration, mhc_parser=self.mhc_parser, blastp_runner=self.proteome_blastp_runner ) - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", ) @@ -203,7 +195,7 @@ def test_phbr2(self): predictions = netmhc2pan.mhc2_prediction( patient_mhc2_isoforms, mutation.mutated_xmer ) - filtered_predictions = netmhc2pan.remove_peptides_in_proteome( + filtered_predictions = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions, uniprot=self.uniprot ) logger.info(filtered_predictions) @@ -238,7 +230,7 @@ def test_phbr2(self): predictions = netmhc2pan.mhc2_prediction( patient_mhc2_isoforms, mutation.mutated_xmer ) - filtered_predictions = netmhc2pan.remove_peptides_in_proteome( + filtered_predictions = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions, uniprot=self.uniprot ) best_predicted_epitopes_per_alelle = ( @@ -273,7 +265,7 @@ def test_phbr2(self): predictions = netmhc2pan.mhc2_prediction( patient_mhc2_isoforms, mutation.mutated_xmer ) - filtered_predictions = netmhc2pan.remove_peptides_in_proteome( + filtered_predictions = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions, uniprot=self.uniprot ) best_predicted_epitopes_per_alelle = ( @@ -295,34 +287,23 @@ def test_generator_rate(self): runner=self.runner, configuration=self.configuration, mhc_parser=self.mhc_parser, blastp_runner=self.proteome_blastp_runner ) - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", ) # all alleles = heterozygous - predictions = netmhcpan.mhc_prediction( - self.test_mhc_one, self.available_alleles_mhc1, mutation.mutated_xmer - ) - - predictions_wt = netmhcpan.mhc_prediction( - self.test_mhc_one, 
self.available_alleles_mhc1, mutation.wild_type_xmer - ) + available_alleles = netmhcpan.get_only_available_alleles(self.test_mhc_one, self.available_alleles_mhc1) + predictions = netmhcpan.mhc_prediction(available_alleles, mutation.mutated_xmer) + predictions_wt = netmhcpan.mhc_prediction(available_alleles, mutation.wild_type_xmer) - predicted_neoepitopes = netmhcpan.remove_peptides_in_proteome( - predictions=predictions, uniprot=self.uniprot - ) - filtered_predictions_wt = netmhcpan.filter_peptides_covering_snv( - position_of_mutation=mutation.position, predictions=predictions_wt - ) + predicted_neoepitopes = EpitopeHelper.remove_peptides_in_proteome(predictions=predictions, uniprot=self.uniprot) + filtered_predictions_wt = EpitopeHelper.filter_peptides_covering_snv( + position_of_mutation=mutation.position, predictions=predictions_wt) + paired_predictions = EpitopeHelper.pair_predictions( + predictions=predicted_neoepitopes, predictions_wt=filtered_predictions_wt) - generator_rate_ADN = best_multiple.determine_number_of_alternative_binders( - predictions=predicted_neoepitopes, predictions_wt=filtered_predictions_wt - ) - generator_rate_CDN = best_multiple.determine_number_of_binders( - predictions=predicted_neoepitopes, threshold=50 - ) - logger.info(generator_rate_ADN) - logger.info(generator_rate_CDN) + generator_rate_ADN = best_multiple.determine_number_of_alternative_binders(predictions=paired_predictions) + generator_rate_CDN = best_multiple.determine_number_of_binders(predictions=paired_predictions, threshold=50) self.assertEqual(generator_rate_ADN, 0) self.assertEqual(generator_rate_CDN, 0) @@ -336,7 +317,7 @@ def test_generator_rate_mhcII(self): runner=self.runner, configuration=self.configuration, mhc_parser=self.mhc_parser, blastp_runner=self.proteome_blastp_runner ) - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="RTNLLAALHRSVRWRAADQGHRSAFLV", wild_type_xmer="RTNLLAALHRSVRRRAADQGHRSAFLV", ) @@ -353,18 +334,17 @@ def 
test_generator_rate_mhcII(self): patient_mhc2_isoforms, mutation.wild_type_xmer ) - predicted_neoepitopes = netmhc2pan.remove_peptides_in_proteome( + predicted_neoepitopes = EpitopeHelper.remove_peptides_in_proteome( predictions=predictions, uniprot=self.uniprot ) - filtered_predictions_wt = netmhc2pan.filter_peptides_covering_snv( + filtered_predictions_wt = EpitopeHelper.filter_peptides_covering_snv( position_of_mutation=mutation.position, predictions=predictions_wt ) - generator_rate_ADN = best_multiple.determine_number_of_alternative_binders( - predictions=predicted_neoepitopes, predictions_wt=filtered_predictions_wt - ) - generator_rate_CDN = best_multiple.determine_number_of_binders( - predictions=predicted_neoepitopes - ) - self.assertEqual(generator_rate_ADN, 0) + paired_predictions = EpitopeHelper.pair_predictions( + predictions=predicted_neoepitopes, predictions_wt=filtered_predictions_wt) + + generator_rate_ADN = best_multiple.determine_number_of_alternative_binders(predictions=paired_predictions) + generator_rate_CDN = best_multiple.determine_number_of_binders(predictions=paired_predictions) + self.assertEqual(generator_rate_ADN, 6) self.assertEqual(generator_rate_CDN, 0) diff --git a/neofox/tests/integration_tests/test_dissimilarity.py b/neofox/tests/integration_tests/test_dissimilarity.py index 488df4cb..118a22df 100755 --- a/neofox/tests/integration_tests/test_dissimilarity.py +++ b/neofox/tests/integration_tests/test_dissimilarity.py @@ -22,6 +22,7 @@ import neofox.tests.integration_tests.integration_test_tools as integration_test_tools from neofox.helpers.blastp_runner import BlastpRunner from neofox.helpers.runner import Runner +from neofox.model.neoantigen import PredictedEpitope from neofox.published_features.dissimilarity_garnish.dissimilaritycalculator import ( DissimilarityCalculator, ) @@ -38,44 +39,41 @@ def setUp(self): def test_dissimilar_sequences(self): result = 
DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner).calculate_dissimilarity( - mutated_peptide="tocino", mhc_affinity=600) + PredictedEpitope(mutated_peptide="tocino", affinity_mutated=600)) self.assertEqual(1, result) def test_similar_sequences(self): result = DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner).calculate_dissimilarity( - mutated_peptide="DDDDDD", mhc_affinity=600) + PredictedEpitope(mutated_peptide="DDDDDD", affinity_mutated=600)) self.assertTrue(result < 0.000001) def test_missing_aminoacid_change(self): result = DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner).calculate_dissimilarity( - mutated_peptide="DDUDDD", mhc_affinity=600) + PredictedEpitope(mutated_peptide="DDUDDD", affinity_mutated=600)) self.assertIsNone(result) def test_dissimilarity_mhcii(self): # peptide with point mutation result = DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner).calculate_dissimilarity( - mutated_peptide="LGLSDSQFLQTFLFM", mhc_affinity=430) + PredictedEpitope(mutated_peptide="LGLSDSQFLQTFLFM", affinity_mutated=430)) self.assertEqual(result, 0) # unsimmilar peptide result = DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner).calculate_dissimilarity( - mutated_peptide="LELERVLVQY", mhc_affinity=430) + PredictedEpitope(mutated_peptide="LELERVLVQY", affinity_mutated=430)) self.assertAlmostEqual(0.0038214427855995936, result) def test_dissimilar_sequences(self): result = DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner).calculate_dissimilarity( - mutated_peptide="tocino", mhc_affinity=600) + PredictedEpitope(mutated_peptide="tocino", affinity_mutated=600)) self.assertEqual(1, result) def test_affinity_threshold(self): # peptide with point mutation - dissimilariyty_calculator = DissimilarityCalculator( - proteome_blastp_runner=self.proteome_blastp_runner, - affinity_threshold=1000 - ) + dissimilariyty_calculator = 
DissimilarityCalculator(proteome_blastp_runner=self.proteome_blastp_runner) result = dissimilariyty_calculator.calculate_dissimilarity( - mutated_peptide="LGLSDSQFLQTFLFM", mhc_affinity=1030) - self.assertIsNone(result) + PredictedEpitope(mutated_peptide="LGLSDSQFLQTFLFM", affinity_mutated=1030)) + self.assertIsNotNone(result) result = dissimilariyty_calculator.calculate_dissimilarity( - mutated_peptide="LGLSDSQFLQTFLFM", mhc_affinity=530) + PredictedEpitope(mutated_peptide="LGLSDSQFLQTFLFM", affinity_mutated=530)) self.assertIsNotNone(result) diff --git a/neofox/tests/integration_tests/test_mixmhcpred.py b/neofox/tests/integration_tests/test_mixmhcpred.py index 01268e6f..ece5f62a 100755 --- a/neofox/tests/integration_tests/test_mixmhcpred.py +++ b/neofox/tests/integration_tests/test_mixmhcpred.py @@ -24,11 +24,11 @@ from neofox.model.neoantigen import Mhc2Name from neofox.helpers.epitope_helper import EpitopeHelper import neofox.tests.integration_tests.integration_test_tools as integration_test_tools -from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMhc2Pred +from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred from neofox.helpers.runner import Runner from neofox.annotation_resources.uniprot.uniprot import Uniprot -from neofox.tests.tools import get_mutation +from neofox.tests.tools import get_neoantigen class TestMixMHCPred(TestCase): @@ -39,7 +39,7 @@ def setUp(self): self.mixmhcpred = MixMHCpred( runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser ) - self.mixmhc2pred = MixMhc2Pred( + self.mixmhc2pred = MixMHC2pred( runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser ) self.hla_database = self.references.get_mhc_database() @@ -49,119 +49,118 @@ def setUp(self): def test_mixmhcpred_epitope_iedb(self): # this is an epitope from IEDB of length 9 - mutation = get_mutation(mutated_xmer="NLVPMVATV", wild_type_xmer="NLVPIVATV") 
- best_peptide, best_rank, best_allele, best_score = self.mixmhcpred.run( - mutation=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot - ) - self.assertEquals("NLVPMVATV", best_peptide) - self.assertAlmostEqual(0.306957, best_score, delta=0.00001) - self.assertEquals(0.6, best_rank) - self.assertEquals("HLA-A*02:01", best_allele) + mutation = get_neoantigen(mutated_xmer="NLVPMVATV", wild_type_xmer="NLVPIVATV") + self.mixmhcpred.run(neoantigen=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.mixmhcpred.results, maximum=True) + self.assertEquals("NLVPMVATV", best_result.mutated_peptide) + self.assertAlmostEqual(0.306957, best_result.affinity_mutated, delta=0.00001) + self.assertEquals(0.6, best_result.rank_mutated) + self.assertEquals("HLA-A*02:01", best_result.allele_mhc_i.name) def test_mixmhcpred_too_small_epitope(self): - mutation = get_mutation(mutated_xmer="NLVP", wild_type_xmer="NLNP") - best_peptide, best_rank, best_allele, best_score = self.mixmhcpred.run( - mutation=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot - ) - self.assertIsNone(best_peptide) - self.assertIsNone(best_score) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) + mutation = get_neoantigen(mutated_xmer="NLVP", wild_type_xmer="NLNP") + self.mixmhcpred.run(neoantigen=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.mixmhcpred.results, maximum=True) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.allele_mhc_i.name) + self.assertIsNone(best_result.affinity_mutated) def test_mixmhcpred_not_supported_allele(self): """ this is a combination of neoepitope and HLA alleles from Balachandran """ - mutation = get_mutation(mutated_xmer="SIYGGLVLI", wild_type_xmer="PIYGGLVLI") - best_peptide, best_rank, best_allele, best_score = 
self.mixmhcpred.run( - mutation=mutation, + mutation = get_neoantigen(mutated_xmer="SIYGGLVLI", wild_type_xmer="PIYGGLVLI") + self.mixmhcpred.run( + neoantigen=mutation, mhc=MhcFactory.build_mhc1_alleles(["A02:01", "B44:02", "C05:17", "C05:01"], self.hla_database), uniprot=self.uniprot ) - self.assertEqual('SIYGGLVLI', best_peptide) - self.assertEqual(0.15829400000000002, best_score) - self.assertEqual(1, best_rank) - self.assertEqual('HLA-A*02:01', best_allele) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.mixmhcpred.results, maximum=True) + self.assertEqual('SIYGGLVLI', best_result.mutated_peptide) + self.assertAlmostEqual(0.158294, best_result.affinity_mutated, places=5) + self.assertEqual(1, best_result.rank_mutated) + self.assertEqual('HLA-A*02:01', best_result.allele_mhc_i.name) def test_mixmhcpred_rare_aminoacid(self): for wild_type_xmer, mutated_xmer in integration_test_tools.mutations_with_rare_aminoacids: - mutation = get_mutation(mutated_xmer=mutated_xmer, wild_type_xmer=wild_type_xmer) - best_peptide, best_rank, best_allele, best_score = self.mixmhcpred.run( - mutation=mutation, mhc=self.test_mhc_one, - uniprot=self.uniprot - ) + mutation = get_neoantigen(mutated_xmer=mutated_xmer, wild_type_xmer=wild_type_xmer) + self.mixmhcpred.run(neoantigen=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.mixmhcpred.results, maximum=True) # rare aminoacids only return empty results when in the mutated sequence if EpitopeHelper.contains_rare_amino_acid(mutated_xmer): - self.assertIsNone(best_peptide) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) - self.assertIsNone(best_score) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.allele_mhc_i.name) + self.assertIsNone(best_result.affinity_mutated) else: - self.assertIsNotNone(best_peptide) - 
self.assertIsNotNone(best_rank) - self.assertIsNotNone(best_allele) - self.assertIsNotNone(best_score) - + self.assertIsNotNone(best_result.mutated_peptide) + self.assertIsNotNone(best_result.rank_mutated) + self.assertIsNotNone(best_result.allele_mhc_i) + self.assertIsNotNone(best_result.affinity_mutated) def test_mixmhcpred2_epitope_iedb(self): # this is an epitope from IEDB of length 15 - mutation = get_mutation( + neoantigen = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET") - best_peptide, best_rank, best_allele = self.mixmhc2pred.run( - mutation=mutation, mhc=self.test_mhc_two, + self.mixmhc2pred.run( + neoantigen=neoantigen, mhc=self.test_mhc_two, uniprot=self.uniprot ) - self.assertEquals("DEVLGEPSQDILVT", best_peptide) - self.assertEquals(3.06, best_rank) - self.assertEquals("HLA-DPA1*01:03-DPB1*04:01", best_allele) + best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results) + self.assertEquals("DEVLGEPSQDILVT", best_result.mutated_peptide) + self.assertEquals(3.06, best_result.rank_mutated) + self.assertEquals("HLA-DPA1*01:03-DPB1*04:01", best_result.isoform_mhc_i_i.name) def test_mixmhcpred2_epitope_iedb_forcing_no_drb1(self): # this is an epitope from IEDB of length 15 - mutation = get_mutation( + neoantigen = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET") - best_peptide, best_rank, best_allele = self.mixmhc2pred.run( + self.mixmhc2pred.run( # forces no DRB1 allele to get as a result one of the composite isoforms - mutation=mutation, mhc=[m for m in self.test_mhc_two if m.name != Mhc2Name.DR], + neoantigen=neoantigen, mhc=[m for m in self.test_mhc_two if m.name != Mhc2Name.DR], uniprot=self.uniprot ) - self.assertEquals("DEVLGEPSQDILVT", best_peptide) - self.assertEquals(3.06, best_rank) - self.assertEquals("HLA-DPA1*01:03-DPB1*04:01", best_allele) + best_result = 
EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results) + self.assertEquals("DEVLGEPSQDILVT", best_result.mutated_peptide) + self.assertEquals(3.06, best_result.rank_mutated) + self.assertEquals("HLA-DPA1*01:03-DPB1*04:01", best_result.isoform_mhc_i_i.name) def test_mixmhcpred2_too_small_epitope(self): - mutation = get_mutation(mutated_xmer="ENPVVHFF", wild_type_xmer="ENPVVHFF") - best_peptide, best_rank, best_allele = self.mixmhc2pred.run( - mutation=mutation, mhc=self.test_mhc_two, uniprot=self.uniprot - ) - self.assertIsNone(best_peptide) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) + neoantigen = get_neoantigen(mutated_xmer="ENPVVHFF", wild_type_xmer="ENPVVHFF") + self.mixmhc2pred.run(neoantigen=neoantigen, mhc=self.test_mhc_two, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.isoform_mhc_i_i.name) def test_mixmhcpred2_no_mutation(self): for wild_type_xmer, mutated_xmer in integration_test_tools.mutations_with_rare_aminoacids: - mutation = get_mutation(mutated_xmer=mutated_xmer, wild_type_xmer=wild_type_xmer) - best_peptide, best_rank, best_allele = self.mixmhc2pred.run( - mutation=mutation, mhc=self.test_mhc_two, uniprot=self.uniprot - ) - self.assertIsNone(best_peptide) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) + neoantigen = get_neoantigen(mutated_xmer=mutated_xmer, wild_type_xmer=wild_type_xmer) + self.mixmhc2pred.run(neoantigen=neoantigen, mhc=self.test_mhc_two, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.isoform_mhc_i_i.name) def test_mixmhc2pred_rare_aminoacid(self): # this is an epitope from IEDB of length 9 - mutation = 
get_mutation(mutated_xmer="XTTDSWGKF", wild_type_xmer="XTTDSDGKF") - best_peptide, best_rank, best_allele = self.mixmhc2pred.run( - mutation=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot - ) - self.assertIsNone(best_peptide) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) + neoantigen = get_neoantigen(mutated_xmer="XTTDSWGKF", wild_type_xmer="XTTDSDGKF") + self.mixmhc2pred.run(neoantigen=neoantigen, mhc=self.test_mhc_one, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.isoform_mhc_i_i.name) def test_mixmhc2pred_allele(self): - mutation = get_mutation(mutated_xmer="TNENLDLQELVEKLEKN", wild_type_xmer="TNENLDLQNLVEKLEKN") + neoantigen = get_neoantigen(mutated_xmer="TNENLDLQELVEKLEKN", wild_type_xmer="TNENLDLQNLVEKLEKN") # this is a MHC II genotype which results in no available alleles for MixMHC2pred MHC_TWO_NEW = MhcFactory.build_mhc2_alleles( [ @@ -178,17 +177,15 @@ def test_mixmhc2pred_allele(self): ) alleles = self.mixmhc2pred.transform_hla_ii_alleles_for_prediction(MHC_TWO_NEW) logger.info(alleles) - best_peptide, best_rank, best_allele = self.mixmhc2pred.run( - mutation=mutation, mhc=MHC_TWO_NEW, uniprot=self.uniprot - ) - logger.info(best_peptide) - self.assertIsNone(best_peptide) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) + self.mixmhc2pred.run(neoantigen=neoantigen, mhc=MHC_TWO_NEW, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.isoform_mhc_i_i.name) def test_generate_nmers(self): - mutation = get_mutation(mutated_xmer="DDDDDVDDD", wild_type_xmer="DDDDDDDDD") - result = EpitopeHelper.generate_nmers(mutation=mutation, lengths=[8, 9, 10, 11], 
uniprot=self.uniprot) + neoantigen = get_neoantigen(mutated_xmer="DDDDDVDDD", wild_type_xmer="DDDDDDDDD") + result = EpitopeHelper.generate_nmers(neoantigen=neoantigen, lengths=[8, 9, 10, 11], uniprot=self.uniprot) logger.info(result) self.assertIsNotNone(result) self.assertEqual(3, len(result)) diff --git a/neofox/tests/integration_tests/test_neoag.py b/neofox/tests/integration_tests/test_neoag.py index a190c8c1..35caf85d 100755 --- a/neofox/tests/integration_tests/test_neoag.py +++ b/neofox/tests/integration_tests/test_neoag.py @@ -18,12 +18,11 @@ # along with this program. If not, see .# from unittest import TestCase -from neofox.model.neoantigen import Annotation +from neofox.model.neoantigen import Annotation, PredictedEpitope, MhcAllele from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator from neofox.helpers.runner import Runner import neofox.tests.integration_tests.integration_test_tools as integration_test_tools -from neofox.MHC_predictors.netmhcpan.abstract_netmhcpan_predictor import PredictedEpitope -from neofox.tests.tools import get_mutation +from neofox.tests.tools import get_neoantigen class TestNeoantigenFitness(TestCase): @@ -34,42 +33,34 @@ def setUp(self): def test_neoag(self): - mutation = get_mutation( - mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", + mutation = get_neoantigen( + mutated_xmer= "DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET" ) result = NeoagCalculator( runner=self.runner, configuration=self.configuration ).get_annotation( - sample_id="12345", - mutated_peptide_mhci=PredictedEpitope( - peptide="DDDDDV", affinity_score=0, pos=0, hla="hla", rank=0 - ), - wt_peptide_mhci=PredictedEpitope( - peptide="DDDDDD", affinity_score=0, pos=0, hla="hla", rank=0 + epitope_mhci=PredictedEpitope( + mutated_peptide="ILVTDQTRL", wild_type_peptide="ILVIDQTRL", + affinity_mutated=0, position=0, allele_mhc_i=MhcAllele(name="hla"), rank_mutated=0 ), - mutation=mutation, - peptide_variant_position="123" + 
neoantigen=mutation, ) self.assertTrue(isinstance(result, Annotation)) self.assertTrue(float(result.value) > 0) def test_affinity_threshold(self): - mutation = get_mutation( + mutation = get_neoantigen( mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", ) result = NeoagCalculator( - runner=self.runner, configuration=self.configuration, affinity_threshold=1 + runner=self.runner, configuration=self.configuration ).get_annotation( - sample_id="12345", - mutated_peptide_mhci=PredictedEpitope( - peptide="DDDDDV", affinity_score=10, pos=0, hla="hla", rank=0 - ), - wt_peptide_mhci=PredictedEpitope( - peptide="DDDDDD", affinity_score=0, pos=0, hla="hla", rank=0 + epitope_mhci=PredictedEpitope( + mutated_peptide="DDDDDV", affinity_mutated=10, position=0, allele_mhc_i=MhcAllele(name="hla"), + rank_mutated=0 ), - mutation=mutation, - peptide_variant_position="123" + neoantigen=mutation ) self.assertEqual(result.value, "NA") diff --git a/neofox/tests/integration_tests/test_neoantigen_annotator.py b/neofox/tests/integration_tests/test_neoantigen_annotator.py new file mode 100644 index 00000000..6df925e2 --- /dev/null +++ b/neofox/tests/integration_tests/test_neoantigen_annotator.py @@ -0,0 +1,181 @@ +from unittest import TestCase + +from neofox.annotator.neoantigen_annotator import NeoantigenAnnotator +from neofox.model.factories import MhcFactory, NeoantigenFactory +from neofox.model.neoantigen import PredictedEpitope, MhcAllele, Neoantigen, Mhc2Isoform, Patient +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator +from neofox.tests.integration_tests import integration_test_tools +from neofox.tests.integration_tests.integration_test_tools import get_hla_one_test, get_hla_two_test + + +class NeoantigenAnnotatorTest(TestCase): + + def setUp(self) -> None: + self.references, self.configuration = 
integration_test_tools.load_references() + self.annotator = NeoantigenAnnotator( + references=self.references, + configuration=self.configuration, + tcell_predictor=TcellPrediction(), + self_similarity=SelfSimilarityCalculator() + ) + self.patient = Patient( + identifier="123", + mhc1=get_hla_one_test(self.references.get_mhc_database()), + mhc2=get_hla_two_test(self.references.get_mhc_database()) + ) + + def test_neoantigen_annotation(self): + neoantigen = NeoantigenFactory.build_neoantigen( + wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", + mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", + patient_identifier="123", + gene="BRCA2" + ) + annotated_neoantigen = self.annotator.get_annotated_neoantigen(neoantigen=neoantigen, patient=self.patient) + self._assert_neoantigen(annotated_neoantigen, neoantigen) + self._assert_epitopes(annotated_neoantigen=annotated_neoantigen) + + def test_neoantigen_annotation_with_all_epitopes(self): + neoantigen = NeoantigenFactory.build_neoantigen( + wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", + mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", + patient_identifier="123", + gene="BRCA2" + ) + annotated_neoantigen = self.annotator.get_annotated_neoantigen( + neoantigen=neoantigen, patient=self.patient, with_all_neoepitopes=True) + self._assert_neoantigen(annotated_neoantigen, neoantigen) + self._assert_epitopes(annotated_neoantigen=annotated_neoantigen, with_all_epitopes=True) + + def test_neoantigen_annotation_without_wild_type(self): + neoantigen = NeoantigenFactory.build_neoantigen( + mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", + patient_identifier="123", + gene="BRCA2" + ) + annotated_neoantigen = self.annotator.get_annotated_neoantigen(neoantigen=neoantigen, patient=self.patient) + self._assert_neoantigen(annotated_neoantigen, neoantigen) + # wild type xmer is still empty! 
+ self.assertIsNone(annotated_neoantigen.wild_type_xmer) + self._assert_epitopes(annotated_neoantigen=annotated_neoantigen) + + def test_neoantigen_annotation_without_wild_type_and_with_all_epitopes(self): + neoantigen = NeoantigenFactory.build_neoantigen( + mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", + patient_identifier="123", + gene="BRCA2" + ) + annotated_neoantigen = self.annotator.get_annotated_neoantigen( + neoantigen=neoantigen, patient=self.patient, with_all_neoepitopes=True) + self._assert_neoantigen(annotated_neoantigen, neoantigen) + # wild type xmer is still empty! + self.assertIsNone(annotated_neoantigen.wild_type_xmer) + self._assert_epitopes(annotated_neoantigen, with_all_epitopes=True) + + def test_neoantigen_annotation_with_vaf_and_without_tx_expression(self): + neoantigen = NeoantigenFactory.build_neoantigen( + wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET", + mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET", + patient_identifier="123", + gene="BRCA2", + dna_variant_allele_frequency=0.5 + ) + annotated_neoantigen = self.annotator.get_annotated_neoantigen( + neoantigen=neoantigen, patient=self.patient, with_all_neoepitopes=True) + self._assert_neoantigen(annotated_neoantigen, neoantigen) + + def test_neoepitope_annotation_mhci(self): + epitope = PredictedEpitope( + mutated_peptide="AAAAAADAAAAA", + wild_type_peptide="AAAAAAAAAAAA", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01') + ) + neoantigen = Neoantigen(gene='BRCA2', dna_variant_allele_frequency=0.5, rna_variant_allele_frequency=0.5, + rna_expression=0.5) + annotated_epitope = self.annotator.get_additional_annotations_neoepitope_mhci( + epitope=epitope, + neoantigen=neoantigen + ) + self.assertIsNotNone(annotated_epitope) + self.assertEqual(annotated_epitope.mutated_peptide, epitope.mutated_peptide) + self.assertEqual(annotated_epitope.wild_type_peptide, epitope.wild_type_peptide) + self.assertEqual(annotated_epitope.allele_mhc_i.name, epitope.allele_mhc_i.name) + 
self.assertGreater(len(annotated_epitope.neofox_annotations.annotations), 0) + + def test_neoepitope_annotation_mhci_without_wild_type(self): + epitope = PredictedEpitope( + mutated_peptide="AAAAAADAAAAA", + #wild_type_peptide="AAAAAAAAAAAA", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01') + ) + neoantigen = Neoantigen(gene='BRCA2', dna_variant_allele_frequency=0.5, rna_variant_allele_frequency=0.5, + rna_expression=0.5) + annotated_epitope = self.annotator.get_additional_annotations_neoepitope_mhci( + epitope=epitope, + neoantigen=neoantigen + ) + self.assertIsNotNone(annotated_epitope) + self.assertEqual(annotated_epitope.mutated_peptide, epitope.mutated_peptide) + self.assertEqual(annotated_epitope.wild_type_peptide, epitope.wild_type_peptide) + self.assertEqual(annotated_epitope.allele_mhc_i.name, epitope.allele_mhc_i.name) + self.assertGreater(len(annotated_epitope.neofox_annotations.annotations), 0) + + def test_neoepitope_annotation_mhcii(self): + epitope = PredictedEpitope( + mutated_peptide="AAAAAADAAAAAAA", + wild_type_peptide="AAAAAAAAAAAAAA", + isoform_mhc_i_i=self._get_test_mhcii_isoform('HLA-DRB1*04:02') + ) + annotated_epitope = self.annotator.get_additional_annotations_neoepitope_mhcii(epitope=epitope) + self.assertIsNotNone(annotated_epitope) + self.assertEqual(annotated_epitope.mutated_peptide, epitope.mutated_peptide) + self.assertEqual(annotated_epitope.wild_type_peptide, epitope.wild_type_peptide) + self.assertEqual(annotated_epitope.isoform_mhc_i_i.name, epitope.isoform_mhc_i_i.name) + self.assertGreater(len(annotated_epitope.neofox_annotations.annotations), 0) + + def _get_test_mhci_allele(self, allele) -> MhcAllele: + mhci = MhcFactory.build_mhc1_alleles([allele], mhc_database=self.references.get_mhc_database()) + return mhci[0].alleles[0] + + def _get_test_mhcii_isoform(self, isoform) -> Mhc2Isoform: + mhcii = MhcFactory.build_mhc2_alleles([isoform], mhc_database=self.references.get_mhc_database()) + return mhcii[0].isoforms[0] + + 
def _assert_neoantigen(self, annotated_neoantigen: Neoantigen, neoantigen: Neoantigen): + self.assertIsNotNone(annotated_neoantigen) + self.assertEqual(annotated_neoantigen.mutated_xmer, neoantigen.mutated_xmer) + self.assertEqual(annotated_neoantigen.wild_type_xmer, neoantigen.wild_type_xmer) + self.assertEqual(annotated_neoantigen.position, neoantigen.position) + self.assertGreater(len(annotated_neoantigen.neofox_annotations.annotations), 0) + annotation_names = [a.name for a in annotated_neoantigen.neofox_annotations.annotations] + self.assertTrue("NetMHCpan_bestRank_peptide" in annotation_names) + self.assertTrue("NetMHCpan_bestRank_allele" in annotation_names) + + def _assert_epitopes(self, annotated_neoantigen, with_all_epitopes=False): + # neoepitopes for both MHC I and MHC II are not empty + self.assertGreater(len(annotated_neoantigen.neoepitopes_mhc_i), 0) + self.assertGreater(len(annotated_neoantigen.neoepitopes_mhc_i_i), 0) + observed_mixmhcpred_annotations = False + for e in annotated_neoantigen.neoepitopes_mhc_i + annotated_neoantigen.neoepitopes_mhc_i_i: + # WT peptides are not empty! 
+ self.assertIsNotNone(e.wild_type_peptide) + self.assertIsNotNone(e.mutated_peptide) + annotation_names = [a.name for a in e.neofox_annotations.annotations] + observed_mixmhcpred_annotations = \ + observed_mixmhcpred_annotations or \ + "MixMHCpred_score" in annotation_names or "MixMHC2pred_score" in annotation_names + if with_all_epitopes: + # they do have extra annotations + self.assertTrue( + "dissimilarity_score" in annotation_names, msg="Annotations: {}".format(annotation_names)) + self.assertTrue( + "IEDB_Immunogenicity" in annotation_names, msg="Annotations: {}".format(annotation_names)) + else: + # they do not have extra annotations + self.assertFalse( + "dissimilarity_score" in annotation_names, msg="Annotations: {}".format(annotation_names)) + self.assertFalse( + "IEDB_Immunogenicity" in annotation_names, msg="Annotations: {}".format(annotation_names)) + # not all epitopes may have results for MixMHCpred + self.assertTrue(observed_mixmhcpred_annotations) diff --git a/neofox/tests/integration_tests/test_neoantigen_fitness.py b/neofox/tests/integration_tests/test_neoantigen_fitness.py index fac5ab18..24d6decb 100755 --- a/neofox/tests/integration_tests/test_neoantigen_fitness.py +++ b/neofox/tests/integration_tests/test_neoantigen_fitness.py @@ -80,23 +80,12 @@ def test_recognition_potential(self): self.assertEqual( 1.0, self.neoantigen_fitness_calculator.calculate_recognition_potential( - amplitude=1.0, pathogen_similarity=1.0, mutation_in_anchor=False + amplitude=1.0, pathogen_similarity=1.0 ), ) self.assertEqual( None, self.neoantigen_fitness_calculator.calculate_recognition_potential( - amplitude=None, pathogen_similarity=1.0, mutation_in_anchor=False - ), - ) - - def test_affinity_threshold(self): - # tests a pathogen sequence and expects 1.0 similarity - neoantigen_fitness_calculator = NeoantigenFitnessCalculator( - iedb_blastp_runner=self.iedb_blastp_runner, affinity_threshold=1 - ) - self.assertIsNone( - 
neoantigen_fitness_calculator.calculate_recognition_potential( - amplitude=1.0, pathogen_similarity=1.0, mhc_affinity_mut=10, mutation_in_anchor=False + amplitude=None, pathogen_similarity=1.0 ), ) diff --git a/neofox/tests/integration_tests/test_neoepitope_annotator.py b/neofox/tests/integration_tests/test_neoepitope_annotator.py new file mode 100644 index 00000000..f5954344 --- /dev/null +++ b/neofox/tests/integration_tests/test_neoepitope_annotator.py @@ -0,0 +1,161 @@ +from neofox.annotator.neoepitope_annotator import NeoepitopeAnnotator +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.model.factories import MhcFactory +from neofox.model.neoantigen import PredictedEpitope, MhcAllele, Mhc2Isoform, Patient +from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction +from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator +from neofox.tests.integration_tests.integration_test_tools import get_hla_one_test, get_hla_two_test, \ + BaseIntegrationTest + + +class NeoepitopeAnnotatorTest(BaseIntegrationTest): + + def setUp(self) -> None: + super().setUp() + self.annotator = NeoepitopeAnnotator( + references=self.references, + configuration=self.configuration, + tcell_predictor=TcellPrediction(), + self_similarity=SelfSimilarityCalculator() + ) + self.patient = Patient( + identifier="123", + mhc1=get_hla_one_test(self.references.get_mhc_database()), + mhc2=get_hla_two_test(self.references.get_mhc_database()) + ) + + def test_neoepitope_mhci_annotation(self): + + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01') + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope) + + def test_neoepitope_mhci_without_wild_type(self): + + neoepitope = 
PredictedEpitope( + mutated_peptide="DILVTDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01') + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope) + + def test_neoepitope_mhci_9mer_with_frequencies_and_gene(self): + """ + this checks fields that are only annotated when expression, vaf and/or gene are provided + """ + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + rna_variant_allele_frequency=0.5, + dna_variant_allele_frequency=1.0, + rna_expression=125, + gene='BRCA2' + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope) + self.assert_float_annotation(annotated_neoepitope, annotation_name="Priority_score") + self.assert_float_annotation(annotated_neoepitope, annotation_name="Tcell_predictor") + + def test_neoepitope_mhci_10mer_no_tcell_predictor(self): + """ + this checks fields that are only annotated when expression, vaf and/or gene are provided + """ + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTRA", + wild_type_peptide="DILVIDQTRA", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + gene='BRCA2' + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope) + annotation_value = EpitopeHelper.get_annotation_by_name( + annotated_neoepitope.neofox_annotations.annotations, "Tcell_predictor") + self.assertEqual(annotation_value, "NA") + + def test_neoepitope_mhci_without_dna_vaf(self): + """ + this checks fields that are only annotated when expression, vaf and/or gene are provided + """ + 
neoepitope_with_dna_vaf = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + dna_variant_allele_frequency=1.0, + rna_variant_allele_frequency=0.1, + rna_expression=125, + gene='BRCA2' + ) + neoepitope_without_dna_vaf = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + rna_variant_allele_frequency=0.1, + rna_expression=125, + gene='BRCA2' + ) + + annotated_neoepitope1 = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope_with_dna_vaf) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope_with_dna_vaf, + annotated_neoepitope=annotated_neoepitope1) + + annotated_neoepitope2 = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope_without_dna_vaf) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope_without_dna_vaf, + annotated_neoepitope=annotated_neoepitope2) + + self.assertNotEqual( + EpitopeHelper.get_annotation_by_name( + annotated_neoepitope1.neofox_annotations.annotations, "Priority_score"), + EpitopeHelper.get_annotation_by_name( + annotated_neoepitope2.neofox_annotations.annotations, "Priority_score") + ) + + def test_neoepitope_mhci_without_vaf(self): + """ + this checks fields that are only annotated when expression, vaf and/or gene are provided + """ + neoepitope_without_vaf = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + dna_variant_allele_frequency=None, + rna_variant_allele_frequency=None, + gene='BRCA2' + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope_without_vaf) + self.assert_neoepitope_mhci(original_neoepitope=neoepitope_without_vaf, + annotated_neoepitope=annotated_neoepitope) + + self.assertEqual( + EpitopeHelper.get_annotation_by_name( + annotated_neoepitope.neofox_annotations.annotations, 
"Priority_score"), "NA") + + def test_neoepitope_mhcii_annotation(self): + + neoepitope = PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01") + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope) + self.assert_neoepitope_mhcii(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope) + + def test_neoepitope_mhcii_without_wild_type(self): + + neoepitope = PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01") + ) + + annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope) + self.assert_neoepitope_mhcii(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope) diff --git a/neofox/tests/integration_tests/test_neoepitope_binding_annotator.py b/neofox/tests/integration_tests/test_neoepitope_binding_annotator.py new file mode 100644 index 00000000..cef32adf --- /dev/null +++ b/neofox/tests/integration_tests/test_neoepitope_binding_annotator.py @@ -0,0 +1,88 @@ +from unittest import TestCase + +from neofox.annotation_resources.uniprot.uniprot import Uniprot +from neofox.annotator.neoepitope_mhc_binding_annotator import NeoepitopeMhcBindingAnnotator +from neofox.helpers.blastp_runner import BlastpRunner +from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.helpers.runner import Runner +from neofox.model.factories import MhcFactory +from neofox.model.neoantigen import PredictedEpitope +from neofox.tests.integration_tests import integration_test_tools + + +class NeoepitopeMhcBindingAnnotatorTest(TestCase): + + def setUp(self) -> None: + self.references, self.configuration = integration_test_tools.load_references() + proteome_blastp_runner = BlastpRunner( + runner=Runner(), configuration=self.configuration, + database=self.references.get_proteome_database()) + self.annotator = 
NeoepitopeMhcBindingAnnotator( + references=self.references, + configuration=self.configuration, + proteome_blastp_runner=proteome_blastp_runner, + uniprot=Uniprot(self.references.uniprot_pickle)) + + def test_neoepitope_mhc1(self): + + allele = MhcFactory.build_mhc1_alleles(["HLA-A*03:01"], self.references.get_mhc_database())[0].alleles[0] + neoepitope = PredictedEpitope( + mutated_peptide="PSQDILVID", + wild_type_peptide="PSQDILVTD", + allele_mhc_i=allele + ) + annotated_neoepitope = self.annotator.get_mhc_binding_annotations(neoepitope=neoepitope) + + self.assertIsNotNone(annotated_neoepitope) + # input data remains the same + self.assertEqual(annotated_neoepitope.mutated_peptide, neoepitope.mutated_peptide) + self.assertEqual(annotated_neoepitope.wild_type_peptide, neoepitope.wild_type_peptide) + self.assertEqual(annotated_neoepitope.allele_mhc_i.name, neoepitope.allele_mhc_i.name) + + # netMHCpan annotations + self.assertIsInstance(annotated_neoepitope.rank_mutated, float) + self.assertIsInstance(annotated_neoepitope.rank_wild_type, float) + self.assertIsInstance(annotated_neoepitope.affinity_mutated, float) + self.assertIsInstance(annotated_neoepitope.affinity_wild_type, float) + + # MixMHCpred annotations + self._assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_score") + self._assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_rank") + self._assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_WT_score") + self._assert_float_annotation(annotated_neoepitope, annotation_name="MixMHCpred_WT_rank") + + # PRIME annotations + self._assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_score") + self._assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_rank") + self._assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_WT_score") + self._assert_float_annotation(annotated_neoepitope, annotation_name="PRIME_WT_rank") + + def 
_assert_float_annotation(self, annotated_neoepitope, annotation_name): + mixmhcpred_affinity = EpitopeHelper.get_annotation_by_name( + annotated_neoepitope.neofox_annotations.annotations, annotation_name) + self.assertIsInstance(mixmhcpred_affinity, str) + self.assertIsInstance(float(mixmhcpred_affinity), float) + + def test_neoepitope_mhc2(self): + + isoform = MhcFactory.build_mhc2_alleles(["HLA-DRB1*01:01"], self.references.get_mhc_database())[0].isoforms[0] + neoepitope = PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=isoform + ) + annotated_neoepitope = self.annotator.get_mhc_binding_annotations(neoepitope=neoepitope) + + self.assertIsNotNone(annotated_neoepitope) + # input data remains the same + self.assertEqual(annotated_neoepitope.mutated_peptide, neoepitope.mutated_peptide) + self.assertEqual(annotated_neoepitope.wild_type_peptide, neoepitope.wild_type_peptide) + self.assertEqual(annotated_neoepitope.isoform_mhc_i_i.name, neoepitope.isoform_mhc_i_i.name) + # netMHC2pan annotations + self.assertIsInstance(annotated_neoepitope.rank_mutated, float) + self.assertIsInstance(annotated_neoepitope.rank_wild_type, float) + self.assertIsInstance(annotated_neoepitope.affinity_mutated, float) + self.assertIsInstance(annotated_neoepitope.affinity_wild_type, float) + # MixMHC2pred annotations + self._assert_float_annotation(annotated_neoepitope, annotation_name="MixMHC2pred_rank") + self._assert_float_annotation(annotated_neoepitope, annotation_name="MixMHC2pred_WT_rank") diff --git a/neofox/tests/integration_tests/test_neofox.py b/neofox/tests/integration_tests/test_neofox.py index 369df6c7..585f91d3 100755 --- a/neofox/tests/integration_tests/test_neofox.py +++ b/neofox/tests/integration_tests/test_neofox.py @@ -26,8 +26,9 @@ from neofox import NEOFOX_MIXMHCPRED_ENV, NEOFOX_MIXMHC2PRED_ENV, NEOFOX_PRIME_ENV import neofox.tests +from neofox.helpers.epitope_helper import EpitopeHelper from 
neofox.model.conversion import ModelConverter -from neofox.model.neoantigen import Neoantigen, Mutation, Patient +from neofox.model.neoantigen import Neoantigen, Patient from neofox.model.factories import NOT_AVAILABLE_VALUE, PatientFactory, MhcFactory from neofox.neofox import NeoFox from neofox.references.references import ORGANISM_MUS_MUSCULUS @@ -51,7 +52,7 @@ def setUp(self): # self.runner = Runner() self.patient_id = "Pt29" input_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_candidate_file.txt" + neofox.tests.__name__, "resources/test_data_model_realistic.txt" ) patients_file = pkg_resources.resource_filename( neofox.tests.__name__, "resources/test_patient_file.txt" @@ -68,14 +69,13 @@ def setUp(self): def test_neoantigens_without_gene(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for n in neoantigens: n.gene = "" annotations = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ).get_annotations() self.assertEqual(5, len(annotations)) self.assertIsInstance(annotations[0], Neoantigen) @@ -83,7 +83,7 @@ def test_neoantigens_without_gene(self): def _get_test_data(self): input_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_model_file.txt" + neofox.tests.__name__, "resources/test_data_model.txt" ) data = pd.read_csv(input_file, sep="\t") data = data.replace({np.nan: None}) @@ -92,14 +92,11 @@ def _get_test_data(self): neofox.tests.__name__, "resources/test_patient_file.txt" ) patients = ModelConverter.parse_patients_file(patients_file, self.hla_database) - patient_id = "Pt29" - return neoantigens, patients, patient_id + return neoantigens, patients def test_neofox(self): """ - This test is equivalent to the command line call: - neofox --candidate-file /projects/SUMMIT/WP1.2/neofox/development/Pt29.sequences4testing.txt --patient-id Pt29 - --patients-data 
../resources/patient.pt29.csv + This test is equivalent to the command line call of neofox NOTE: we will need to check the output when the calculation of resuls and printing to stdout have been decoupled """ @@ -109,10 +106,6 @@ def test_neofox(self): datetime.now() ), ) - output_file_neoantigens = pkg_resources.resource_filename( - neofox.tests.__name__, - "resources/output_{:%Y%m%d%H%M%S}.neoantigens.tsv".format(datetime.now()), - ) output_json_neoantigens = pkg_resources.resource_filename( neofox.tests.__name__, "resources/output_{:%Y%m%d%H%M%S}.neoantigen_candidates.json".format( @@ -121,28 +114,27 @@ def test_neofox(self): ) annotations = NeoFox( neoantigens=self.neoantigens, - patient_id=self.patient_id, patients=self.patients, num_cpus=4, ).get_annotations() annotation_names = [a.name for n in annotations for a in n.neofox_annotations.annotations] # check it does contain any of the MixMHCpred annotations - self.assertIn("MixMHC2pred_best_peptide", annotation_names) - self.assertIn("MixMHC2pred_best_rank", annotation_names) - self.assertIn("MixMHC2pred_best_allele", annotation_names) - self.assertIn("MixMHCpred_best_peptide", annotation_names) - self.assertIn("MixMHCpred_best_score", annotation_names) - self.assertIn("MixMHCpred_best_rank", annotation_names) - self.assertIn("MixMHCpred_best_allele", annotation_names) + self.assertIn("MixMHC2pred_bestRank_peptide", annotation_names) + self.assertIn("MixMHC2pred_bestRank_rank", annotation_names) + self.assertIn("MixMHC2pred_bestRank_allele", annotation_names) + self.assertIn("MixMHCpred_bestScore_peptide", annotation_names) + self.assertIn("MixMHCpred_bestScore_score", annotation_names) + self.assertIn("MixMHCpred_bestScore_rank", annotation_names) + self.assertIn("MixMHCpred_bestScore_allele", annotation_names) # checks it does have some of the NetMHCpan annotations - self.assertIn("Best_affinity_MHCI_9mer_position_mutation", annotation_names) - self.assertIn("Best_rank_MHCII_score", annotation_names) + 
self.assertIn("NetMHCpan_bestAffinity9mer_positionMutation", annotation_names) + self.assertIn("NetMHCIIpan_bestRank_rank", annotation_names) # writes output - ModelConverter.annotations2table(neoantigens=annotations).to_csv( + ModelConverter.annotations2neoantigens_table(neoantigens=annotations).to_csv( output_file, sep="\t", index=False) - ModelConverter._objects2dataframe(annotations).to_csv(output_file_neoantigens, sep="\t", index=False) + with open(output_json_neoantigens, "wb") as f: f.write(json.dumps(ModelConverter.objects2json(annotations))) @@ -156,10 +148,7 @@ def test_neomouse(self): datetime.now() ), ) - output_file_neoantigens = pkg_resources.resource_filename( - neofox.tests.__name__, - "resources/output_mouse_{:%Y%m%d%H%M%S}.neoantigens.tsv".format(datetime.now()), - ) + output_json_neoantigens = pkg_resources.resource_filename( neofox.tests.__name__, "resources/output_mouse_{:%Y%m%d%H%M%S}.neoantigen_candidates.json".format( @@ -168,7 +157,6 @@ def test_neomouse(self): ) annotations = NeoFox( neoantigens=self.neoantigens_mouse, - patient_id=self.patient_id, patients=self.patients_mouse, num_cpus=4, reference_folder=self.references_mouse @@ -176,13 +164,13 @@ def test_neomouse(self): annotation_names = [a.name for n in annotations for a in n.neofox_annotations.annotations] # checks it does have some of the NetMHCpan annotations - self.assertIn("Best_affinity_MHCI_9mer_position_mutation", annotation_names) - self.assertIn("Best_rank_MHCII_score", annotation_names) + self.assertIn("NetMHCpan_bestAffinity9mer_positionMutation", annotation_names) + self.assertIn("NetMHCIIpan_bestRank_rank", annotation_names) # writes output - ModelConverter.annotations2table(neoantigens=annotations).to_csv( + ModelConverter.annotations2neoantigens_table(neoantigens=annotations).to_csv( output_file, sep="\t", index=False) - ModelConverter._objects2dataframe(annotations).to_csv(output_file_neoantigens, sep="\t", index=False) + with open(output_json_neoantigens, "wb") as 
f: f.write(json.dumps(ModelConverter.objects2json(annotations))) @@ -192,14 +180,13 @@ def test_neomouse(self): def test_neofox_only_one_neoantigen(self): """""" input_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_data_only_one.txt" + neofox.tests.__name__, "resources/test_data_model_only_one.txt" ) neoantigens = ModelConverter.parse_candidate_file( input_file ) annotations = NeoFox( neoantigens=neoantigens, - patient_id=self.patient_id, patients=self.patients, num_cpus=4, ).get_annotations() @@ -209,16 +196,15 @@ def test_neofox_only_one_neoantigen(self): def test_neofox_model_input(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() annotations = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=2, + num_cpus=4, ).get_annotations() self.assertEqual(5, len(annotations)) self.assertIsInstance(annotations[0], Neoantigen) - self.assertTrue(len(annotations[0].neofox_annotations.annotations) == 86) + self.assertTrue(len(annotations[0].neofox_annotations.annotations) == 82) def test_neofox_without_mixmhcpreds(self): """ @@ -229,29 +215,67 @@ def test_neofox_without_mixmhcpreds(self): del os.environ[NEOFOX_MIXMHC2PRED_ENV] annotations = NeoFox( neoantigens=self.neoantigens, - patient_id=self.patient_id, patients=self.patients, - num_cpus=1, + num_cpus=4, + ).get_annotations() + annotation_names = [a.name for n in annotations for a in n.neofox_annotations.annotations] + # check it does not contain any of the MixMHCpred annotations + self.assertNotIn("MixMHC2pred_bestRank_peptide", annotation_names) + self.assertNotIn("MixMHC2pred_bestRank_rank", annotation_names) + self.assertNotIn("MixMHC2pred_bestRank_allele", annotation_names) + self.assertNotIn("MixMHCpred_bestScore_peptide", annotation_names) + self.assertNotIn("MixMHCpred_bestScore_score", annotation_names) + self.assertNotIn("MixMHCpred_bestScore_rank", annotation_names) + 
self.assertNotIn("MixMHCpred_bestScore_allele", annotation_names) + # checks it does have some of the NetMHCpan annotations + self.assertIn("NetMHCpan_bestAffinity9mer_positionMutation", annotation_names) + self.assertIn("NetMHCIIpan_bestRank_rank", annotation_names) + + def test_neofox_without_prime(self): + """ + This test aims at testing neofox when Prime is not configured. As these are optional it + shoudl just run, but without these annotations in the output + """ + del os.environ[NEOFOX_PRIME_ENV] + annotations = NeoFox( + neoantigens=self.neoantigens[0:1], + patients=self.patients, + num_cpus=4, ).get_annotations() annotation_names = [a.name for n in annotations for a in n.neofox_annotations.annotations] # check it does not contain any of the MixMHCpred annotations - self.assertNotIn("MixMHC2pred_best_peptide", annotation_names) - self.assertNotIn("MixMHC2pred_best_rank", annotation_names) - self.assertNotIn("MixMHC2pred_best_allele", annotation_names) - self.assertNotIn("MixMHCpred_best_peptide", annotation_names) - self.assertNotIn("MixMHCpred_best_score", annotation_names) - self.assertNotIn("MixMHCpred_best_rank", annotation_names) - self.assertNotIn("MixMHCpred_best_allele", annotation_names) + self.assertNotIn("PRIME_best_peptide", annotation_names) + self.assertNotIn("PRIME_best_rank", annotation_names) + self.assertNotIn("PRIME_best_allele", annotation_names) # checks it does have some of the NetMHCpan annotations - self.assertIn("Best_affinity_MHCI_9mer_position_mutation", annotation_names) - self.assertIn("Best_rank_MHCII_score", annotation_names) + self.assertIn("NetMHCpan_bestAffinity9mer_positionMutation", annotation_names) + self.assertIn("NetMHCIIpan_bestRank_rank", annotation_names) + + def test_neofox_with_prime_and_without_mixmhcpred(self): + """ + This test aims at testing neofox when Prime is configured, but not MixMHCpred. 
As PRIME depends on + MixMHCpred no PRIME annotations should be provided + """ + del os.environ[NEOFOX_MIXMHCPRED_ENV] + annotations = NeoFox( + neoantigens=self.neoantigens[0:1], + patients=self.patients, + num_cpus=4, + ).get_annotations() + annotation_names = [a.name for n in annotations for a in n.neofox_annotations.annotations] + # check it does not contain any of the MixMHCpred annotations + self.assertNotIn("PRIME_best_peptide", annotation_names) + self.assertNotIn("PRIME_best_rank", annotation_names) + self.assertNotIn("PRIME_best_allele", annotation_names) + # checks it does have some of the NetMHCpan annotations + self.assertIn("NetMHCpan_bestAffinity9mer_positionMutation", annotation_names) + self.assertIn("NetMHCIIpan_bestRank_rank", annotation_names) @unittest.skip def test_neofox_performance(self): def compute_annotations(): return NeoFox( neoantigens=self.neoantigens, - patient_id=self.patient_id, patients=self.patients, num_cpus=4, ).get_annotations() @@ -269,7 +293,6 @@ def test_neofox_performance_single_neoantigen(self): def compute_annotations(): return NeoFox( neoantigens=neoantigens, - patient_id=self.patient_id, patients=self.patients, num_cpus=4, ).get_annotations() @@ -277,16 +300,15 @@ def compute_annotations(): print("Average time: {}".format(timeit.timeit(compute_annotations, number=10))) def test_neofox_with_config(self): - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() config_file = pkg_resources.resource_filename( neofox.tests.__name__, "resources/neofox_config.txt" ) try: NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, configuration_file=config_file, ) except NeofoxConfigurationException as e: @@ -296,40 +318,39 @@ def test_neofox_with_config(self): def test_neofox_without_mhc2(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for p in patients: 
p.mhc2 = [] annotations = NeoFox( neoantigens=neoantigens, - patient_id=self.patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ).get_annotations() self.assertEqual(5, len(annotations)) self.assertIsInstance(annotations[0], Neoantigen) - self.assertTrue(len(annotations[0].neofox_annotations.annotations) == 65) + self.assertEqual(len(annotations[0].neofox_annotations.annotations), 63) def test_neofox_without_mhc1(self): - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for p in patients: p.mhc1 = [] annotations = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ).get_annotations() self.assertEqual(5, len(annotations)) self.assertIsInstance(annotations[0], Neoantigen) - self.assertTrue(len(annotations[0].neofox_annotations.annotations) == 39) + self.assertEqual(len(annotations[0].neofox_annotations.annotations), 39) def test_gene_expression_imputation(self): - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() + for p in patients: + p.is_rna_available = False neofox = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ) for n in neofox.neoantigens: self.assertIsNotNone(n.imputed_gene_expression) @@ -337,88 +358,83 @@ def test_gene_expression_imputation(self): def test_neoantigens_with_non_existing_gene(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for n in neoantigens: n.gene = "IDONTEXIST" neofox = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ) for n in neofox.neoantigens: self.assertIsNone(n.imputed_gene_expression) def test_neoantigens_with_empty_gene(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for n in neoantigens: n.gene = "" 
neofox = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ) for n in neofox.neoantigens: self.assertIsNone(n.imputed_gene_expression) def test_neoantigens_with_empty_rna_expression(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for n in neoantigens: n.rna_expression = None + n.patient_identifier = "patient_without_tumor_type" neofox = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ) - for p in neofox.patients.values(): - if p.identifier == patient_id: - self.assertFalse(p.is_rna_available) + for n in neofox.neoantigens: + self.assertIsNone(n.rna_expression) def test_neoantigens_with_rna_expression(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for n in neoantigens: n.rna_expression = 1.2 neofox = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ) - for p in neofox.patients.values(): - if p.identifier == patient_id: - self.assertTrue(p.is_rna_available) + + for n in neofox.neoantigens: + self.assertEqual(n.rna_expression, 1.2) + def test_patient_with_non_existing_allele_does_not_crash(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for p in patients: # sets one MHC I allele to a non existing allele p.mhc1[0].alleles[0] = MhcFactory.build_mhc1_alleles(["HLA-A*99:99"], mhc_database=self.hla_database)[0].alleles[0] neofox = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ) neofox.get_annotations() def test_neoantigens_with_rare_aminoacids(self): """""" - neoantigens, patients, patient_id = self._get_test_data() + neoantigens, patients = self._get_test_data() for n in neoantigens: - position_to_replace = 
int(len(n.mutation.mutated_xmer)/2) - n.mutation.mutated_xmer = n.mutation.mutated_xmer[:position_to_replace] + "U" + \ - n.mutation.mutated_xmer[position_to_replace+1:] + position_to_replace = int(len(n.mutated_xmer)/2) + n.mutated_xmer = n.mutated_xmer[:position_to_replace] + "U" + \ + n.mutated_xmer[position_to_replace+1:] annotations = NeoFox( neoantigens=neoantigens, - patient_id=patient_id, patients=patients, - num_cpus=1, + num_cpus=4, ).get_annotations() self.assertEqual(5, len(annotations)) self.assertIsInstance(annotations[0], Neoantigen) @@ -431,10 +447,8 @@ def test_neoantigens_with_rare_aminoacids(self): def test_neoantigen_without_9mer_netmhcpan_results(self): patient_identifier = "12345" neoantigen = Neoantigen( - mutation=Mutation( - wild_type_xmer="HLAQHQRVHTGEKPYKCNECGKTFRQT", - mutated_xmer="HLAQHQRVHTGEKAYKCNECGKTFRQT" - ), + wild_type_xmer="HLAQHQRVHTGEKPYKCNECGKTFRQT", + mutated_xmer="HLAQHQRVHTGEKAYKCNECGKTFRQT", patient_identifier=patient_identifier ) patient = PatientFactory.build_patient( @@ -448,7 +462,7 @@ def test_neoantigen_without_9mer_netmhcpan_results(self): annotations = NeoFox( neoantigens=[neoantigen], patients=[patient], - num_cpus=1, + num_cpus=4, ).get_annotations() # it does not crash even though there are no best 9mers self.assertIsNotNone(annotations) @@ -456,9 +470,7 @@ def test_neoantigen_without_9mer_netmhcpan_results(self): def test_neoantigen_in_proteome(self): patient_identifier = "12345" neoantigen = Neoantigen( - mutation=Mutation( - mutated_xmer="PKLLENLLSKGETISFLECF" - ), + mutated_xmer="PKLLENLLSKGETISFLECF", patient_identifier=patient_identifier ) patient = PatientFactory.build_patient( @@ -472,7 +484,7 @@ def test_neoantigen_in_proteome(self): annotations = NeoFox( neoantigens=[neoantigen], patients=[patient], - num_cpus=1, + num_cpus=4, ).get_annotations() # it does not crash even though there are no best 9mers self.assertIsNotNone(annotations) @@ -480,10 +492,8 @@ def test_neoantigen_in_proteome(self): def 
test_neoantigen_failing(self): patient_identifier = "12345" neoantigen = Neoantigen( - mutation=Mutation( - wild_type_xmer="ARPDMFCLFHGKRYFPGESWHPYLEPQ", - mutated_xmer="ARPDMFCLFHGKRHFPGESWHPYLEPQ" - ), + wild_type_xmer="ARPDMFCLFHGKRYFPGESWHPYLEPQ", + mutated_xmer="ARPDMFCLFHGKRHFPGESWHPYLEPQ", patient_identifier=patient_identifier ) patient = Patient( @@ -496,7 +506,7 @@ def test_neoantigen_failing(self): annotations = NeoFox( neoantigens=[neoantigen], patients=[patient], - num_cpus=1, + num_cpus=4, ).get_annotations() # it does not crash even though there are no best 9mers self.assertIsNotNone(annotations) @@ -504,9 +514,7 @@ def test_neoantigen_failing(self): def test_neoantigen_no_wt_failing(self): patient_identifier = "12345" neoantigen = Neoantigen( - mutation=Mutation( - mutated_xmer="SPSFPLEPDDEVFTAIAKAMEEMVEDS" - ), + mutated_xmer="SPSFPLEPDDEVFTAIAKAMEEMVEDS", patient_identifier=patient_identifier ) patient = Patient( @@ -519,7 +527,7 @@ def test_neoantigen_no_wt_failing(self): annotations = NeoFox( neoantigens=[neoantigen], patients=[patient], - num_cpus=1, + num_cpus=4, ).get_annotations() # it does not crash even though there are no best 9mers self.assertIsNotNone(annotations) @@ -563,6 +571,59 @@ def test_neofox_synthetic_data(self): ).get_annotations() self.assertIsNotNone(annotations) + def test_with_all_neoepitopes(self): + """ + This test aims at testing neofox when MixMHCpred and MixMHC2pred are not configured. As these are optional it + shoudl just run, but without these annotations in the output + """ + annotations = NeoFox( + neoantigens=self.neoantigens[2:3], # only one as this is quite slow, the first one is an indel! 
+ patients=self.patients, + num_cpus=4, + with_all_neoepitopes=True + ).get_annotations() + + found_recognition_potential = False + self.assertIsNotNone(annotations) + for n in annotations: + self.assertIsNotNone(n.neoepitopes_mhc_i) + self.assertIsNotNone(n.neoepitopes_mhc_i_i) + for e in n.neoepitopes_mhc_i: + self.assertIsNotNone(e.mutated_peptide) + self.assertIsNotNone(e.wild_type_peptide) + self.assertGreater(e.affinity_mutated, 0) + self.assertGreater(e.affinity_wild_type, 0) + self.assertGreater(e.rank_mutated, 0) + self.assertLess(e.rank_mutated, neofox.RANK_MHCI_THRESHOLD_DEFAULT) + self.assertGreater(e.rank_wild_type, 0) + self.assertIsNotNone(e.allele_mhc_i) + self.assertNotEqual(e.allele_mhc_i.name, '') + self.assertIsNotNone(e.isoform_mhc_i_i) + self.assertEqual(e.isoform_mhc_i_i.name, '') + recognition_potential = EpitopeHelper.get_annotation_by_name( + e.neofox_annotations.annotations, name='recognition_potential') + found_recognition_potential = found_recognition_potential or recognition_potential != 'NA' + for e in n.neoepitopes_mhc_i_i: + self.assertIsNotNone(e.mutated_peptide) + self.assertIsNotNone(e.wild_type_peptide) + self.assertGreater(e.affinity_mutated, 0) + self.assertGreater(e.affinity_wild_type, 0) + self.assertGreater(e.rank_mutated, 0) + self.assertLess(e.rank_mutated, neofox.RANK_MHCII_THRESHOLD_DEFAULT) + self.assertGreater(e.rank_wild_type, 0) + self.assertIsNotNone(e.allele_mhc_i) + self.assertEqual(e.allele_mhc_i.name, '') + self.assertIsNotNone(e.isoform_mhc_i_i) + self.assertNotEqual(e.isoform_mhc_i_i.name, '') + + self.assertTrue(found_recognition_potential) + + df_epitopes_mhci = ModelConverter.annotations2epitopes_table(annotations, mhc=neofox.MHC_I) + self.assertFalse(any(c.startswith('isoformMhcII') for c in df_epitopes_mhci.columns)) + + df_epitopes_mhcii = ModelConverter.annotations2epitopes_table(annotations, mhc=neofox.MHC_II) + self.assertFalse(any(c.startswith('alleleMhcI') for c in df_epitopes_mhcii.columns)) + def 
_regression_test_on_output_file(self, new_file, previous_filename="resources/output_previous.txt"): previous_file = pkg_resources.resource_filename(neofox.tests.__name__, previous_filename) if os.path.exists(previous_file): diff --git a/neofox/tests/integration_tests/test_neofox_epitope.py b/neofox/tests/integration_tests/test_neofox_epitope.py new file mode 100755 index 00000000..7bb11e30 --- /dev/null +++ b/neofox/tests/integration_tests/test_neofox_epitope.py @@ -0,0 +1,269 @@ +# +# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. +# +# This file is part of Neofox +# (see https://github.com/tron-bioinformatics/neofox). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see .# +import os +import unittest +import pkg_resources +import neofox.tests +from neofox.command_line import _write_results_epitopes +from neofox.model.conversion import ModelConverter +from neofox.model.neoantigen import PredictedEpitope, Patient +from neofox.model.validation import ModelValidator +from neofox.neofox_epitope import NeoFoxEpitope +from neofox.tests.integration_tests.integration_test_tools import get_hla_one_test, get_hla_two_test, \ + BaseIntegrationTest + + +class TestNeofoxEpitope(BaseIntegrationTest): + + def setUp(self): + super().setUp() + + input_file = pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_data_model_realistic.txt" + ) + patients_file = pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_patient_file.txt" + ) + patients_file_mouse = pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_patient_file_mouse.txt" + ) + self.hla_database = self.references.get_mhc_database() + self.h2_database = self.references_mouse.get_mhc_database() + self.patients = ModelConverter.parse_patients_file(patients_file, self.hla_database) + self.patients_mouse = ModelConverter.parse_patients_file(patients_file_mouse, self.h2_database) + self.neoantigens = ModelConverter.parse_candidate_file(input_file) + self.neoantigens_mouse = ModelConverter.parse_candidate_file(input_file) + + def test_neofox_epitope(self): + + neoepitopes = [ + PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01") + ) + ] + + annotated_neoepitopes = NeoFoxEpitope( + neoepitopes=neoepitopes, + num_cpus=4, + ).get_annotations() + + self.assertEqual(len(annotated_neoepitopes), 2) # 16 from the patient neoepitope + for e, ae in zip(neoepitopes, 
annotated_neoepitopes): + if ModelValidator.is_mhci_epitope(e): + self.assert_neoepitope_mhci(original_neoepitope=e, annotated_neoepitope=ae) + elif ModelValidator.is_mhcii_epitope(e): + self.assert_neoepitope_mhcii(original_neoepitope=e, annotated_neoepitope=ae) + else: + self.assertTrue(False) + + def test_neofox_epitope_with_patients(self): + + neoepitopes = [ + PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01") + ), + PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + patient_identifier="123" + ), + ] + + patients = [ + Patient( + identifier="123", + mhc1=get_hla_one_test(self.references.get_mhc_database()), + mhc2=get_hla_two_test(self.references.get_mhc_database()) + ) + ] + + annotated_neoepitopes = NeoFoxEpitope( + neoepitopes=neoepitopes, + patients=patients, + num_cpus=4, + ).get_annotations() + + self.assertEqual(len(annotated_neoepitopes), 18) # 16 from the patient neoepitope + + def test_neofox_epitope_writing_output_table(self): + + neoepitopes = [ + PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01") + ) + ] + + annotated_neoepitopes = NeoFoxEpitope( + neoepitopes=neoepitopes, + num_cpus=4, + ).get_annotations() + + _write_results_epitopes( + annotated_neoepitopes, + output_folder=pkg_resources.resource_filename(neofox.tests.__name__, "resources"), + output_prefix="test_neoepitopes") + + self.assertTrue(os.path.exists(pkg_resources.resource_filename( + neofox.tests.__name__, 
"resources/test_neoepitopes_mhcI_epitope_candidates_annotated.tsv"))) + self.assertTrue(os.path.exists(pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_neoepitopes_mhcII_epitope_candidates_annotated.tsv"))) + + def test_neofox_epitope_writing_output_table_with_patients(self): + + neoepitopes = [ + PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + PredictedEpitope( + mutated_peptide="DEVLGEPSQDILVTDQTR", + wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01") + ), + PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + patient_identifier="123" + ) + ] + + patients = [ + Patient( + identifier="123", + mhc1=get_hla_one_test(self.references.get_mhc_database()), + mhc2=get_hla_two_test(self.references.get_mhc_database()) + ) + ] + + annotated_neoepitopes = NeoFoxEpitope( + neoepitopes=neoepitopes, + patients=patients, + num_cpus=4, + ).get_annotations() + + _write_results_epitopes( + annotated_neoepitopes, + output_folder=pkg_resources.resource_filename(neofox.tests.__name__, "resources"), + output_prefix="test_neoepitopes_with_patients") + + self.assertTrue(os.path.exists(pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_neoepitopes_with_patients_mhcI_epitope_candidates_annotated.tsv"))) + self.assertTrue(os.path.exists(pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_neoepitopes_with_patients_mhcII_epitope_candidates_annotated.tsv"))) + + def test_with_expression_imputation(self): + + neoepitopes = [ + PredictedEpitope( + patient_identifier='123', + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + rna_expression=12345, + gene='PTEN' + ), + PredictedEpitope( + patient_identifier='123', + mutated_peptide="DEVLGEPSQDILVTDQTR", + 
wild_type_peptide="DEVLGEPSQDILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform("HLA-DRB1*01:01"), + rna_expression=12345, + gene='PTEN' + ) + ] + + patients = [ + Patient( + identifier="123", + mhc1=get_hla_one_test(self.references.get_mhc_database()), + mhc2=get_hla_two_test(self.references.get_mhc_database()), + tumor_type='BRCA' + ) + ] + + neofox_runner = NeoFoxEpitope( + neoepitopes=neoepitopes, + patients=patients, + num_cpus=4 + ) + for n, n2 in zip(neoepitopes, neofox_runner.neoepitopes): + self.assertIsNotNone(n2.imputed_gene_expression) + self.assertNotEqual(n2.imputed_gene_expression, 0) + self.assertEqual(n2.imputed_gene_expression, n2.rna_expression) + + def _assert_neeoepitope(self, neoepitope: PredictedEpitope): + # netMHCpan or netMHC2pan annotations + self.assertIsInstance(neoepitope.rank_mutated, float) + self.assertIsInstance(neoepitope.rank_wild_type, float) + self.assertIsInstance(neoepitope.affinity_mutated, float) + self.assertIsInstance(neoepitope.affinity_wild_type, float) + + # MixMHCpred annotations + self.assert_float_annotation(neoepitope, annotation_name="MixMHCpred_score") + self.assert_float_annotation(neoepitope, annotation_name="MixMHCpred_rank") + self.assert_float_annotation(neoepitope, annotation_name="MixMHCpred_WT_score") + self.assert_float_annotation(neoepitope, annotation_name="MixMHCpred_WT_rank") + + # PRIME annotations + self.assert_float_annotation(neoepitope, annotation_name="PRIME_score") + self.assert_float_annotation(neoepitope, annotation_name="PRIME_rank") + self.assert_float_annotation(neoepitope, annotation_name="PRIME_WT_score") + self.assert_float_annotation(neoepitope, annotation_name="PRIME_WT_rank") + + # additional annotations + self.assert_annotation(neoepitope, annotation_name="position_mutation") + self.assert_annotation(neoepitope, annotation_name="anchor_mutated") + self.assert_annotation(neoepitope, annotation_name="amplitude") + self.assert_annotation(neoepitope, 
annotation_name="pathogen_similarity") + self.assert_annotation(neoepitope, annotation_name="recognition_potential") + self.assert_annotation(neoepitope, annotation_name="DAI") + self.assert_annotation(neoepitope, annotation_name="Improved_Binder_MHCI") + self.assert_annotation(neoepitope, annotation_name="Selfsimilarity") + self.assert_annotation(neoepitope, annotation_name="Selfsimilarity_conserved_binder") + self.assert_annotation(neoepitope, annotation_name="mutation_not_found_in_proteome") + self.assert_annotation(neoepitope, annotation_name="dissimilarity_score") + self.assert_annotation(neoepitope, annotation_name="number_of_mismatches") + self.assert_annotation(neoepitope, annotation_name="IEDB_Immunogenicity") + self.assert_annotation(neoepitope, annotation_name="hex_alignment_score") + + # others to comes + self.assert_annotation(neoepitope, annotation_name="Priority_score") + self.assert_annotation(neoepitope, annotation_name="Tcell_predictor") \ No newline at end of file diff --git a/neofox/tests/integration_tests/test_netmhcpan.py b/neofox/tests/integration_tests/test_netmhcpan.py index b060d067..b274be2b 100755 --- a/neofox/tests/integration_tests/test_netmhcpan.py +++ b/neofox/tests/integration_tests/test_netmhcpan.py @@ -44,11 +44,9 @@ def test_netmhcpan_epitope_iedb(self): ) # this is an epitope from IEDB of length 9 mutated = "NLVPMVATV" - predictions = netmhcpan_predictor.mhc_prediction( - sequence=mutated, - mhc_alleles=self.test_mhc_one, - set_available_mhc=self.available_alleles.get_available_mhc_i(), - ) + available_alleles = netmhcpan_predictor.get_only_available_alleles( + mhc_alleles=self.test_mhc_one, set_available_mhc=self.available_alleles.get_available_mhc_i()) + predictions = netmhcpan_predictor.mhc_prediction(available_alleles=available_alleles, sequence=mutated) self.assertEqual(18, len(predictions)) def test_netmhcpan_too_small_epitope(self): @@ -57,11 +55,9 @@ def test_netmhcpan_too_small_epitope(self): 
blastp_runner=self.proteome_blastp_runner ) mutated = "NLVP" - predictions = netmhcpan_predictor.mhc_prediction( - sequence=mutated, - mhc_alleles=self.test_mhc_one, - set_available_mhc=self.available_alleles.get_available_mhc_i(), - ) + available_alleles = netmhcpan_predictor.get_only_available_alleles( + mhc_alleles=self.test_mhc_one, set_available_mhc=self.available_alleles.get_available_mhc_i()) + predictions = netmhcpan_predictor.mhc_prediction(sequence=mutated, available_alleles=available_alleles) self.assertEqual(0, len(predictions)) def test_netmhcpan_rare_aminoacid(self): @@ -71,11 +67,9 @@ def test_netmhcpan_rare_aminoacid(self): ) # this is an epitope from IEDB of length 9 mutated = "XTTDSWGKF" - predictions = netmhcpan_predictor.mhc_prediction( - sequence=mutated, - mhc_alleles=self.test_mhc_one, - set_available_mhc=self.available_alleles.get_available_mhc_i(), - ) + available_alleles = netmhcpan_predictor.get_only_available_alleles( + mhc_alleles=self.test_mhc_one, set_available_mhc=self.available_alleles.get_available_mhc_i()) + predictions = netmhcpan_predictor.mhc_prediction(sequence=mutated, available_alleles=available_alleles) self.assertEqual(18, len(predictions)) def test_netmhc2pan_epitope_iedb(self): @@ -90,11 +84,11 @@ def test_netmhc2pan_epitope_iedb(self): predictions = netmhc2pan_predictor.mhc2_prediction(sequence=mutated, mhc_alleles=combinations) self.assertEqual(10, len(predictions)) for p in predictions: - self.assertIsNotNone(p.peptide) - self.assertIsNotNone(p.hla) - self.assertIsNotNone(p.affinity_score) - self.assertIsNotNone(p.pos) - self.assertIsNotNone(p.rank) + self.assertIsNotNone(p.mutated_peptide) + self.assertIsNotNone(p.allele_mhc_i) + self.assertIsNotNone(p.affinity_mutated) + self.assertIsNotNone(p.position) + self.assertIsNotNone(p.rank_mutated) def test_netmhc2pan_too_small_epitope(self): netmhc2pan_predictor = NetMhcIIPanPredictor( diff --git a/neofox/tests/integration_tests/test_netmhcpan_mouse.py 
b/neofox/tests/integration_tests/test_netmhcpan_mouse.py index c0e2c29b..bc59eb83 100755 --- a/neofox/tests/integration_tests/test_netmhcpan_mouse.py +++ b/neofox/tests/integration_tests/test_netmhcpan_mouse.py @@ -45,11 +45,9 @@ def test_netmhcpan_epitope_iedb(self): ) # this is an epitope from IEDB of length 9 mutated = "NLVPMVATV" - predictions = netmhcpan_predictor.mhc_prediction( - sequence=mutated, - mhc_alleles=self.test_mhc_one, - set_available_mhc=self.available_alleles.get_available_mhc_i(), - ) + available_alleles = netmhcpan_predictor.get_only_available_alleles( + mhc_alleles=self.test_mhc_one, set_available_mhc=self.available_alleles.get_available_mhc_i()) + predictions = netmhcpan_predictor.mhc_prediction(sequence=mutated, available_alleles=available_alleles) self.assertEqual(9, len(predictions)) def test_netmhcpan_too_small_epitope(self): @@ -58,11 +56,9 @@ def test_netmhcpan_too_small_epitope(self): blastp_runner=self.proteome_blastp_runner ) mutated = "NLVP" - predictions = netmhcpan_predictor.mhc_prediction( - sequence=mutated, - mhc_alleles=self.test_mhc_one, - set_available_mhc=self.available_alleles.get_available_mhc_i(), - ) + available_alleles = netmhcpan_predictor.get_only_available_alleles( + mhc_alleles=self.test_mhc_one, set_available_mhc=self.available_alleles.get_available_mhc_i()) + predictions = netmhcpan_predictor.mhc_prediction(sequence=mutated, available_alleles=available_alleles) self.assertEqual(0, len(predictions)) def test_netmhcpan_rare_aminoacid(self): @@ -72,11 +68,9 @@ def test_netmhcpan_rare_aminoacid(self): ) # this is an epitope from IEDB of length 9 mutated = "XTTDSWGKF" - predictions = netmhcpan_predictor.mhc_prediction( - sequence=mutated, - mhc_alleles=self.test_mhc_one, - set_available_mhc=self.available_alleles.get_available_mhc_i(), - ) + available_alleles = netmhcpan_predictor.get_only_available_alleles( + mhc_alleles=self.test_mhc_one, set_available_mhc=self.available_alleles.get_available_mhc_i()) + predictions 
= netmhcpan_predictor.mhc_prediction(sequence=mutated, available_alleles=available_alleles) self.assertEqual(9, len(predictions)) def test_netmhc2pan_epitope_iedb(self): @@ -94,11 +88,11 @@ def test_netmhc2pan_epitope_iedb(self): def _assert_predictions(self, predictions): for p in predictions: - self.assertIsNotNone(p.peptide) - self.assertIsNotNone(p.hla) - self.assertIsNotNone(p.affinity_score) - self.assertIsNotNone(p.pos) - self.assertIsNotNone(p.rank) + self.assertIsNotNone(p.mutated_peptide) + self.assertIsNotNone(p.allele_mhc_i) + self.assertIsNotNone(p.affinity_mutated) + self.assertIsNotNone(p.position) + self.assertIsNotNone(p.rank_mutated) def test_netmhc2pan_too_small_epitope(self): netmhc2pan_predictor = NetMhcIIPanPredictor( diff --git a/neofox/tests/integration_tests/test_prime.py b/neofox/tests/integration_tests/test_prime.py index 40b87d0a..fdeeb934 100755 --- a/neofox/tests/integration_tests/test_prime.py +++ b/neofox/tests/integration_tests/test_prime.py @@ -21,10 +21,10 @@ from neofox.model.factories import MhcFactory from neofox.model.mhc_parser import HlaParser import neofox.tests.integration_tests.integration_test_tools as integration_test_tools -from neofox.published_features.prime import Prime +from neofox.MHC_predictors.prime import Prime from neofox.helpers.runner import Runner from neofox.annotation_resources.uniprot.uniprot import Uniprot -from neofox.tests.tools import get_mutation +from neofox.tests.tools import get_neoantigen class TestPrime(TestCase): @@ -40,54 +40,56 @@ def setUp(self): self.uniprot = Uniprot(self.references.uniprot_pickle) def test_prime_epitope(self): - mutation = get_mutation(mutated_xmer="LVTDQTRLE", wild_type_xmer="LVTDQTRNE") - best_peptide, best_rank, best_allele, best_score = self.prime.run( - mutation=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot - ) - self.assertEquals("LVTDQTRL", best_peptide) - self.assertAlmostEqual(0.163810, best_score, delta=0.00001) - self.assertEquals(3.00, best_rank) - 
self.assertEquals("HLA-C*05:01", best_allele) + neoantigen = get_neoantigen(mutated_xmer="LVTDQTRLE", wild_type_xmer="LVTDQTRNE") + self.prime.run(neoantigen=neoantigen, mhc=self.test_mhc_one, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.prime.results, maximum=True) + self.assertEquals("LVTDQTRL", best_result.mutated_peptide) + self.assertAlmostEqual(0.163810, best_result.affinity_mutated, delta=0.00001) + self.assertEquals(3.00, best_result.rank_mutated) + self.assertEquals("HLA-C*05:01", best_result.allele_mhc_i.name) def test_prime_too_small_epitope(self): - mutation = get_mutation(mutated_xmer="NLVP", wild_type_xmer="NLNP") - best_peptide, best_rank, best_allele, best_score = self.prime.run( - mutation=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot - ) - self.assertIsNone(best_peptide) - self.assertIsNone(best_score) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) + neoantigen = get_neoantigen(mutated_xmer="NLVP", wild_type_xmer="NLNP") + self.prime.run(neoantigen=neoantigen, mhc=self.test_mhc_one, uniprot=self.uniprot) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.prime.results, maximum=True) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.affinity_mutated) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.allele_mhc_i.name) def test_prime_not_supported_allele(self): """ this is a combination of neoepitope and HLA alleles from Balachandran """ - mutation = get_mutation(mutated_xmer="SIYGGLVLI", wild_type_xmer="PIYGGLVLI") - best_peptide, best_rank, best_allele, best_score = self.prime.run( - mutation=mutation, + neoantigen = get_neoantigen(mutated_xmer="SIYGGLVLI", wild_type_xmer="PIYGGLVLI") + self.prime.run( + neoantigen=neoantigen, mhc=MhcFactory.build_mhc1_alleles(["A02:01", "B44:02", "C05:17", "C05:01"], self.hla_database), uniprot=self.uniprot ) - self.assertEqual('SIYGGLVLI', best_peptide) - 
self.assertEqual(0.186328, best_score) - self.assertEqual(0.2, best_rank) - self.assertEqual('HLA-A*02:01', best_allele) + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.prime.results, maximum=True) + self.assertEqual('SIYGGLVLI', best_result.mutated_peptide) + self.assertEqual(0.186328, best_result.affinity_mutated) + self.assertEqual(0.2, best_result.rank_mutated) + self.assertEqual('HLA-A*02:01', best_result.allele_mhc_i.name) def test_prime_rare_aminoacid(self): for wild_type_xmer, mutated_xmer in integration_test_tools.mutations_with_rare_aminoacids: - mutation = get_mutation(mutated_xmer=mutated_xmer, wild_type_xmer=wild_type_xmer) - best_peptide, best_rank, best_allele, best_score = self.prime.run( - mutation=mutation, mhc=self.test_mhc_one, uniprot=self.uniprot - ) + neoantigen = get_neoantigen(mutated_xmer=mutated_xmer, wild_type_xmer=wild_type_xmer) + self.prime.run(neoantigen=neoantigen, mhc=self.test_mhc_one, uniprot=self.uniprot) # rare aminoacids only return empty results when in the mutated sequence + best_result = EpitopeHelper.select_best_by_affinity( + predictions=self.prime.results, maximum=True) if EpitopeHelper.contains_rare_amino_acid(mutated_xmer): - self.assertIsNone(best_peptide) - self.assertIsNone(best_rank) - self.assertIsNone(best_allele) - self.assertIsNone(best_score) + self.assertIsNone(best_result.mutated_peptide) + self.assertIsNone(best_result.rank_mutated) + self.assertIsNone(best_result.allele_mhc_i.name) + self.assertIsNone(best_result.affinity_mutated) else: - self.assertIsNotNone(best_peptide) - self.assertIsNotNone(best_rank) - self.assertIsNotNone(best_allele) - self.assertIsNotNone(best_score) + self.assertIsNotNone(best_result.mutated_peptide) + self.assertIsNotNone(best_result.rank_mutated) + self.assertIsNotNone(best_result.allele_mhc_i.name) + self.assertIsNotNone(best_result.affinity_mutated) diff --git a/neofox/tests/resources/alleles.Pt29_without_mhc2.csv 
b/neofox/tests/resources/alleles.Pt29_without_mhc2.csv new file mode 100644 index 00000000..745962d5 --- /dev/null +++ b/neofox/tests/resources/alleles.Pt29_without_mhc2.csv @@ -0,0 +1,3 @@ +identifier mhcIAlleles +Pt29 HLA-A*03:01,HLA-A*02:01,HLA-B*07:02,HLA-B*18:01,HLA-C*07:02,HLA-C*12:03 +Pt30 HLA-A*03:01,HLA-A*02:01,HLA-B*07:02,HLA-B*18:01,HLA-C*07:02,HLA-C*12:03 \ No newline at end of file diff --git a/neofox/tests/resources/balachandran_supplementary_table1_neoantigens.tsv b/neofox/tests/resources/balachandran_supplementary_table1_neoantigens.tsv index cd8343fa..c1b36e58 100644 --- a/neofox/tests/resources/balachandran_supplementary_table1_neoantigens.tsv +++ b/neofox/tests/resources/balachandran_supplementary_table1_neoantigens.tsv @@ -1,4 +1,4 @@ -dnaVariantAlleleFrequency gene identifier mutation.mutatedXmer mutation.wildTypeXmer patientIdentifier rnaExpression rnaVariantAlleleFrequency +dnaVariantAlleleFrequency gene identifier mutatedXmer wildTypeXmer patientIdentifier rnaExpression rnaVariantAlleleFrequency 0.0 bPR1hUExGJn3+aOKrqy3gw== HIQDLYTVL RIQDLYTVL 1.0 0.0 0.0 0.0 AtzsufB7B3Yok9IwZ+lSRQ== GYYTLLNVF GNYTLLNVF 1.0 0.0 0.0 0.0 BIwwmAU1FPHZ4LkYHGzECQ== LTLFGYYTL LTLFGNYTL 1.0 0.0 0.0 diff --git a/neofox/tests/resources/test_candidate_file.txt b/neofox/tests/resources/test_candidate_file.txt deleted file mode 100755 index c2e5af56..00000000 --- a/neofox/tests/resources/test_candidate_file.txt +++ /dev/null @@ -1,11 +0,0 @@ -patient key mutation gene RefSeq_transcript UCSC_transcript transcript_expression exon exon_expression transcript_position codon substitution +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) MHC_I_peptide_length_(best_prediction) MHC_I_allele_(best_prediction) MHC_I_score_(best_prediction) MHC_I_epitope_(best_prediction) MHC_I_epitope_(WT) MHC_I_score_(WT) MHC_II_peptide_length_(best_prediction) MHC_II_allele_(best_prediction) 
MHC_II_score_(best_prediction) MHC_II_epitope_(best_prediction) MHC_II_epitope_(WT) MHC_II_score_(WT) mutations_in_transcript distance_to_next_mutation(AA_residues) next_mutation(potential_to_change_27mer) next_mutation_source peptide_count_for_this_mutation_in_this_transcript phase_of_next_mutation other_transcripts_with_this_peptide peptide_resulting_from_this_mutation distinct_peptides_resulting_from_this_mutation keys_of_distinct_peptides_resulting_from_this_mutation coverage_tumor coverage_normal coverage_RNA VAF_in_tumor VAF_in_normal VAF_in_RNA VAF_RNA_raw VAF_RNA_limits -Ptx 56 chr2_176983779 HOXD10 NM_002148 uc002ukj.3 0.250592319 uc002ukj.3#exon.1 0 843 281 NA RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ RKKRCPYTKHQTLELEKEFLFNMYLTRERRLEISKSVNLTDRQVKIWFQNRRMKLKKMSRENRIRELTANLTFS AGAAAGAAGAGGTGCCCTTACACTAAGCACCAAACGCTGGAATTAGAAAGAGTTCTTGTTCAATATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAG 10 HLA-B*18:01 0.25 LELERVLVQY LELEKEFLFN 9.15 15 HLA-DRB1*03:01 2.46 LELERVLVQYVPHPR LELEKEFLFNMYLTR 13.53 1 NA NA NA 0 NA NA 1 1 NA 345 101 2 0.160818713 0 0 0 NA -Ptx 27 chr5_82817144 VCAN NM_004385 uc003kii.3 0.519506894 uc003kii.3#exon.6 0 3019 1006 I1007T DEVLGEPSQDILVTDQTRLEATISPET DEVLGEPSQDILVIDQTRLEATISPET GATGAAGTTCTAGGTGAACCCTCTCAAGACATACTTGTCACTGATCAGACTCGCCTTGAAGCGACTATTTCTCCAGAAACT 11 HLA-C*05:01 0.3 VTDQTRLEATI VIDQTRLEATI 0.4 15 HLA-DRB1*03:01 0.23 QDILVTDQTRLEATI QDILVIDQTRLEATI 0.1 1 NA NA NA 0 NA uc010jau.2 1 1 NA 129 38 7 0.294573643 0 0.857142857 0.857142857 NA -Ptx 24 chr1_154996999 DCST2 NM_144622 uc001fgm.3 0.28245175 uc001fgm.3#exon.10 0 1689 563 R564W RTNLLAALHRSVRWRAADQGHRSAFLV RTNLLAALHRSVRRRAADQGHRSAFLV CGAACCAATCTGTTGGCTGCCCTGCACCGATCAGTGAGGTGGCGGGCGGCTGACCAGGGCCACAGAAGTGCCTTCCTAGTG 9 HLA-B*44:02 2.15 AALHRSVRW AALHRSVRR 51 15 HLA-DRB1*03:01 4.01 TNLLAALHRSVRWRA TNLLAALHRSVRRRA 5.24 1 NA NA NA 0 NA NA 1 1 NA 340 47 0 0.108823529 0 -1 -1 NA -Ptx 148 chr9_133577547 EXOSC2 NM_014285 uc004bzu.2 7.942770402 uc004bzu.2#exon.6 0 522 174 NA SLKYGKLGQGVLVQVSPPW 
SLKYGKLGQGVLVQVSPSLVKRQKTHFHDLPCGASVILGNNGFIWIYPTPEHKEEEAGGFIANLEPVSLADREVISRLRNCIISLVTQRMMLYDTSILYCYEASLPHQIKDILKPEIMEEIVMETRQRLLEQEG AGCCTGAAATATGGAAAACTAGGTCAGGGGGTTTTGGTCCAGGTTTCCCCTCCCTGG 9 HLA-B*44:02 4.15 VLVQVSPPW VLVQVSPSL 28.5 15 HLA-DRB1*03:01 25.7 GKLGQGVLVQVSPPW GKLGQGVLVQVSPSL 6.66 1 NA NA NA 0 NA NA 1 2 58 162 45 78 0.378881988 0 0.064 0.064 NA -Ptx 28 chr1_115258747 NRAS NM_002524 uc009wgu.3 42.85666275 uc009wgu.3#exon.1 0 33 11 G12C MTEYKLVVVGACGVGKSALTIQLIQ MTEYKLVVVGAGGVGKSALTIQLIQ ATGACTGAGTACAAACTGGTGGTGGTTGGAGCATGTGGTGTTGGGAAAAGCGCACTGACAATCCAGCTAATCCAG 10 HLA-A*02:01 2.35 KLVVVGACGV KLVVVGAGGV 3.9 15 HLA-DRB1*01:01 12.77 MTEYKLVVVGACGVG MTEYKLVVVGAGGVG 9.95 1 NA NA NA 0 NA NA 1 1 NA 574 199 253 0.853658537 0 0.964426877 0.964426877 NA -Ptx 63 chr1_179989810 CEP350 NM_014810 uc001gnt.3 6.765113758 uc001gnt.3#exon.11 0 2901 967 Q968K QTDSSSSDMQACSKDKAKISLGSSIDS QTDSSSSDMQACSQDKAKISLGSSIDS CAGACTGACTCTTCTAGCTCTGATATGCAAGCCTGTTCTAAAGACAAAGCCAAAATATCTCTTGGTTCCAGCATAGATTCA 10 HLA-B*44:02 3.7 SDMQACSKDK SDMQACSQDK 3.05 15 HLA-DRB1*03:01 7.5 SDMQACSKDKAKISL SDMQACSQDKAKISL 18.62 2 23 chr1_179989741 SNP_replicate_1 0 NA uc001gnu.3 uc009wxl.2 1 1 NA 959 99 39 0.164754953 0 0.205128205 0.205128205 NA -Ptx 117 chrX_75397540 CXorf26 NM_016500 uc004ecl.1 15.38792717 uc004ecl.1#exon.5 0 499 166 G167E YNKAVYISVQDKEEEKGVNNGGEKRAD YNKAVYISVQDKEGEKGVNNGGEKRAD TATAACAAAGCTGTTTATATCAGTGTTCAGGACAAAGAAGAAGAGAAAGGAGTCAACAATGGAGGAGAAAAAAGAGCTGAC 10 HLA-C*05:01 2.6 VQDKEEEKGV VQDKEGEKGV 1.5 15 HLA-DRB1*03:01 7.64 YNKAVYISVQDKEEE YNKAVYISVQDKEGE 7.64 1 NA NA NA 0 NA NA 1 1 NA 100 45 172 0.46 0 0.534883721 0.534883721 NA -Ptx 110 chr11_133799661 IGSF9B NM_014987 uc001qgx.4 0.071129512 uc001qgx.4#exon.11 0 1534 511 A512V ASTHLTVIGTSPHVPGSVRVQVSMTTA ASTHLTVIGTSPHAPGSVRVQVSMTTA GCCAGCACCCACCTCACCGTCATCGGCACCAGCCCCCATGTCCCGGGCAGTGTCCGGGTCCAGGTCTCCATGACAACTGCC 9 HLA-A*02:01 4.1 TVIGTSPHV TVIGTSPHA 12 15 HLA-DRB1*03:01 7.62 PHVPGSVRVQVSMTT PHAPGSVRVQVSMTT 15.36 1 NA NA NA 0 
NA uc001qgy.1 1 1 NA 90 45 0 0.422222222 0.022222222 -1 -1 NA -Pty 26 chr14_31792892 HEATR5A NM_015473 uc001wrf.4 NA uc001wrf.4#exon.23 0 3664 1221 R1222Q TRRDEKSHPFTNPQWATRVFAAECVCR TRRDEKSHPFTNPRWATRVFAAECVCR ACCAGACGTGATGAAAAATCCCATCCTTTTACCAATCCCCAATGGGCTACTAGAGTCTTTGCTGCTGAATGTGTCTGTAGG 11 HLA-C*05:01 0.5 FTNPQWATRVF FTNPRWATRVF 0.6 15 HLA-DRB1*03:01 9.61 PQWATRVFAAECVCR PRWATRVFAAECVCR 9.61 1 NA NA NA 0 NA uc010ami.3 uc001wrg.1 1 1 NA 214 114 19 0.799065421 0 0.947368421 0.947368421 NA -Pty 77 chr11_74429841 CHRDL2 NM_015424 uc001ovh.3 NA uc001ovh.3#exon.1 0 117 39 Y40H ARPDMFCLFHGKRHFPGESWHPYLEPQ ARPDMFCLFHGKRYSPGESWHPYLEPQ GCCCGCCCAGACATGTTCTGCCTTTTCCATGGGAAGAGACACTTCCCCGGCGAGAGCTGGCACCCCTACTTGGAGCCACAA 11 HLA-C*07:02 0.5 HFPGESWHPYL YSPGESWHPYL 6.4 15 HLA-DRB1*11:04 9.28 ARPDMFCLFHGKRHF ARPDMFCLFHGKRYS 9.28 2 1 chr11_74429837 MyMut 0 in_phase:merged_two_codons uc001ovi.3 uc001ovk.1 1 1 69 54 4 0.231884058 0.018518519 0 0 diff --git a/neofox/tests/resources/test_candidate_file_Pty.txt b/neofox/tests/resources/test_candidate_file_Pty.txt deleted file mode 100755 index 22e1b01a..00000000 --- a/neofox/tests/resources/test_candidate_file_Pty.txt +++ /dev/null @@ -1,12 +0,0 @@ -patient key mutation gene RefSeq_transcript UCSC_transcript transcript_expression exon exon_expression transcript_position codon substitution +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) MHC_I_peptide_length_(best_prediction) MHC_I_allele_(best_prediction) MHC_I_score_(best_prediction) MHC_I_epitope_(best_prediction) MHC_I_epitope_(WT) MHC_I_score_(WT) MHC_II_peptide_length_(best_prediction) MHC_II_allele_(best_prediction) MHC_II_score_(best_prediction) MHC_II_epitope_(best_prediction) MHC_II_epitope_(WT) MHC_II_score_(WT) mutations_in_transcript distance_to_next_mutation(AA_residues) next_mutation(potential_to_change_27mer) next_mutation_source peptide_count_for_this_mutation_in_this_transcript 
phase_of_next_mutation other_transcripts_with_this_peptide peptide_resulting_from_this_mutation distinct_peptides_resulting_from_this_mutation keys_of_distinct_peptides_resulting_from_this_mutation coverage_tumor coverage_normal coverage_RNA VAF_in_tumor VAF_in_normal VAF_in_RNA VAF_RNA_raw VAF_RNA_limits -Pty 56 chr2_176983779 HOXD10 NM_002148 uc002ukj.3 NA uc002ukj.3#exon.1 0 843 281 NA RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ RKKRCPYTKHQTLELEKEFLFNMYLTRERRLEISKSVNLTDRQVKIWFQNRRMKLKKMSRENRIRELTANLTFS AGAAAGAAGAGGTGCCCTTACACTAAGCACCAAACGCTGGAATTAGAAAGAGTTCTTGTTCAATATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAG 10 HLA-B*18:01 0.25 LELERVLVQY LELEKEFLFN 9.15 15 HLA-DRB1*03:01 2.46 LELERVLVQYVPHPR LELEKEFLFNMYLTR 13.53 1 NA NA NA 0 NA NA 1 1 NA 345 101 2 0.160818713 0 0 0 NA -Pty 27 chr5_82817144 VCAN NM_004385 uc003kii.3 NA uc003kii.3#exon.6 0 3019 1006 I1007T DEVLGEPSQDILVTDQTRLEATISPET DEVLGEPSQDILVIDQTRLEATISPET GATGAAGTTCTAGGTGAACCCTCTCAAGACATACTTGTCACTGATCAGACTCGCCTTGAAGCGACTATTTCTCCAGAAACT 11 HLA-C*05:01 0.3 VTDQTRLEATI VIDQTRLEATI 0.4 15 HLA-DRB1*03:01 0.23 QDILVTDQTRLEATI QDILVIDQTRLEATI 0.1 1 NA NA NA 0 NA uc010jau.2 1 1 NA 129 38 7 0.294573643 0 0.857142857 0.857142857 NA -Pty 24 chr1_154996999 DCST2 NM_144622 uc001fgm.3 NA uc001fgm.3#exon.10 0 1689 563 R564W RTNLLAALHRSVRWRAADQGHRSAFLV RTNLLAALHRSVRRRAADQGHRSAFLV CGAACCAATCTGTTGGCTGCCCTGCACCGATCAGTGAGGTGGCGGGCGGCTGACCAGGGCCACAGAAGTGCCTTCCTAGTG 9 HLA-B*44:02 2.15 AALHRSVRW AALHRSVRR 51 15 HLA-DRB1*03:01 4.01 TNLLAALHRSVRWRA TNLLAALHRSVRRRA 5.24 1 NA NA NA 0 NA NA 1 1 NA 340 47 0 0.108823529 0 -1 -1 NA -Pty 148 chr9_133577547 EXOSC2 NM_014285 uc004bzu.2 NA uc004bzu.2#exon.6 0 522 174 NA SLKYGKLGQGVLVQVSPPW SLKYGKLGQGVLVQVSPSLVKRQKTHFHDLPCGASVILGNNGFIWIYPTPEHKEEEAGGFIANLEPVSLADREVISRLRNCIISLVTQRMMLYDTSILYCYEASLPHQIKDILKPEIMEEIVMETRQRLLEQEG AGCCTGAAATATGGAAAACTAGGTCAGGGGGTTTTGGTCCAGGTTTCCCCTCCCTGG 9 HLA-B*44:02 4.15 VLVQVSPPW VLVQVSPSL 28.5 15 HLA-DRB1*03:01 25.7 GKLGQGVLVQVSPPW GKLGQGVLVQVSPSL 6.66 1 NA NA NA 0 NA NA 1 2 58 
162 45 78 0.378881988 0 0.064 0.064 NA -Pty 28 chr1_115258747 NRAS NM_002524 uc009wgu.3 NA uc009wgu.3#exon.1 0 33 11 G12C MTEYKLVVVGACGVGKSALTIQLIQ MTEYKLVVVGAGGVGKSALTIQLIQ ATGACTGAGTACAAACTGGTGGTGGTTGGAGCATGTGGTGTTGGGAAAAGCGCACTGACAATCCAGCTAATCCAG 10 HLA-A*02:01 2.35 KLVVVGACGV KLVVVGAGGV 3.9 15 HLA-DRB1*01:01 12.77 MTEYKLVVVGACGVG MTEYKLVVVGAGGVG 9.95 1 NA NA NA 0 NA NA 1 1 NA 574 199 253 0.853658537 0 0.964426877 0.964426877 NA -Pty 63 chr1_179989810 CEP350 NM_014810 uc001gnt.3 NA uc001gnt.3#exon.11 0 2901 967 Q968K QTDSSSSDMQACSKDKAKISLGSSIDS QTDSSSSDMQACSQDKAKISLGSSIDS CAGACTGACTCTTCTAGCTCTGATATGCAAGCCTGTTCTAAAGACAAAGCCAAAATATCTCTTGGTTCCAGCATAGATTCA 10 HLA-B*44:02 3.7 SDMQACSKDK SDMQACSQDK 3.05 15 HLA-DRB1*03:01 7.5 SDMQACSKDKAKISL SDMQACSQDKAKISL 18.62 2 23 chr1_179989741 SNP_replicate_1 0 NA uc001gnu.3 uc009wxl.2 1 1 NA 959 99 39 0.164754953 0 0.205128205 0.205128205 NA -Pty 77 chr16_12798810 CPPED1 NM_018340 uc002dca.4 NA uc002dca.4#exon.2 0 384 128 D129Y DRAIPLVLVSGNHYIGNTPTAETVEEF DRAIPLVLVSGNHDIGNTPTAETVEEF GACAGGGCCATCCCACTGGTCCTTGTCAGCGGCAACCATTACATTGGCAACACCCCCACGGCCGAGACCGTCGAGGAGTTC 8 HLA-A*30:02 0.4 VLVSGNHY VLVSGNHD 16.15 15 HLA-DRB1*03:01 3.05 IPLVLVSGNHYIGNT IPLVLVSGNHDIGNT 4.37 1 NA NA NA 0 NA NA 1 1 NA 184 84 83 0.277173913 0 0.313253012 0.313253012 NA -Pty 117 chrX_75397540 CXorf26 NM_016500 uc004ecl.1 NA uc004ecl.1#exon.5 0 499 166 G167E YNKAVYISVQDKEEEKGVNNGGEKRAD YNKAVYISVQDKEGEKGVNNGGEKRAD TATAACAAAGCTGTTTATATCAGTGTTCAGGACAAAGAAGAAGAGAAAGGAGTCAACAATGGAGGAGAAAAAAGAGCTGAC 10 HLA-C*05:01 2.6 VQDKEEEKGV VQDKEGEKGV 1.5 15 HLA-DRB1*03:01 7.64 YNKAVYISVQDKEEE YNKAVYISVQDKEGE 7.64 1 NA NA NA 0 NA NA 1 1 NA 100 45 172 0.46 0 0.534883721 0.534883721 NA -Pty 110 chr11_133799661 IGSF9B NM_014987 uc001qgx.4 NA uc001qgx.4#exon.11 0 1534 511 A512V ASTHLTVIGTSPHVPGSVRVQVSMTTA ASTHLTVIGTSPHAPGSVRVQVSMTTA GCCAGCACCCACCTCACCGTCATCGGCACCAGCCCCCATGTCCCGGGCAGTGTCCGGGTCCAGGTCTCCATGACAACTGCC 9 HLA-A*02:01 4.1 TVIGTSPHV TVIGTSPHA 12 15 HLA-DRB1*03:01 7.62 
PHVPGSVRVQVSMTT PHAPGSVRVQVSMTT 15.36 1 NA NA NA 0 NA uc001qgy.1 1 1 NA 90 45 0 0.422222222 0.022222222 -1 -1 NA -Pty 26 chr14_31792892 HEATR5A NM_015473 uc001wrf.4 NA uc001wrf.4#exon.23 0 3664 1221 R1222Q TRRDEKSHPFTNPQWATRVFAAECVCR TRRDEKSHPFTNPRWATRVFAAECVCR ACCAGACGTGATGAAAAATCCCATCCTTTTACCAATCCCCAATGGGCTACTAGAGTCTTTGCTGCTGAATGTGTCTGTAGG 11 HLA-C*05:01 0.5 FTNPQWATRVF FTNPRWATRVF 0.6 15 HLA-DRB1*03:01 9.61 PQWATRVFAAECVCR PRWATRVFAAECVCR 9.61 1 NA NA NA 0 NA uc010ami.3 uc001wrg.1 1 1 NA 214 114 19 0.799065421 0 0.947368421 0.947368421 NA -Pty 77 chr11_74429841 CHRDL2 NM_015424 uc001ovh.3 NA uc001ovh.3#exon.1 0 117 39 Y40H ARPDMFCLFHGKRHFPGESWHPYLEPQ ARPDMFCLFHGKRYSPGESWHPYLEPQ GCCCGCCCAGACATGTTCTGCCTTTTCCATGGGAAGAGACACTTCCCCGGCGAGAGCTGGCACCCCTACTTGGAGCCACAA 11 HLA-C*07:02 0.5 HFPGESWHPYL YSPGESWHPYL 6.4 15 HLA-DRB1*11:04 9.28 ARPDMFCLFHGKRHF ARPDMFCLFHGKRYS 9.28 2 1 chr11_74429837 MyMut 0 in_phase:merged_two_codons uc001ovi.3 uc001ovk.1 1 1 69 54 4 0.231884058 0.018518519 0 0 diff --git a/neofox/tests/resources/test_data.txt b/neofox/tests/resources/test_data.txt deleted file mode 100755 index 54beebff..00000000 --- a/neofox/tests/resources/test_data.txt +++ /dev/null @@ -1,12 +0,0 @@ -patient key mutation gene RefSeq_transcript UCSC_transcript transcript_expression exon exon_expression transcript_position codon substitution +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) MHC_I_peptide_length_(best_prediction) MHC_I_allele_(best_prediction) MHC_I_score_(best_prediction) MHC_I_epitope_(best_prediction) MHC_I_epitope_(WT) MHC_I_score_(WT) MHC_II_peptide_length_(best_prediction) MHC_II_allele_(best_prediction) MHC_II_score_(best_prediction) MHC_II_epitope_(best_prediction) MHC_II_epitope_(WT) MHC_II_score_(WT) mutations_in_transcript distance_to_next_mutation(AA_residues) next_mutation(potential_to_change_27mer) next_mutation_source peptide_count_for_this_mutation_in_this_transcript 
phase_of_next_mutation other_transcripts_with_this_peptide peptide_resulting_from_this_mutation distinct_peptides_resulting_from_this_mutation keys_of_distinct_peptides_resulting_from_this_mutation coverage_tumor coverage_normal coverage_RNA VAF_in_tumor VAF_in_normal VAF_in_RNA VAF_RNA_raw VAF_RNA_limits -Ptx 56 chr2_176983779 HOXD10 NM_002148 uc002ukj.3 0.250592319 uc002ukj.3#exon.1 0 843 281 NA RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ RKKRCPYTKHQTLELEKEFLFNMYLTRERRLEISKSVNLTDRQVKIWFQNRRMKLKKMSRENRIRELTANLTFS AGAAAGAAGAGGTGCCCTTACACTAAGCACCAAACGCTGGAATTAGAAAGAGTTCTTGTTCAATATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAG 10 HLA-B*18:01 0.25 LELERVLVQY LELEKEFLFN 9.15 15 HLA-DRB1*03:01 2.46 LELERVLVQYVPHPR LELEKEFLFNMYLTR 13.53 1 NA NA NA 0 NA NA 1 1 NA 345 101 2 0.160818713 0 0 0 NA -Ptx 27 chr5_82817144 VCAN NM_004385 uc003kii.3 0.519506894 uc003kii.3#exon.6 0 3019 1006 I1007T DEVLGEPSQDILVTDQTRLEATISPET DEVLGEPSQDILVIDQTRLEATISPET GATGAAGTTCTAGGTGAACCCTCTCAAGACATACTTGTCACTGATCAGACTCGCCTTGAAGCGACTATTTCTCCAGAAACT 11 HLA-C*05:01 0.3 VTDQTRLEATI VIDQTRLEATI 0.4 15 HLA-DRB1*03:01 0.23 QDILVTDQTRLEATI QDILVIDQTRLEATI 0.1 1 NA NA NA 0 NA uc010jau.2 1 1 NA 129 38 7 0.294573643 0 0.857142857 0.857142857 NA -Ptx 24 chr1_154996999 DCST2 NM_144622 uc001fgm.3 0.28245175 uc001fgm.3#exon.10 0 1689 563 R564W RTNLLAALHRSVRWRAADQGHRSAFLV RTNLLAALHRSVRRRAADQGHRSAFLV CGAACCAATCTGTTGGCTGCCCTGCACCGATCAGTGAGGTGGCGGGCGGCTGACCAGGGCCACAGAAGTGCCTTCCTAGTG 9 HLA-B*44:02 2.15 AALHRSVRW AALHRSVRR 51 15 HLA-DRB1*03:01 4.01 TNLLAALHRSVRWRA TNLLAALHRSVRRRA 5.24 1 NA NA NA 0 NA NA 1 1 NA 340 47 0 0.108823529 0 -1 -1 NA -Ptx 148 chr9_133577547 EXOSC2 NM_014285 uc004bzu.2 7.942770402 uc004bzu.2#exon.6 0 522 174 NA SLKYGKLGQGVLVQVSPPW SLKYGKLGQGVLVQVSPSLVKRQKTHFHDLPCGASVILGNNGFIWIYPTPEHKEEEAGGFIANLEPVSLADREVISRLRNCIISLVTQRMMLYDTSILYCYEASLPHQIKDILKPEIMEEIVMETRQRLLEQEG AGCCTGAAATATGGAAAACTAGGTCAGGGGGTTTTGGTCCAGGTTTCCCCTCCCTGG 9 HLA-B*44:02 4.15 VLVQVSPPW VLVQVSPSL 28.5 15 HLA-DRB1*03:01 25.7 GKLGQGVLVQVSPPW 
GKLGQGVLVQVSPSL 6.66 1 NA NA NA 0 NA NA 1 2 58 162 45 78 0.378881988 0 0.064 0.064 NA -Ptx 28 chr1_115258747 NRAS NM_002524 uc009wgu.3 42.85666275 uc009wgu.3#exon.1 0 33 11 G12C MTEYKLVVVGACGVGKSALTIQLIQ MTEYKLVVVGAGGVGKSALTIQLIQ ATGACTGAGTACAAACTGGTGGTGGTTGGAGCATGTGGTGTTGGGAAAAGCGCACTGACAATCCAGCTAATCCAG 10 HLA-A*02:01 2.35 KLVVVGACGV KLVVVGAGGV 3.9 15 HLA-DRB1*01:01 12.77 MTEYKLVVVGACGVG MTEYKLVVVGAGGVG 9.95 1 NA NA NA 0 NA NA 1 1 NA 574 199 253 0.853658537 0 0.964426877 0.964426877 NA -Ptx 63 chr1_179989810 CEP350 NM_014810 uc001gnt.3 6.765113758 uc001gnt.3#exon.11 0 2901 967 Q968K QTDSSSSDMQACSKDKAKISLGSSIDS QTDSSSSDMQACSQDKAKISLGSSIDS CAGACTGACTCTTCTAGCTCTGATATGCAAGCCTGTTCTAAAGACAAAGCCAAAATATCTCTTGGTTCCAGCATAGATTCA 10 HLA-B*44:02 3.7 SDMQACSKDK SDMQACSQDK 3.05 15 HLA-DRB1*03:01 7.5 SDMQACSKDKAKISL SDMQACSQDKAKISL 18.62 2 23 chr1_179989741 SNP_replicate_1 0 NA uc001gnu.3 uc009wxl.2 1 1 NA 959 99 39 0.164754953 0 0.205128205 0.205128205 NA -Ptx 77 chr16_12798810 CPPED1 NM_018340 uc002dca.4 10.91669936 uc002dca.4#exon.2 0 384 128 D129Y DRAIPLVLVSGNHYIGNTPTAETVEEF DRAIPLVLVSGNHDIGNTPTAETVEEF GACAGGGCCATCCCACTGGTCCTTGTCAGCGGCAACCATTACATTGGCAACACCCCCACGGCCGAGACCGTCGAGGAGTTC 8 HLA-A*30:02 0.4 VLVSGNHY VLVSGNHD 16.15 15 HLA-DRB1*03:01 3.05 IPLVLVSGNHYIGNT IPLVLVSGNHDIGNT 4.37 1 NA NA NA 0 NA NA 1 1 NA 184 84 83 0.277173913 0 0.313253012 0.313253012 NA -Ptx 117 chrX_75397540 CXorf26 NM_016500 uc004ecl.1 15.38792717 uc004ecl.1#exon.5 0 499 166 G167E YNKAVYISVQDKEEEKGVNNGGEKRAD YNKAVYISVQDKEGEKGVNNGGEKRAD TATAACAAAGCTGTTTATATCAGTGTTCAGGACAAAGAAGAAGAGAAAGGAGTCAACAATGGAGGAGAAAAAAGAGCTGAC 10 HLA-C*05:01 2.6 VQDKEEEKGV VQDKEGEKGV 1.5 15 HLA-DRB1*03:01 7.64 YNKAVYISVQDKEEE YNKAVYISVQDKEGE 7.64 1 NA NA NA 0 NA NA 1 1 NA 100 45 172 0.46 0 0.534883721 0.534883721 NA -Ptx 110 chr11_133799661 IGSF9B NM_014987 uc001qgx.4 0.071129512 uc001qgx.4#exon.11 0 1534 511 A512V ASTHLTVIGTSPHVPGSVRVQVSMTTA ASTHLTVIGTSPHAPGSVRVQVSMTTA 
GCCAGCACCCACCTCACCGTCATCGGCACCAGCCCCCATGTCCCGGGCAGTGTCCGGGTCCAGGTCTCCATGACAACTGCC 9 HLA-A*02:01 4.1 TVIGTSPHV TVIGTSPHA 12 15 HLA-DRB1*03:01 7.62 PHVPGSVRVQVSMTT PHAPGSVRVQVSMTT 15.36 1 NA NA NA 0 NA uc001qgy.1 1 1 NA 90 45 0 0.422222222 0.022222222 -1 -1 NA -Ptx 26 chr14_31792892 HEATR5A NM_015473 uc001wrf.4 4.055704514 uc001wrf.4#exon.23 0 3664 1221 R1222Q TRRDEKSHPFTNPQWATRVFAAECVCR TRRDEKSHPFTNPRWATRVFAAECVCR ACCAGACGTGATGAAAAATCCCATCCTTTTACCAATCCCCAATGGGCTACTAGAGTCTTTGCTGCTGAATGTGTCTGTAGG 11 HLA-C*05:01 0.5 FTNPQWATRVF FTNPRWATRVF 0.6 15 HLA-DRB1*03:01 9.61 PQWATRVFAAECVCR PRWATRVFAAECVCR 9.61 1 NA NA NA 0 NA uc010ami.3 uc001wrg.1 1 1 NA 214 114 19 0.799065421 0 0.947368421 0.947368421 NA -Ptx 77 chr11_74429841 CHRDL2 NM_015424 uc001ovh.3 0.170452164 uc001ovh.3#exon.1 0 117 39 Y40H ARPDMFCLFHGKRHFPGESWHPYLEPQ ARPDMFCLFHGKRYSPGESWHPYLEPQ GCCCGCCCAGACATGTTCTGCCTTTTCCATGGGAAGAGACACTTCCCCGGCGAGAGCTGGCACCCCTACTTGGAGCCACAA 11 HLA-C*07:02 0.5 HFPGESWHPYL YSPGESWHPYL 6.4 15 HLA-DRB1*11:04 9.28 ARPDMFCLFHGKRHF ARPDMFCLFHGKRYS 9.28 2 1 chr11_74429837 MyMut 0 in_phase:merged_two_codons uc001ovi.3 uc001ovk.1 1 1 69 54 4 0.231884058 0.018518519 0 0 diff --git a/neofox/tests/resources/test_data_json.json b/neofox/tests/resources/test_data_json.json index dcbf4e4a..41b5b419 100644 --- a/neofox/tests/resources/test_data_json.json +++ b/neofox/tests/resources/test_data_json.json @@ -1 +1 @@ -[{"identifier": "odJ99FdqvJoK1znK+iCpWQ==", "patientIdentifier": "Pt29", "gene": "BRCA2", "mutation": {"position": [7], "wildTypeXmer": "AAAAAALAAAAA", "mutatedXmer": "AAAAAAFAAAAA"}}, {"identifier": "E1xQlw06sPURPCTJVKI9NA==", "patientIdentifier": "Pt29", "gene": "BRCA2", "mutation": {"position": [7], "wildTypeXmer": "AAAAAAMAAAAA", "mutatedXmer": "AAAAAARAAAAA"}}, {"identifier": "BSOgOYCGS8CmJUib06lWMw==", "patientIdentifier": "Pt29", "gene": "BRCA2", "mutation": {"position": [7], "wildTypeXmer": "AAAAAAGAAAAA", "mutatedXmer": "AAAAAAKAAAAA"}}, {"identifier": "WYU7Wj9D0ryonb8aHi8c5Q==", 
"patientIdentifier": "Pt29", "gene": "BRCA2", "mutation": {"position": [7], "wildTypeXmer": "AAAAAACAAAAA", "mutatedXmer": "AAAAAAEAAAAA"}}, {"identifier": "8gUDplDGtnr5/NbIc+8gfg==", "patientIdentifier": "Pt29", "gene": "BRCA2", "mutation": {"position": [7], "wildTypeXmer": "AAAAAAKAAAAA", "mutatedXmer": "AAAAAACAAAAA"}}] \ No newline at end of file +[{"identifier": "odJ99FdqvJoK1znK+iCpWQ==", "patientIdentifier": "Pt29", "gene": "BRCA2", "position": [7], "wildTypeXmer": "AAAAAALAAAAA", "mutatedXmer": "AAAAAAFAAAAA"}, {"identifier": "E1xQlw06sPURPCTJVKI9NA==", "patientIdentifier": "Pt29", "gene": "BRCA2", "position": [7], "wildTypeXmer": "AAAAAAMAAAAA", "mutatedXmer": "AAAAAARAAAAA"}, {"identifier": "BSOgOYCGS8CmJUib06lWMw==", "patientIdentifier": "Pt29", "gene": "BRCA2", "position": [7], "wildTypeXmer": "AAAAAAGAAAAA", "mutatedXmer": "AAAAAAKAAAAA"}, {"identifier": "WYU7Wj9D0ryonb8aHi8c5Q==", "patientIdentifier": "Pt29", "gene": "BRCA2", "position": [7], "wildTypeXmer": "AAAAAACAAAAA", "mutatedXmer": "AAAAAAEAAAAA"}, {"identifier": "8gUDplDGtnr5/NbIc+8gfg==", "patientIdentifier": "Pt29", "gene": "BRCA2", "position": [7], "wildTypeXmer": "AAAAAAKAAAAA", "mutatedXmer": "AAAAAACAAAAA"}] \ No newline at end of file diff --git a/neofox/tests/resources/test_data_model.txt b/neofox/tests/resources/test_data_model.txt index 4757468b..9db9f2c8 100755 --- a/neofox/tests/resources/test_data_model.txt +++ b/neofox/tests/resources/test_data_model.txt @@ -1,4 +1,4 @@ -gene mutation.wildTypeXmer mutation.mutatedXmer patientIdentifier external_annotation_1 external_annotation_2 +gene wildTypeXmer mutatedXmer patientIdentifier external_annotation_1 external_annotation_2 BRCA2 AAAAAALAAAAA AAAAAAFAAAAA Pt29 blah1 blah2 BRCA2 AAAAAAMAAAAA AAAAAARAAAAA Pt29 blah1 blah2 BRCA2 AAAAAAGAAAAA AAAAAAKAAAAA Pt29 blah1 blah2 diff --git a/neofox/tests/resources/test_data_model_only_one.txt b/neofox/tests/resources/test_data_model_only_one.txt new file mode 100755 index 00000000..d2a024bd --- 
/dev/null +++ b/neofox/tests/resources/test_data_model_only_one.txt @@ -0,0 +1,2 @@ +gene wildTypeXmer mutatedXmer patientIdentifier external_annotation_1 external_annotation_2 +BRCA2 AAAAAALAAAAA AAAAAAFAAAAA Pt29 blah1 blah2 diff --git a/neofox/tests/resources/test_data_model_realistic.txt b/neofox/tests/resources/test_data_model_realistic.txt new file mode 100755 index 00000000..d854754b --- /dev/null +++ b/neofox/tests/resources/test_data_model_realistic.txt @@ -0,0 +1,11 @@ +gene wildTypeXmer mutatedXmer patientIdentifier external_annotation_1 external_annotation_2 rnaExpression rnaVaf dnaVaf +HOXD10 RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ Ptx blah1 blah2 0.250592319 0 0.160818713 +VCAN DEVLGEPSQDILVIDQTRLEATISPET DEVLGEPSQDILVTDQTRLEATISPET Ptx blah1 blah2 0.519506894 0.857142857 0.294573643 +DCST2 RTNLLAALHRSVRRRAADQGHRSAFLV RTNLLAALHRSVRWRAADQGHRSAFLV Ptx blah1 blah2 0.28245175 -1 0.108823529 +EXOSC2 SLKYGKLGQGVLVQVSPPW Ptx blah1 blah2 7.942770402 0.064 0.378881988 +NRAS MTEYKLVVVGAGGVGKSALTIQLIQ MTEYKLVVVGACGVGKSALTIQLIQ Ptx blah1 blah2 42.85666275 0.964426877 0.853658537 +CEP350 QTDSSSSDMQACSQDKAKISLGSSIDS QTDSSSSDMQACSKDKAKISLGSSIDS Ptx blah1 blah2 6.765113758 0.205128205 0.164754953 +CXorf26 YNKAVYISVQDKEGEKGVNNGGEKRAD YNKAVYISVQDKEEEKGVNNGGEKRAD Ptx blah1 blah2 15.38792717 0.534883721 0.46 +IGSF9B ASTHLTVIGTSPHAPGSVRVQVSMTTA ASTHLTVIGTSPHVPGSVRVQVSMTTA Ptx blah1 blah2 0.071129512 -1 0.422222222 +HEATR5A TRRDEKSHPFTNPRWATRVFAAECVCR TRRDEKSHPFTNPQWATRVFAAECVCR Ptx blah1 blah2 NA 0.947368421 0.799065421 +CHRDL2 ARPDMFCLFHGKRYSPGESWHPYLEPQ ARPDMFCLFHGKRHFPGESWHPYLEPQ Ptx blah1 blah2 NA 0 0.231884058 diff --git a/neofox/tests/resources/test_data_model_realistic_Pty.txt b/neofox/tests/resources/test_data_model_realistic_Pty.txt new file mode 100755 index 00000000..d854754b --- /dev/null +++ b/neofox/tests/resources/test_data_model_realistic_Pty.txt @@ -0,0 +1,11 @@ +gene wildTypeXmer mutatedXmer patientIdentifier external_annotation_1 external_annotation_2 
rnaExpression rnaVaf dnaVaf +HOXD10 RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ Ptx blah1 blah2 0.250592319 0 0.160818713 +VCAN DEVLGEPSQDILVIDQTRLEATISPET DEVLGEPSQDILVTDQTRLEATISPET Ptx blah1 blah2 0.519506894 0.857142857 0.294573643 +DCST2 RTNLLAALHRSVRRRAADQGHRSAFLV RTNLLAALHRSVRWRAADQGHRSAFLV Ptx blah1 blah2 0.28245175 -1 0.108823529 +EXOSC2 SLKYGKLGQGVLVQVSPPW Ptx blah1 blah2 7.942770402 0.064 0.378881988 +NRAS MTEYKLVVVGAGGVGKSALTIQLIQ MTEYKLVVVGACGVGKSALTIQLIQ Ptx blah1 blah2 42.85666275 0.964426877 0.853658537 +CEP350 QTDSSSSDMQACSQDKAKISLGSSIDS QTDSSSSDMQACSKDKAKISLGSSIDS Ptx blah1 blah2 6.765113758 0.205128205 0.164754953 +CXorf26 YNKAVYISVQDKEGEKGVNNGGEKRAD YNKAVYISVQDKEEEKGVNNGGEKRAD Ptx blah1 blah2 15.38792717 0.534883721 0.46 +IGSF9B ASTHLTVIGTSPHAPGSVRVQVSMTTA ASTHLTVIGTSPHVPGSVRVQVSMTTA Ptx blah1 blah2 0.071129512 -1 0.422222222 +HEATR5A TRRDEKSHPFTNPRWATRVFAAECVCR TRRDEKSHPFTNPQWATRVFAAECVCR Ptx blah1 blah2 NA 0.947368421 0.799065421 +CHRDL2 ARPDMFCLFHGKRYSPGESWHPYLEPQ ARPDMFCLFHGKRHFPGESWHPYLEPQ Ptx blah1 blah2 NA 0 0.231884058 diff --git a/neofox/tests/resources/test_data_neoepitopes.txt b/neofox/tests/resources/test_data_neoepitopes.txt new file mode 100644 index 00000000..c0692764 --- /dev/null +++ b/neofox/tests/resources/test_data_neoepitopes.txt @@ -0,0 +1,3 @@ +mutatedPeptide wildTypePeptide alleleMhcI isoformMhcII patientIdentifier +DILVTDQTR DILVIDQTR HLA-A*01:01 +DEVLGEPSQDILVTDQTR DEVLGEPSQDILVIDQTR HLA-DRB1*01:01 diff --git a/neofox/tests/resources/test_data_neoepitopes_with_patients.txt b/neofox/tests/resources/test_data_neoepitopes_with_patients.txt new file mode 100644 index 00000000..aad4cab2 --- /dev/null +++ b/neofox/tests/resources/test_data_neoepitopes_with_patients.txt @@ -0,0 +1,4 @@ +mutatedPeptide wildTypePeptide alleleMhcI isoformMhcII patientIdentifier +DILVTDQTR DILVIDQTR HLA-A*01:01 +DEVLGEPSQDILVTDQTR DEVLGEPSQDILVIDQTR HLA-DRB1*01:01 +DILVTDQTR DILVIDQTR Pt29 diff --git a/neofox/tests/resources/test_data_only_one.txt 
b/neofox/tests/resources/test_data_only_one.txt deleted file mode 100755 index ec57098d..00000000 --- a/neofox/tests/resources/test_data_only_one.txt +++ /dev/null @@ -1,2 +0,0 @@ -patient key mutation gene RefSeq_transcript UCSC_transcript transcript_expression exon exon_expression transcript_position codon substitution +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) MHC_I_peptide_length_(best_prediction) MHC_I_allele_(best_prediction) MHC_I_score_(best_prediction) MHC_I_epitope_(best_prediction) MHC_I_epitope_(WT) MHC_I_score_(WT) MHC_II_peptide_length_(best_prediction) MHC_II_allele_(best_prediction) MHC_II_score_(best_prediction) MHC_II_epitope_(best_prediction) MHC_II_epitope_(WT) MHC_II_score_(WT) mutations_in_transcript distance_to_next_mutation(AA_residues) next_mutation(potential_to_change_27mer) next_mutation_source peptide_count_for_this_mutation_in_this_transcript phase_of_next_mutation other_transcripts_with_this_peptide peptide_resulting_from_this_mutation distinct_peptides_resulting_from_this_mutation keys_of_distinct_peptides_resulting_from_this_mutation coverage_tumor coverage_normal coverage_RNA VAF_in_tumor VAF_in_normal VAF_in_RNA VAF_RNA_raw VAF_RNA_limits -Ptx 27 chr5_82817144 VCAN NM_004385 uc003kii.3 0.519506894 uc003kii.3#exon.6 0 3019 1006 I1007T DEVLGEPSQDILVTDQTRLEATISPET DEVLGEPSQDILVIDQTRLEATISPET GATGAAGTTCTAGGTGAACCCTCTCAAGACATACTTGTCACTGATCAGACTCGCCTTGAAGCGACTATTTCTCCAGAAACT 11 HLA-C*05:01 0.3 VTDQTRLEATI VIDQTRLEATI 0.4 15 HLA-DRB1*03:01 0.23 QDILVTDQTRLEATI QDILVIDQTRLEATI 0.1 1 NA NA NA 0 NA uc010jau.2 1 1 NA 129 38 7 0.294573643 0 0.857142857 0.857142857 NA diff --git a/neofox/tests/resources/test_data_with_dot_in_column_name.txt b/neofox/tests/resources/test_data_with_dot_in_column_name.txt index 3aa04090..d5216df6 100755 --- a/neofox/tests/resources/test_data_with_dot_in_column_name.txt +++ 
b/neofox/tests/resources/test_data_with_dot_in_column_name.txt @@ -1,12 +1,6 @@ -patient key mutation gene RefSeq_transcript UCSC_transcript transcript_expression exon exon_expression transcript_position codon substitution +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) MHC_I_peptide_length_(best_prediction) MHC_I_allele_(best_prediction) MHC_I_score_(best_prediction) MHC_I_epitope_(best_prediction) MHC_I_epitope_(WT) MHC_I_score_(WT) MHC_II_peptide_length_(best_prediction) MHC_II_allele_(best_prediction) MHC_II_score_(best_prediction) MHC_II_epitope_(best_prediction) MHC_II_epitope_(WT) MHC_II_score_(WT) mutations_in_transcript distance_to_next_mutation(AA_residues) next_mutation(potential_to_change_27mer) next_mutation_source peptide_count_for_this_mutation_in_this_transcript phase_of_next_mutation other_transcripts_with_this_peptide peptide_resulting_from_this_mutation distinct_peptides_resulting_from_this_mutation keys_of_distinct_peptides_resulting_from_this_mutation coverage_tumor coverage_normal coverage_RNA VAF_in_tumor VAF_in_normal VAF_in_RNA VAF_RNA_raw VAF_RNA_limits my.annotation.with.dots -Ptx 56 chr2_176983779 HOXD10 NM_002148 uc002ukj.3 0.250592319 uc002ukj.3#exon.1 0 843 281 NA RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ RKKRCPYTKHQTLELEKEFLFNMYLTRERRLEISKSVNLTDRQVKIWFQNRRMKLKKMSRENRIRELTANLTFS AGAAAGAAGAGGTGCCCTTACACTAAGCACCAAACGCTGGAATTAGAAAGAGTTCTTGTTCAATATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAG 10 HLA-B*18:01 0.25 LELERVLVQY LELEKEFLFN 9.15 15 HLA-DRB1*03:01 2.46 LELERVLVQYVPHPR LELEKEFLFNMYLTR 13.53 1 NA NA NA 0 NA NA 1 1 NA 345 101 2 0.160818713 0 0 0 NA -Ptx 27 chr5_82817144 VCAN NM_004385 uc003kii.3 0.519506894 uc003kii.3#exon.6 0 3019 1006 I1007T DEVLGEPSQDILVTDQTRLEATISPET DEVLGEPSQDILVIDQTRLEATISPET GATGAAGTTCTAGGTGAACCCTCTCAAGACATACTTGTCACTGATCAGACTCGCCTTGAAGCGACTATTTCTCCAGAAACT 11 HLA-C*05:01 0.3 VTDQTRLEATI VIDQTRLEATI 0.4 15 HLA-DRB1*03:01 0.23 QDILVTDQTRLEATI 
QDILVIDQTRLEATI 0.1 1 NA NA NA 0 NA uc010jau.2 1 1 NA 129 38 7 0.294573643 0 0.857142857 0.857142857 NA -Ptx 24 chr1_154996999 DCST2 NM_144622 uc001fgm.3 0.28245175 uc001fgm.3#exon.10 0 1689 563 R564W RTNLLAALHRSVRWRAADQGHRSAFLV RTNLLAALHRSVRRRAADQGHRSAFLV CGAACCAATCTGTTGGCTGCCCTGCACCGATCAGTGAGGTGGCGGGCGGCTGACCAGGGCCACAGAAGTGCCTTCCTAGTG 9 HLA-B*44:02 2.15 AALHRSVRW AALHRSVRR 51 15 HLA-DRB1*03:01 4.01 TNLLAALHRSVRWRA TNLLAALHRSVRRRA 5.24 1 NA NA NA 0 NA NA 1 1 NA 340 47 0 0.108823529 0 -1 -1 NA -Ptx 148 chr9_133577547 EXOSC2 NM_014285 uc004bzu.2 7.942770402 uc004bzu.2#exon.6 0 522 174 NA SLKYGKLGQGVLVQVSPPW SLKYGKLGQGVLVQVSPSLVKRQKTHFHDLPCGASVILGNNGFIWIYPTPEHKEEEAGGFIANLEPVSLADREVISRLRNCIISLVTQRMMLYDTSILYCYEASLPHQIKDILKPEIMEEIVMETRQRLLEQEG AGCCTGAAATATGGAAAACTAGGTCAGGGGGTTTTGGTCCAGGTTTCCCCTCCCTGG 9 HLA-B*44:02 4.15 VLVQVSPPW VLVQVSPSL 28.5 15 HLA-DRB1*03:01 25.7 GKLGQGVLVQVSPPW GKLGQGVLVQVSPSL 6.66 1 NA NA NA 0 NA NA 1 2 58 162 45 78 0.378881988 0 0.064 0.064 NA -Ptx 28 chr1_115258747 NRAS NM_002524 uc009wgu.3 42.85666275 uc009wgu.3#exon.1 0 33 11 G12C MTEYKLVVVGACGVGKSALTIQLIQ MTEYKLVVVGAGGVGKSALTIQLIQ ATGACTGAGTACAAACTGGTGGTGGTTGGAGCATGTGGTGTTGGGAAAAGCGCACTGACAATCCAGCTAATCCAG 10 HLA-A*02:01 2.35 KLVVVGACGV KLVVVGAGGV 3.9 15 HLA-DRB1*01:01 12.77 MTEYKLVVVGACGVG MTEYKLVVVGAGGVG 9.95 1 NA NA NA 0 NA NA 1 1 NA 574 199 253 0.853658537 0 0.964426877 0.964426877 NA -Ptx 63 chr1_179989810 CEP350 NM_014810 uc001gnt.3 6.765113758 uc001gnt.3#exon.11 0 2901 967 Q968K QTDSSSSDMQACSKDKAKISLGSSIDS QTDSSSSDMQACSQDKAKISLGSSIDS CAGACTGACTCTTCTAGCTCTGATATGCAAGCCTGTTCTAAAGACAAAGCCAAAATATCTCTTGGTTCCAGCATAGATTCA 10 HLA-B*44:02 3.7 SDMQACSKDK SDMQACSQDK 3.05 15 HLA-DRB1*03:01 7.5 SDMQACSKDKAKISL SDMQACSQDKAKISL 18.62 2 23 chr1_179989741 SNP_replicate_1 0 NA uc001gnu.3 uc009wxl.2 1 1 NA 959 99 39 0.164754953 0 0.205128205 0.205128205 NA -Ptx 77 chr16_12798810 CPPED1 NM_018340 uc002dca.4 10.91669936 uc002dca.4#exon.2 0 384 128 D129Y DRAIPLVLVSGNHYIGNTPTAETVEEF DRAIPLVLVSGNHDIGNTPTAETVEEF 
GACAGGGCCATCCCACTGGTCCTTGTCAGCGGCAACCATTACATTGGCAACACCCCCACGGCCGAGACCGTCGAGGAGTTC 8 HLA-A*30:02 0.4 VLVSGNHY VLVSGNHD 16.15 15 HLA-DRB1*03:01 3.05 IPLVLVSGNHYIGNT IPLVLVSGNHDIGNT 4.37 1 NA NA NA 0 NA NA 1 1 NA 184 84 83 0.277173913 0 0.313253012 0.313253012 NA -Ptx 117 chrX_75397540 CXorf26 NM_016500 uc004ecl.1 15.38792717 uc004ecl.1#exon.5 0 499 166 G167E YNKAVYISVQDKEEEKGVNNGGEKRAD YNKAVYISVQDKEGEKGVNNGGEKRAD TATAACAAAGCTGTTTATATCAGTGTTCAGGACAAAGAAGAAGAGAAAGGAGTCAACAATGGAGGAGAAAAAAGAGCTGAC 10 HLA-C*05:01 2.6 VQDKEEEKGV VQDKEGEKGV 1.5 15 HLA-DRB1*03:01 7.64 YNKAVYISVQDKEEE YNKAVYISVQDKEGE 7.64 1 NA NA NA 0 NA NA 1 1 NA 100 45 172 0.46 0 0.534883721 0.534883721 NA -Ptx 110 chr11_133799661 IGSF9B NM_014987 uc001qgx.4 0.071129512 uc001qgx.4#exon.11 0 1534 511 A512V ASTHLTVIGTSPHVPGSVRVQVSMTTA ASTHLTVIGTSPHAPGSVRVQVSMTTA GCCAGCACCCACCTCACCGTCATCGGCACCAGCCCCCATGTCCCGGGCAGTGTCCGGGTCCAGGTCTCCATGACAACTGCC 9 HLA-A*02:01 4.1 TVIGTSPHV TVIGTSPHA 12 15 HLA-DRB1*03:01 7.62 PHVPGSVRVQVSMTT PHAPGSVRVQVSMTT 15.36 1 NA NA NA 0 NA uc001qgy.1 1 1 NA 90 45 0 0.422222222 0.022222222 -1 -1 NA -Ptx 26 chr14_31792892 HEATR5A NM_015473 uc001wrf.4 4.055704514 uc001wrf.4#exon.23 0 3664 1221 R1222Q TRRDEKSHPFTNPQWATRVFAAECVCR TRRDEKSHPFTNPRWATRVFAAECVCR ACCAGACGTGATGAAAAATCCCATCCTTTTACCAATCCCCAATGGGCTACTAGAGTCTTTGCTGCTGAATGTGTCTGTAGG 11 HLA-C*05:01 0.5 FTNPQWATRVF FTNPRWATRVF 0.6 15 HLA-DRB1*03:01 9.61 PQWATRVFAAECVCR PRWATRVFAAECVCR 9.61 1 NA NA NA 0 NA uc010ami.3 uc001wrg.1 1 1 NA 214 114 19 0.799065421 0 0.947368421 0.947368421 NA -Ptx 77 chr11_74429841 CHRDL2 NM_015424 uc001ovh.3 0.170452164 uc001ovh.3#exon.1 0 117 39 Y40H ARPDMFCLFHGKRHFPGESWHPYLEPQ ARPDMFCLFHGKRYSPGESWHPYLEPQ GCCCGCCCAGACATGTTCTGCCTTTTCCATGGGAAGAGACACTTCCCCGGCGAGAGCTGGCACCCCTACTTGGAGCCACAA 11 HLA-C*07:02 0.5 HFPGESWHPYL YSPGESWHPYL 6.4 15 HLA-DRB1*11:04 9.28 ARPDMFCLFHGKRHF ARPDMFCLFHGKRYS 9.28 2 1 chr11_74429837 MyMut 0 in_phase:merged_two_codons uc001ovi.3 uc001ovk.1 1 1 69 54 4 0.231884058 0.018518519 0 0 +gene 
wildTypeXmer mutatedXmer patientIdentifier my.annotation.with.dots external_annotation_2 +BRCA2 AAAAAALAAAAA AAAAAAFAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAAMAAAAA AAAAAARAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAAGAAAAA AAAAAAKAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAACAAAAA AAAAAAEAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAAKAAAAA AAAAAACAAAAA Pt29 blah1 blah2 diff --git a/neofox/tests/resources/test_data_with_mutation_in_column_name.txt b/neofox/tests/resources/test_data_with_mutation_in_column_name.txt index 5839dbb7..20838fef 100755 --- a/neofox/tests/resources/test_data_with_mutation_in_column_name.txt +++ b/neofox/tests/resources/test_data_with_mutation_in_column_name.txt @@ -1,12 +1,6 @@ -patient key mutation gene RefSeq_transcript UCSC_transcript transcript_expression exon exon_expression transcript_position codon substitution +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) MHC_I_peptide_length_(best_prediction) MHC_I_allele_(best_prediction) MHC_I_score_(best_prediction) MHC_I_epitope_(best_prediction) MHC_I_epitope_(WT) MHC_I_score_(WT) MHC_II_peptide_length_(best_prediction) MHC_II_allele_(best_prediction) MHC_II_score_(best_prediction) MHC_II_epitope_(best_prediction) MHC_II_epitope_(WT) MHC_II_score_(WT) mutations_in_transcript distance_to_next_mutation(AA_residues) next_mutation(potential_to_change_27mer) next_mutation_source peptide_count_for_this_mutation_in_this_transcript phase_of_next_mutation other_transcripts_with_this_peptide peptide_resulting_from_this_mutation distinct_peptides_resulting_from_this_mutation keys_of_distinct_peptides_resulting_from_this_mutation coverage_tumor coverage_normal coverage_RNA VAF_in_tumor VAF_in_normal VAF_in_RNA VAF_RNA_raw VAF_RNA_limits mutation -Ptx 56 chr2_176983779 HOXD10 NM_002148 uc002ukj.3 0.250592319 uc002ukj.3#exon.1 0 843 281 NA RKKRCPYTKHQTLELERVLVQYVPHPRAPPRDQ RKKRCPYTKHQTLELEKEFLFNMYLTRERRLEISKSVNLTDRQVKIWFQNRRMKLKKMSRENRIRELTANLTFS 
AGAAAGAAGAGGTGCCCTTACACTAAGCACCAAACGCTGGAATTAGAAAGAGTTCTTGTTCAATATGTACCTCACCCGCGAGCGCCGCCTAGAGATCAG 10 HLA-B*18:01 0.25 LELERVLVQY LELEKEFLFN 9.15 15 HLA-DRB1*03:01 2.46 LELERVLVQYVPHPR LELEKEFLFNMYLTR 13.53 1 NA NA NA 0 NA NA 1 1 NA 345 101 2 0.160818713 0 0 0 NA -Ptx 27 chr5_82817144 VCAN NM_004385 uc003kii.3 0.519506894 uc003kii.3#exon.6 0 3019 1006 I1007T DEVLGEPSQDILVTDQTRLEATISPET DEVLGEPSQDILVIDQTRLEATISPET GATGAAGTTCTAGGTGAACCCTCTCAAGACATACTTGTCACTGATCAGACTCGCCTTGAAGCGACTATTTCTCCAGAAACT 11 HLA-C*05:01 0.3 VTDQTRLEATI VIDQTRLEATI 0.4 15 HLA-DRB1*03:01 0.23 QDILVTDQTRLEATI QDILVIDQTRLEATI 0.1 1 NA NA NA 0 NA uc010jau.2 1 1 NA 129 38 7 0.294573643 0 0.857142857 0.857142857 NA -Ptx 24 chr1_154996999 DCST2 NM_144622 uc001fgm.3 0.28245175 uc001fgm.3#exon.10 0 1689 563 R564W RTNLLAALHRSVRWRAADQGHRSAFLV RTNLLAALHRSVRRRAADQGHRSAFLV CGAACCAATCTGTTGGCTGCCCTGCACCGATCAGTGAGGTGGCGGGCGGCTGACCAGGGCCACAGAAGTGCCTTCCTAGTG 9 HLA-B*44:02 2.15 AALHRSVRW AALHRSVRR 51 15 HLA-DRB1*03:01 4.01 TNLLAALHRSVRWRA TNLLAALHRSVRRRA 5.24 1 NA NA NA 0 NA NA 1 1 NA 340 47 0 0.108823529 0 -1 -1 NA -Ptx 148 chr9_133577547 EXOSC2 NM_014285 uc004bzu.2 7.942770402 uc004bzu.2#exon.6 0 522 174 NA SLKYGKLGQGVLVQVSPPW SLKYGKLGQGVLVQVSPSLVKRQKTHFHDLPCGASVILGNNGFIWIYPTPEHKEEEAGGFIANLEPVSLADREVISRLRNCIISLVTQRMMLYDTSILYCYEASLPHQIKDILKPEIMEEIVMETRQRLLEQEG AGCCTGAAATATGGAAAACTAGGTCAGGGGGTTTTGGTCCAGGTTTCCCCTCCCTGG 9 HLA-B*44:02 4.15 VLVQVSPPW VLVQVSPSL 28.5 15 HLA-DRB1*03:01 25.7 GKLGQGVLVQVSPPW GKLGQGVLVQVSPSL 6.66 1 NA NA NA 0 NA NA 1 2 58 162 45 78 0.378881988 0 0.064 0.064 NA -Ptx 28 chr1_115258747 NRAS NM_002524 uc009wgu.3 42.85666275 uc009wgu.3#exon.1 0 33 11 G12C MTEYKLVVVGACGVGKSALTIQLIQ MTEYKLVVVGAGGVGKSALTIQLIQ ATGACTGAGTACAAACTGGTGGTGGTTGGAGCATGTGGTGTTGGGAAAAGCGCACTGACAATCCAGCTAATCCAG 10 HLA-A*02:01 2.35 KLVVVGACGV KLVVVGAGGV 3.9 15 HLA-DRB1*01:01 12.77 MTEYKLVVVGACGVG MTEYKLVVVGAGGVG 9.95 1 NA NA NA 0 NA NA 1 1 NA 574 199 253 0.853658537 0 0.964426877 0.964426877 NA -Ptx 63 chr1_179989810 CEP350 
NM_014810 uc001gnt.3 6.765113758 uc001gnt.3#exon.11 0 2901 967 Q968K QTDSSSSDMQACSKDKAKISLGSSIDS QTDSSSSDMQACSQDKAKISLGSSIDS CAGACTGACTCTTCTAGCTCTGATATGCAAGCCTGTTCTAAAGACAAAGCCAAAATATCTCTTGGTTCCAGCATAGATTCA 10 HLA-B*44:02 3.7 SDMQACSKDK SDMQACSQDK 3.05 15 HLA-DRB1*03:01 7.5 SDMQACSKDKAKISL SDMQACSQDKAKISL 18.62 2 23 chr1_179989741 SNP_replicate_1 0 NA uc001gnu.3 uc009wxl.2 1 1 NA 959 99 39 0.164754953 0 0.205128205 0.205128205 NA -Ptx 77 chr16_12798810 CPPED1 NM_018340 uc002dca.4 10.91669936 uc002dca.4#exon.2 0 384 128 D129Y DRAIPLVLVSGNHYIGNTPTAETVEEF DRAIPLVLVSGNHDIGNTPTAETVEEF GACAGGGCCATCCCACTGGTCCTTGTCAGCGGCAACCATTACATTGGCAACACCCCCACGGCCGAGACCGTCGAGGAGTTC 8 HLA-A*30:02 0.4 VLVSGNHY VLVSGNHD 16.15 15 HLA-DRB1*03:01 3.05 IPLVLVSGNHYIGNT IPLVLVSGNHDIGNT 4.37 1 NA NA NA 0 NA NA 1 1 NA 184 84 83 0.277173913 0 0.313253012 0.313253012 NA -Ptx 117 chrX_75397540 CXorf26 NM_016500 uc004ecl.1 15.38792717 uc004ecl.1#exon.5 0 499 166 G167E YNKAVYISVQDKEEEKGVNNGGEKRAD YNKAVYISVQDKEGEKGVNNGGEKRAD TATAACAAAGCTGTTTATATCAGTGTTCAGGACAAAGAAGAAGAGAAAGGAGTCAACAATGGAGGAGAAAAAAGAGCTGAC 10 HLA-C*05:01 2.6 VQDKEEEKGV VQDKEGEKGV 1.5 15 HLA-DRB1*03:01 7.64 YNKAVYISVQDKEEE YNKAVYISVQDKEGE 7.64 1 NA NA NA 0 NA NA 1 1 NA 100 45 172 0.46 0 0.534883721 0.534883721 NA -Ptx 110 chr11_133799661 IGSF9B NM_014987 uc001qgx.4 0.071129512 uc001qgx.4#exon.11 0 1534 511 A512V ASTHLTVIGTSPHVPGSVRVQVSMTTA ASTHLTVIGTSPHAPGSVRVQVSMTTA GCCAGCACCCACCTCACCGTCATCGGCACCAGCCCCCATGTCCCGGGCAGTGTCCGGGTCCAGGTCTCCATGACAACTGCC 9 HLA-A*02:01 4.1 TVIGTSPHV TVIGTSPHA 12 15 HLA-DRB1*03:01 7.62 PHVPGSVRVQVSMTT PHAPGSVRVQVSMTT 15.36 1 NA NA NA 0 NA uc001qgy.1 1 1 NA 90 45 0 0.422222222 0.022222222 -1 -1 NA -Ptx 26 chr14_31792892 HEATR5A NM_015473 uc001wrf.4 4.055704514 uc001wrf.4#exon.23 0 3664 1221 R1222Q TRRDEKSHPFTNPQWATRVFAAECVCR TRRDEKSHPFTNPRWATRVFAAECVCR ACCAGACGTGATGAAAAATCCCATCCTTTTACCAATCCCCAATGGGCTACTAGAGTCTTTGCTGCTGAATGTGTCTGTAGG 11 HLA-C*05:01 0.5 FTNPQWATRVF FTNPRWATRVF 0.6 15 HLA-DRB1*03:01 9.61 
PQWATRVFAAECVCR PRWATRVFAAECVCR 9.61 1 NA NA NA 0 NA uc010ami.3 uc001wrg.1 1 1 NA 214 114 19 0.799065421 0 0.947368421 0.947368421 NA -Ptx 77 chr11_74429841 CHRDL2 NM_015424 uc001ovh.3 0.170452164 uc001ovh.3#exon.1 0 117 39 Y40H ARPDMFCLFHGKRHFPGESWHPYLEPQ ARPDMFCLFHGKRYSPGESWHPYLEPQ GCCCGCCCAGACATGTTCTGCCTTTTCCATGGGAAGAGACACTTCCCCGGCGAGAGCTGGCACCCCTACTTGGAGCCACAA 11 HLA-C*07:02 0.5 HFPGESWHPYL YSPGESWHPYL 6.4 15 HLA-DRB1*11:04 9.28 ARPDMFCLFHGKRHF ARPDMFCLFHGKRYS 9.28 2 1 chr11_74429837 MyMut 0 in_phase:merged_two_codons uc001ovi.3 uc001ovk.1 1 1 69 54 4 0.231884058 0.018518519 0 0 +gene wildTypeXmer mutatedXmer patientIdentifier mutation external_annotation_2 +BRCA2 AAAAAALAAAAA AAAAAAFAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAAMAAAAA AAAAAARAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAAGAAAAA AAAAAAKAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAACAAAAA AAAAAAEAAAAA Pt29 blah1 blah2 +BRCA2 AAAAAAKAAAAA AAAAAACAAAAA Pt29 blah1 blah2 diff --git a/neofox/tests/resources/test_model_file.txt b/neofox/tests/resources/test_model_file.txt deleted file mode 100755 index 974d5409..00000000 --- a/neofox/tests/resources/test_model_file.txt +++ /dev/null @@ -1,6 +0,0 @@ -gene mutation.wildTypeXmer mutation.mutatedXmer patientIdentifier dnaVariantAlleleFrequency rnaExpression rnaVariantAlleleFrequency external_annotation_1 external_annotation_2 -BRCA2 AAAAAALAAAAA AAAAAAFAAAAA Ptx 0.5 NA NA blah blah -BRCA2 AAAAAAMAAAAA AAAAAARAAAAA Ptx NA NA NA blah blah -BRCA2 AAAAAAGAAAAA AAAAAAKAAAAA Ptx NA NA NA blah blah -BRCA2 AAAAAACAAAAA AAAAAAEAAAAA Ptx NA NA NA blah blah -BRCA2 AAAAAAKAAAAA AAAAAACAAAAA Ptx NA NA NA blah blah diff --git a/neofox/tests/resources/test_patient_file.txt b/neofox/tests/resources/test_patient_file.txt index 3da7b783..65f99a7b 100755 --- a/neofox/tests/resources/test_patient_file.txt +++ b/neofox/tests/resources/test_patient_file.txt @@ -1,3 +1,5 @@ -identifier mhcIAlleles mhcIIAlleles tumorType -Ptx HLA-A*03:01,HLA-A*29:02,HLA-B*07:02,HLA-B*44:03,HLA-C*07:02,HLA-C*16:01 
HLA-DRB1*04:02,HLA-DRB1*08:01,HLA-DQA1*03:01,HLA-DQA1*04:01,HLA-DQB1*03:02,HLA-DQB1*04:02,HLA-DPA1*01:03,HLA-DPA1*02:01,HLA-DPB1*13:01,HLA-DPB1*04:01 HNSC -Pty HLA-A*03:01,HLA-A*29:02,HLA-B*07:02,HLA-B*44:03,HLA-C*07:02,HLA-C*16:01 HLA-DRB1*04:02,HLA-DRB1*08:01,HLA-DQA1*03:01,HLA-DQA1*04:01,HLA-DQB1*03:02,HLA-DQB1*04:02,HLA-DPA1*01:03,HLA-DPA1*02:01,HLA-DPB1*13:01,HLA-DPB1*04:01 HNSC +identifier mhcIAlleles mhcIIAlleles estimatedTumorContent isRnaAvailable tumorType +Ptx HLA-A*03:01,HLA-A*29:02,HLA-B*07:02,HLA-B*44:03,HLA-C*07:02,HLA-C*16:01 HLA-DRB1*04:02,HLA-DRB1*08:01,HLA-DQA1*03:01,HLA-DQA1*04:01,HLA-DQB1*03:02,HLA-DQB1*04:02,HLA-DPA1*01:03,HLA-DPA1*02:01,HLA-DPB1*13:01,HLA-DPB1*04:01 0 False HNSC +Pty HLA-A*03:01,HLA-A*29:02,HLA-B*07:02,HLA-B*44:03,HLA-C*07:02,HLA-C*16:01 HLA-DRB1*04:02,HLA-DRB1*08:01,HLA-DQA1*03:01,HLA-DQA1*04:01,HLA-DQB1*03:02,HLA-DQB1*04:02,HLA-DPA1*01:03,HLA-DPA1*02:01,HLA-DPB1*13:01,HLA-DPB1*04:01 0 False HNSC +Pt29 HLA-A*03:01,HLA-A*02:01,HLA-B*07:02,HLA-B*18:01,HLA-C*07:02,HLA-C*12:03 HLA-DRB1*11:04,HLA-DRB1*15:01,HLA-DQA1*01:02,HLA-DQA1*05:05,HLA-DQB1*06:02,HLA-DQB1*03:01,HLA-DPA1*01:03,HLA-DPA1*01:03,HLA-DPB1*04:02,HLA-DPB1*04:01 0.69 True SKCM +patient_without_tumor_type HLA-A*03:01,HLA-A*02:01,HLA-B*07:02,HLA-B*18:01,HLA-C*07:02,HLA-C*12:03 HLA-DRB1*11:04,HLA-DRB1*15:01,HLA-DQA1*01:02,HLA-DQA1*05:05,HLA-DQB1*06:02,HLA-DQB1*03:01,HLA-DPA1*01:03,HLA-DPA1*01:03,HLA-DPB1*04:02,HLA-DPB1*04:01 0.69 True no diff --git a/neofox/tests/synthetic_data/data_generator.py b/neofox/tests/synthetic_data/data_generator.py index 72434d92..1adb677a 100644 --- a/neofox/tests/synthetic_data/data_generator.py +++ b/neofox/tests/synthetic_data/data_generator.py @@ -1,7 +1,7 @@ import os from typing import List, Tuple from faker import Faker -from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMhc2Pred +from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred from neofox.MHC_predictors.MixMHCpred.mixmhcpred import MixMHCpred from 
neofox.model.mhc_parser import MhcParser from neofox.model.neoantigen import Patient, Neoantigen @@ -23,7 +23,7 @@ def __init__(self, reference_folder: ReferenceFolder, configuration: Dependencie mhc1_alleles = mixmhcpred_alleles.union(netmhcpan_alleles) mixmhc2pred_alleles = set(self.load_mhc2_alleles( - MixMhc2Pred(runner=None, configuration=configuration, mhc_parser=None).available_alleles)) + MixMHC2pred(runner=None, configuration=configuration, mhc_parser=None).available_alleles)) netmhc2pan_alleles = set(self.load_mhc2_alleles( reference_folder.get_available_alleles().get_available_mhc_ii())) mhc2_isoforms = mixmhc2pred_alleles.union(netmhc2pan_alleles) diff --git a/neofox/tests/synthetic_data/factories.py b/neofox/tests/synthetic_data/factories.py index b606f087..233cf00e 100644 --- a/neofox/tests/synthetic_data/factories.py +++ b/neofox/tests/synthetic_data/factories.py @@ -9,9 +9,9 @@ from neofox.helpers.epitope_helper import EpitopeHelper from neofox.model.validation import ModelValidator from neofox.model.mhc_parser import MhcParser -from neofox.model.neoantigen import Patient, Mhc1Name, Neoantigen, Mutation, Mhc2Name, Mhc2Isoform, \ +from neofox.model.neoantigen import Patient, Mhc1Name, Neoantigen, Mhc2Name, Mhc2Isoform, \ MhcAllele -from neofox.model.factories import get_mhc2_isoform_name, MhcFactory +from neofox.model.factories import get_mhc2_isoform_name, MhcFactory, NeoantigenFactory from neofox.references.references import HlaDatabase @@ -133,14 +133,21 @@ def neoantigen(self, patient_identifier=None, wildtype=True) -> Neoantigen: found = False while not found: try: + wildtype_xmer = self._get_wild_type_xmer() + mutation_position = int(self.length_xmer / 2) + mutated_xmer = wildtype_xmer[0:mutation_position] + self._mutate_aminoacid( + wildtype_xmer[mutation_position]) + \ + wildtype_xmer[mutation_position + 1:] neoantigen = Neoantigen( patient_identifier=self.generator.unique.uuid4() if patient_identifier is None else patient_identifier, 
gene="BRCA2" if wildtype else None, # no gene if no wildtype provided - mutation=self.mutation(wildtype=wildtype), + wild_type_xmer=wildtype_xmer if wildtype else None, + mutated_xmer=mutated_xmer, rna_expression=float(self.random_number(digits=4, fix_len=True))/100, dna_variant_allele_frequency=float(self.random_number(digits=3, fix_len=True))/1000, rna_variant_allele_frequency=float(self.random_number(digits=3, fix_len=True))/1000 ) + neoantigen.position = NeoantigenFactory.mut_position_xmer_seq(neoantigen) ModelValidator.validate_neoantigen(neoantigen) except NeofoxDataValidationException: continue @@ -148,19 +155,6 @@ def neoantigen(self, patient_identifier=None, wildtype=True) -> Neoantigen: return neoantigen - def mutation(self, wildtype) -> Mutation: - wildtype_xmer = self._get_wild_type_xmer() - mutation_position = int(self.length_xmer / 2) - mutated_xmer = wildtype_xmer[0:mutation_position] + self._mutate_aminoacid(wildtype_xmer[mutation_position]) + \ - wildtype_xmer[mutation_position + 1:] - if wildtype: - mutation = Mutation(mutated_xmer=mutated_xmer, wild_type_xmer=wildtype_xmer) - else: - mutation = Mutation(mutated_xmer=mutated_xmer) - mutation.position = EpitopeHelper.mut_position_xmer_seq(mutation) - - return mutation - def _get_wild_type_xmer(self): random_protein = self.random_elements(self.protein_list, length=1)[0] random_index = self.random_int(0, len(random_protein) - self.length_xmer) diff --git a/neofox/tests/tools.py b/neofox/tests/tools.py index 74db6aad..301069dc 100755 --- a/neofox/tests/tools.py +++ b/neofox/tests/tools.py @@ -22,7 +22,7 @@ from Bio.Data import IUPACData from mock import Mock from neofox.model.factories import NeoantigenFactory -from neofox.model.neoantigen import Mutation, Neoantigen +from neofox.model.neoantigen import Neoantigen def mock_file_existence(existing_files=[], non_existing_files=[]): @@ -37,6 +37,7 @@ def side_effect(filename): return original_os_path_exists(filename) os.path.exists = 
Mock(side_effect=side_effect) + os.path.isfile = Mock(side_effect=side_effect) def mock_file_is_executable(executable_files=[], non_executable_files=[]): @@ -73,9 +74,9 @@ def get_random_neoantigen() -> Neoantigen: return neoantigen -def get_mutation(mutated_xmer, wild_type_xmer) -> Mutation: +def get_neoantigen(mutated_xmer, wild_type_xmer) -> Neoantigen: return NeoantigenFactory.build_neoantigen( patient_identifier="123", mutated_xmer=mutated_xmer, - wild_type_xmer=wild_type_xmer).mutation + wild_type_xmer=wild_type_xmer) diff --git a/neofox/tests/unit_tests/test_api.py b/neofox/tests/unit_tests/test_api.py index eca7ff0c..3a899f0a 100644 --- a/neofox/tests/unit_tests/test_api.py +++ b/neofox/tests/unit_tests/test_api.py @@ -1,11 +1,8 @@ from unittest import TestCase from neofox.model.factories import NeoantigenFactory, PatientFactory -from neofox.model.mhc_parser import MhcParser from neofox.model.neoantigen import Neoantigen, Patient -from neofox.neofox import NeoFox from neofox.tests.fake_classes import FakeHlaDatabase -from neofox.tests.integration_tests import integration_test_tools class TestApi(TestCase): @@ -62,3 +59,35 @@ def test_no_mhc(self): mhc2_alleles=["HLA-DRB1*01:01"], mhc_database=self.hla_database) self.assertIsInstance(patient, Patient) + + def test_normalisation_and_position(self): + neoantigen = NeoantigenFactory.build_neoantigen( + mutated_xmer="aaaaaaaaaaaaaaa", + wild_type_xmer="AAAAAAGAAAAAAAA", + patient_identifier="123") + self.assertIsInstance(neoantigen, Neoantigen) + self.assertEqual(neoantigen.position, [7]) + + def test_multiple_positions(self): + neoantigen = NeoantigenFactory.build_neoantigen( + mutated_xmer="aaaaaaaaaaaaaaa", + wild_type_xmer="AAAAAAGAAAAgAAA", + patient_identifier="123") + self.assertIsInstance(neoantigen, Neoantigen) + self.assertEqual(neoantigen.position, [7, 12]) + + def test_insertion(self): + neoantigen = NeoantigenFactory.build_neoantigen( + mutated_xmer="aaaaaaaaaaaaaaa", + 
wild_type_xmer="AAAAAAGAAAAgA", + patient_identifier="123") + self.assertIsInstance(neoantigen, Neoantigen) + self.assertEqual(neoantigen.position, [7, 12]) + + def test_deletion(self): + neoantigen = NeoantigenFactory.build_neoantigen( + mutated_xmer="aaaaaaaaaaaaa", + wild_type_xmer="AAAAAAGAAAAgAAA", + patient_identifier="123") + self.assertIsInstance(neoantigen, Neoantigen) + self.assertEqual(neoantigen.position, [7, 12]) diff --git a/neofox/tests/unit_tests/test_dependencies_configuration.py b/neofox/tests/unit_tests/test_dependencies_configuration.py index 7d30d0df..3f784d72 100755 --- a/neofox/tests/unit_tests/test_dependencies_configuration.py +++ b/neofox/tests/unit_tests/test_dependencies_configuration.py @@ -54,10 +54,14 @@ def _load_env_variables(self): def test_not_provided_variable(self): self._load_env_variables() + # removes the path to avoid binaries to be loaded from the path + backup_path = os.environ['PATH'] + del os.environ['PATH'] for v in self.variables.keys(): del os.environ[v] with self.assertRaises(NeofoxConfigurationException): DependenciesConfiguration() + os.environ['PATH'] = backup_path def test_empty_string_variable(self): self._load_env_variables() diff --git a/neofox/tests/unit_tests/test_differential_binding.py b/neofox/tests/unit_tests/test_differential_binding.py index b5dfc735..b5e2a461 100755 --- a/neofox/tests/unit_tests/test_differential_binding.py +++ b/neofox/tests/unit_tests/test_differential_binding.py @@ -25,19 +25,19 @@ class TestDifferentialBinding(TestCase): def setUp(self): - self.diffbdg_calculator = DifferentialBinding(affinity_threshold=500) + self.diffbdg_calculator = DifferentialBinding() def test_dai(self): result = self.diffbdg_calculator.dai( - score_mutation=50, score_wild_type=500, affin_filtering=False + score_mutation=50, score_wild_type=500 ) self.assertEqual(result, 450) result = self.diffbdg_calculator.dai( - score_mutation=550, score_wild_type=2000, affin_filtering=True + score_mutation=550, 
score_wild_type=2000 ) - self.assertEqual(result, None) + self.assertEqual(result, 1450) result = self.diffbdg_calculator.dai( - score_mutation=50, score_wild_type=10, affin_filtering=True + score_mutation=50, score_wild_type=10 ) self.assertLess(result, 0.0) @@ -89,16 +89,16 @@ def test_classify_adn_cdn(self): self.assertEqual(result, None) def test_affinity_threshold(self): - diffbdg_calculator = DifferentialBinding(affinity_threshold=1000) + diffbdg_calculator = DifferentialBinding() result = diffbdg_calculator.dai( - score_mutation=50, score_wild_type=530, affin_filtering=False + score_mutation=50, score_wild_type=530 ) self.assertIsNotNone(result) result = diffbdg_calculator.dai( - score_mutation=550, score_wild_type=530, affin_filtering=True + score_mutation=550, score_wild_type=530 ) self.assertIsNotNone(result) result = diffbdg_calculator.dai( - score_mutation=1030, score_wild_type=1030, affin_filtering=True + score_mutation=1030, score_wild_type=1030 ) - self.assertIsNone(result) + self.assertIsNotNone(result) diff --git a/neofox/tests/unit_tests/test_epitope_helper.py b/neofox/tests/unit_tests/test_epitope_helper.py index cbad2bc2..e980a6f8 100755 --- a/neofox/tests/unit_tests/test_epitope_helper.py +++ b/neofox/tests/unit_tests/test_epitope_helper.py @@ -19,6 +19,7 @@ from unittest import TestCase from neofox.helpers.epitope_helper import EpitopeHelper +from neofox.model.neoantigen import PredictedEpitope, Annotation class EpitopeHelperTest(TestCase): @@ -41,16 +42,23 @@ def test_number_of_mismatches(self): def test_position_mutation(self): position = EpitopeHelper().position_of_mutation_epitope( - wild_type="AAAAAA", mutation="AAANAA" - ) + PredictedEpitope(wild_type_peptide="AAAAAA", mutated_peptide="AAANAA")) self.assertEqual(position, 4) position = EpitopeHelper().position_of_mutation_epitope( - wild_type="AAAAAA", mutation="AAAAAA" - ) + PredictedEpitope(wild_type_peptide="AAAAAA", mutated_peptide="AAAAAA")) self.assertEqual(position, -1) position = 
EpitopeHelper().position_of_mutation_epitope( - wild_type="AAAAAA", mutation="AANNNN" - ) + PredictedEpitope(wild_type_peptide="AAAAAA", mutated_peptide="AANNNN")) self.assertEqual(position, 6) + def test_get_annotation_by_name(self): + annotations = [Annotation(name='this', value='5'), Annotation(name='that', value='0')] + self.assertEqual(EpitopeHelper.get_annotation_by_name(annotations=annotations, name='this'), '5') + self.assertEqual(EpitopeHelper.get_annotation_by_name(annotations=annotations, name='that'), '0') + try: + EpitopeHelper.get_annotation_by_name(annotations=annotations, name='nothing') + self.assertTrue(False) + except ValueError: + self.assertTrue(True) + # TODO: test ther methods in the EpitopeHelper diff --git a/neofox/tests/unit_tests/test_expression.py b/neofox/tests/unit_tests/test_expression.py index 5068b34d..9c20b5d4 100755 --- a/neofox/tests/unit_tests/test_expression.py +++ b/neofox/tests/unit_tests/test_expression.py @@ -20,22 +20,30 @@ from unittest import TestCase from neofox.model.factories import NOT_AVAILABLE_VALUE +from neofox.model.neoantigen import Neoantigen from neofox.published_features.expression import Expression class TestExpression(TestCase): + + def setUp(self) -> None: + self.expression = Expression() + def test_calculate_expression_mutation(self): - result = Expression(transcript_expression=12.0, vaf_rna=0.2).get_annotations()[ - 0 - ] + neoantigen = Neoantigen(rna_expression=12.0, dna_variant_allele_frequency=0.2) + result = self.expression.get_annotations(neoantigen=neoantigen)[0] self.assertGreater(result.value, "0.0") + # no reads for mut - result = Expression(transcript_expression=12.0, vaf_rna=0.0).get_annotations()[ - 0 - ] + neoantigen = Neoantigen(rna_expression=12.0, dna_variant_allele_frequency=0.0) + result = self.expression.get_annotations(neoantigen=neoantigen)[0] self.assertEqual(result.value, "0") + # no reads for mut/wt - result = Expression(transcript_expression=12.0, vaf_rna=-1).get_annotations()[0] 
+ neoantigen = Neoantigen(rna_expression=12.0, dna_variant_allele_frequency=-1, rna_variant_allele_frequency=-1) + result = self.expression.get_annotations(neoantigen=neoantigen)[0] self.assertEqual(result.value, NOT_AVAILABLE_VALUE) - result = Expression(transcript_expression=None, vaf_rna=-1).get_annotations()[0] + + neoantigen = Neoantigen(rna_expression=None, dna_variant_allele_frequency=-1, rna_variant_allele_frequency=-1) + result = self.expression.get_annotations(neoantigen=neoantigen)[0] self.assertEqual(result.value, NOT_AVAILABLE_VALUE) diff --git a/neofox/tests/unit_tests/test_iedb_immunogenicity.py b/neofox/tests/unit_tests/test_iedb_immunogenicity.py index 6b037b36..0d7e82b5 100755 --- a/neofox/tests/unit_tests/test_iedb_immunogenicity.py +++ b/neofox/tests/unit_tests/test_iedb_immunogenicity.py @@ -25,7 +25,7 @@ class TestImmunogenicity(TestCase): def test_immunogenicity(self): - iedb_immunogenicity = IEDBimmunogenicity(affinity_threshold=500) + iedb_immunogenicity = IEDBimmunogenicity() result = iedb_immunogenicity.calculate_iedb_immunogenicity( peptide="ENPVVHFF", mhc_allele=MhcAllele(name="HLA-A*68:01"), mhc_score=400 ) @@ -35,10 +35,10 @@ def test_immunogenicity(self): mhc_allele=MhcAllele(name="HLA-A*68:01"), mhc_score=600, ) - self.assertIsNone(result) + self.assertEqual(result, 0.27579) def test_affinity_threshold(self): - iedb_immunogenicity = IEDBimmunogenicity(affinity_threshold=1000) + iedb_immunogenicity = IEDBimmunogenicity() result = iedb_immunogenicity.calculate_iedb_immunogenicity( peptide="ENPVVHFF", mhc_allele=MhcAllele(name="HLA-A*68:01"), mhc_score=600 ) @@ -51,4 +51,11 @@ def test_default_affinity_threshold(self): ) self.assertGreater(result, 0) + def test_none_mhc_allele(self): + iedb_immunogenicity = IEDBimmunogenicity() + result = iedb_immunogenicity.calculate_iedb_immunogenicity( + peptide="ENPVVHFF", mhc_allele=None, mhc_score=600 + ) + self.assertGreater(result, 0) + diff --git 
a/neofox/tests/unit_tests/test_model_converter.py b/neofox/tests/unit_tests/test_model_converter.py index 8a16bc61..a69e1bf4 100755 --- a/neofox/tests/unit_tests/test_model_converter.py +++ b/neofox/tests/unit_tests/test_model_converter.py @@ -25,14 +25,15 @@ from neofox.model.conversion import ModelConverter from neofox.model.neoantigen import ( Neoantigen, - Mutation, Patient, Annotation, - NeoantigenAnnotations, + Annotations, Zygosity, Mhc2Name, ) from neofox.model.factories import MhcFactory, NeoantigenFactory +from neofox.model.validation import ModelValidator +from neofox.references.references import ORGANISM_HOMO_SAPIENS from neofox.tests.fake_classes import FakeHlaDatabase, FakeH2Database from neofox.tests.tools import get_random_neoantigen @@ -73,10 +74,14 @@ def test_model2csv2model(self): neoantigen = get_random_neoantigen() csv_data = ModelConverter._objects2dataframe([neoantigen]) neoantigen2 = ModelConverter._neoantigens_csv2objects(csv_data)[0] + neoantigen.external_annotations = None + neoantigen2.external_annotations = None + neoantigen.neofox_annotations = None + neoantigen2.neofox_annotations = None self.assertEqual(neoantigen, neoantigen2) def test_neoantigen_annotations(self): - annotations = NeoantigenAnnotations() + annotations = Annotations() annotations.annotations = [ Annotation(name="string_annotation", value="blabla"), Annotation(name="integer_annotation", value=1), @@ -89,102 +94,6 @@ def test_neoantigen_annotations(self): self.assertEqual(annotations_dict.get("annotations")[1].get("value"), 1) self.assertEqual(annotations_dict.get("annotations")[2].get("value"), 1.1) - def test_candidate_neoantigens2model(self): - candidate_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_data.txt" - ) - with open(candidate_file) as f: - self.count_lines = len(f.readlines()) - neoantigens = ModelConverter().parse_candidate_file(candidate_file) - self.assertIsNotNone(neoantigens) - self.assertEqual(self.count_lines -1, 
len(neoantigens)) - for n in neoantigens: - self.assertIsInstance(n, Neoantigen) - self.assertIsInstance(n.mutation, Mutation) - self.assertTrue(n.gene is not None and len(n.gene) > 0) - self.assertTrue( - n.mutation.mutated_xmer is not None and len(n.mutation.mutated_xmer) > 1 - ) - self.assertTrue( - n.mutation.wild_type_xmer is not None - and len(n.mutation.wild_type_xmer) > 1 - ) - self.assertTrue( - n.mutation.position is not None and len(n.mutation.position) >= 1 - ) - self.assertTrue( - n.rna_variant_allele_frequency is None - or n.rna_variant_allele_frequency == -1 - or (0 <= n.rna_variant_allele_frequency <= 1) - ) - self.assertTrue(n.rna_expression is None or n.rna_expression >= 0) - self.assertTrue(0 <= n.dna_variant_allele_frequency <= 1) - - # test external annotations - self._assert_external_annotations( - expected_external_annotations=[ - "patient", - "key", - "mutation", - "RefSeq_transcript", - "UCSC_transcript", - "transcript_expression", - "exon", - "exon_expression", - "transcript_position", - "codon", - "substitution", - "+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)", - "[WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)", - "mRNA_for_+13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)", - "MHC_I_peptide_length_(best_prediction)", - "MHC_I_allele_(best_prediction)", - "MHC_I_score_(best_prediction)", - "MHC_I_epitope_(best_prediction)", - "MHC_I_epitope_(WT)", - "MHC_I_score_(WT)", - "MHC_II_peptide_length_(best_prediction)", - "MHC_II_allele_(best_prediction)", - "MHC_II_score_(best_prediction)", - "MHC_II_epitope_(best_prediction)", - "MHC_II_epitope_(WT)", - "MHC_II_score_(WT)", - "mutations_in_transcript", - "distance_to_next_mutation(AA_residues)", - "next_mutation(potential_to_change_27mer)", - "next_mutation_source", - "peptide_count_for_this_mutation_in_this_transcript", - "phase_of_next_mutation", - "other_transcripts_with_this_peptide", - "peptide_resulting_from_this_mutation", - "distinct_peptides_resulting_from_this_mutation", - 
"keys_of_distinct_peptides_resulting_from_this_mutation", - "coverage_tumor", - "coverage_normal", - "coverage_RNA", - "VAF_in_tumor", - "VAF_in_normal", - "VAF_in_RNA", - "VAF_RNA_raw", - "VAF_RNA_limits" - ], - non_nullable_annotations=[ - "patient", - "key", - "mutation", - "RefSeq_transcript", - "UCSC_transcript", - "transcript_expression", - "exon", - "exon_expression", - "transcript_position", - "codon", - "+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)", - "[WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL)" - ], - neoantigen_annotations=n.external_annotations, - ) - def test_candidate_neoantigens2model_with_dot_in_column_name(self): candidate_file = pkg_resources.resource_filename( neofox.tests.__name__, "resources/test_data_with_dot_in_column_name.txt" @@ -232,13 +141,11 @@ def test_csv_neoantigens2model(self): self.assertEqual(5, len(neoantigens)) for n in neoantigens: self.assertTrue(isinstance(n, Neoantigen)) - self.assertNotEmpty(n.mutation) self.assertNotEmpty(n.patient_identifier) self.assertNotEmpty(n.gene) - self.assertTrue(isinstance(n.mutation, Mutation)) - self.assertNotEmpty(n.mutation.mutated_xmer) - self.assertNotEmpty(n.mutation.wild_type_xmer) - self.assertIsNotNone(n.mutation.position) + self.assertNotEmpty(n.mutated_xmer) + self.assertNotEmpty(n.wild_type_xmer) + self.assertIsNotNone(n.position) # test external annotations self._assert_external_annotations( @@ -260,31 +167,16 @@ def test_json_neoantigens2model(self): self.assertEqual(5, len(neoantigens)) for n in neoantigens: self.assertTrue(isinstance(n, Neoantigen)) - self.assertNotEmpty(n.mutation) self.assertNotEmpty(n.patient_identifier) self.assertNotEmpty(n.rna_expression) self.assertNotEmpty(n.rna_variant_allele_frequency) self.assertNotEmpty(n.dna_variant_allele_frequency) - self.assertTrue(isinstance(n.mutation, Mutation)) - self.assertNotEmpty(n.mutation.position) + self.assertNotEmpty(n.position) def assertNotEmpty(self, value): self.assertIsNotNone(value) self.assertNotEqual(value, "") - 
def test_overriding_patient_id(self): - candidate_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_data.txt" - ) - with open(candidate_file) as f: - self.count_lines = len(f.readlines()) - neoantigens = ModelConverter().parse_candidate_file(candidate_file, patient_id="patientX") - for n in neoantigens: - self.assertEqual(n.patient_identifier, "patientX") - neoantigens = ModelConverter().parse_candidate_file(candidate_file) - for n in neoantigens: - self.assertEqual(n.patient_identifier, "Ptx") - def test_patients_csv_file2model(self): patients_file = pkg_resources.resource_filename( neofox.tests.__name__, "resources/alleles.Pt29.csv" @@ -303,6 +195,21 @@ def test_patients_csv_file2model(self): ) self.assertEqual(patients[0].is_rna_available, False) + def test_patients_without_mhc2(self): + patients_file = pkg_resources.resource_filename( + neofox.tests.__name__, "resources/alleles.Pt29_without_mhc2.csv" + ) + patients = ModelConverter.parse_patients_file(patients_file, self.hla_database) + self.assertIsNotNone(patients) + self.assertIsInstance(patients, list) + self.assertTrue(len(patients) == 2) + self.assertIsInstance(patients[0], Patient) + self.assertEqual(patients[0].identifier, "Pt29") + self.assertEqual(3, len(patients[0].mhc1)) + self.assertEqual(6, len([a for m in patients[0].mhc1 for a in m.alleles])) + self.assertEqual(0, len(patients[0].mhc2)) + self.assertEqual(patients[0].is_rna_available, False) + def test_patients_csv_file2model_mouse(self): patients_file = pkg_resources.resource_filename( neofox.tests.__name__, "resources/alleles.Pt29_mouse.csv" @@ -395,8 +302,10 @@ def test_annotations2short_wide_df(self): neoantigens = [ Neoantigen( - mutation=Mutation(wild_type_xmer="AAAAAAA", mutated_xmer="AAACAAA", position=[]), - neofox_annotations=NeoantigenAnnotations( + wild_type_xmer="AAAAAAA", + mutated_xmer="AAACAAA", + position=[], + neofox_annotations=Annotations( annotations=[ Annotation(name="this_name", 
value="this_value"), Annotation(name="that_name", value="that_value"), @@ -406,8 +315,10 @@ def test_annotations2short_wide_df(self): ) ), Neoantigen( - mutation=Mutation(wild_type_xmer="AAAGAAA", mutated_xmer="AAAZAAA", position=[1, 2, 3]), - neofox_annotations=NeoantigenAnnotations( + wild_type_xmer="AAAGAAA", + mutated_xmer="AAAZAAA", + position=[1, 2, 3], + neofox_annotations=Annotations( annotations=[ Annotation(name="this_name", value="0"), Annotation(name="that_name", value="1"), @@ -417,10 +328,10 @@ def test_annotations2short_wide_df(self): ) ), ] - df = ModelConverter.annotations2table(neoantigens=neoantigens) + df = ModelConverter.annotations2neoantigens_table(neoantigens=neoantigens) self.assertEqual(df.shape[0], 2) self.assertEqual(df.shape[1], 13) - self.assertEqual(0, df[df["mutation.position"].transform(lambda x: isinstance(x, list))].shape[0]) + self.assertEqual(0, df[df["position"].transform(lambda x: isinstance(x, list))].shape[0]) def test_parse_mhc1_heterozygous_alleles(self): mhc1s = MhcFactory.build_mhc1_alleles( @@ -896,6 +807,32 @@ def test_parse_mhc2_non_existing_allele_does_not_fail_mouse(self): ) self.assertEqual(2, len(mhc2s)) + def test_candidate_neoepitopes2model(self): + candidate_file = pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_data_neoepitopes.txt" + ) + with open(candidate_file) as f: + self.count_lines = len(f.readlines()) + neoepitopes = ModelConverter().parse_candidate_neoepitopes_file(candidate_file, self.hla_database) + self.assertIsNotNone(neoepitopes) + self.assertEqual(self.count_lines -1, len(neoepitopes)) + for n in neoepitopes: + ModelValidator.validate_neoepitope(n, ORGANISM_HOMO_SAPIENS) + + def test_candidate_neoepitopes2model_with_patients(self): + candidate_file = pkg_resources.resource_filename( + neofox.tests.__name__, "resources/test_data_neoepitopes_with_patients.txt" + ) + + with open(candidate_file) as f: + self.count_lines = len(f.readlines()) + + neoepitopes = 
ModelConverter().parse_candidate_neoepitopes_file(candidate_file, self.hla_database) + self.assertIsNotNone(neoepitopes) + self.assertEqual(self.count_lines -1, len(neoepitopes)) + for n in neoepitopes: + ModelValidator.validate_neoepitope(n, ORGANISM_HOMO_SAPIENS) + def _assert_isoforms(self, mhc2): for isoform in mhc2.isoforms: if mhc2.name == Mhc2Name.DR: diff --git a/neofox/tests/unit_tests/test_neofox.py b/neofox/tests/unit_tests/test_neofox.py index 36717cc5..ebf62169 100755 --- a/neofox/tests/unit_tests/test_neofox.py +++ b/neofox/tests/unit_tests/test_neofox.py @@ -23,7 +23,7 @@ import pkg_resources from neofox.model.conversion import ModelConverter -from neofox.model.neoantigen import Neoantigen, Mutation, Patient +from neofox.model.neoantigen import Neoantigen, Patient import neofox from neofox.exceptions import ( @@ -43,7 +43,6 @@ def test_missing_input_raises_exception(self): with self.assertRaises(NeofoxConfigurationException): NeoFox( neoantigens=None, - patient_id=None, patients=None, num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -51,7 +50,6 @@ def test_missing_input_raises_exception(self): with self.assertRaises(NeofoxConfigurationException): NeoFox( neoantigens=[], - patient_id=None, patients=[], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -61,7 +59,6 @@ def test_not_set_reference_folder_fails(self): with self.assertRaises(NeofoxConfigurationException): NeoFox( neoantigens=[self._get_test_neoantigen()], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -72,7 +69,6 @@ def test_empty_reference_folder_fails(self): with self.assertRaises(NeofoxConfigurationException): NeoFox( neoantigens=[self._get_test_neoantigen()], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -80,11 +76,10 @@ def test_empty_reference_folder_fails(self): def test_validation_captures_bad_wild_type_xmer(self): neoantigen = 
self._get_test_neoantigen() - neoantigen.mutation.wild_type_xmer = "123" # should be a valid aminoacid + neoantigen.wild_type_xmer = "123" # should be a valid aminoacid with self.assertRaises(NeofoxDataValidationException): NeoFox( neoantigens=[neoantigen], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -93,11 +88,10 @@ def test_validation_captures_bad_wild_type_xmer(self): def test_validation_captures_bad_mutated_xmer(self): neoantigen = self._get_test_neoantigen() - neoantigen.mutation.mutated_xmer = "123" # should be a valid aminoacid + neoantigen.mutated_xmer = "123" # should be a valid aminoacid with self.assertRaises(NeofoxDataValidationException): NeoFox( neoantigens=[neoantigen], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -110,7 +104,6 @@ def test_validation_captures_bad_patient(self): with self.assertRaises(NeofoxDataValidationException): NeoFox( neoantigens=[self._get_test_neoantigen()], - patient_id=None, patients=[patient], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -120,7 +113,6 @@ def test_validation_captures_bad_patient(self): def test_valid_data_does_not_raise_exceptions(self): NeoFox( neoantigens=[self._get_test_neoantigen()], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -135,7 +127,6 @@ def test_neoantigens_referring_to_non_existing_patients(self): with self.assertRaises(NeofoxDataValidationException): NeoFox( neoantigens=[neoantigen], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -145,7 +136,6 @@ def test_neoantigens_referring_to_non_existing_patients(self): with self.assertRaises(NeofoxDataValidationException): NeoFox( neoantigens=[neoantigen], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -155,7 +145,6 @@ def 
test_neoantigens_referring_to_non_existing_patients(self): with self.assertRaises(NeofoxDataValidationException): NeoFox( neoantigens=[neoantigen], - patient_id=None, patients=[self._get_test_patient()], num_cpus=1, reference_folder=FakeReferenceFolder(), @@ -164,7 +153,7 @@ def test_neoantigens_referring_to_non_existing_patients(self): def test_no_expression_imputation(self): input_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_candidate_file.txt" + neofox.tests.__name__, "resources/test_data_model_realistic.txt" ) patients_file = pkg_resources.resource_filename( neofox.tests.__name__, "resources/test_patient_file.txt" @@ -179,14 +168,14 @@ def test_no_expression_imputation(self): ) for neoantigen in neoantigens: for neoantigen_imputed in neofox_runner.neoantigens: - if neoantigen.mutation.mutated_xmer == neoantigen_imputed.mutation.mutated_xmer: + if neoantigen.mutated_xmer == neoantigen_imputed.mutated_xmer: self.assertEqual( neoantigen.rna_expression, neoantigen_imputed.rna_expression ) def test_with_expression_imputation(self): input_file = pkg_resources.resource_filename( - neofox.tests.__name__, "resources/test_candidate_file_Pty.txt" + neofox.tests.__name__, "resources/test_data_model_realistic_Pty.txt" ) neoantigens= ModelConverter.parse_candidate_file(input_file) import copy @@ -211,9 +200,8 @@ def test_with_expression_imputation(self): def _get_test_neoantigen(self): return Neoantigen( gene="GENE", - mutation=Mutation( - mutated_xmer="AAAAAAAIAAAAAAAA", wild_type_xmer="AAAAAAALAAAAAAAA" - ), + mutated_xmer="AAAAAAAIAAAAAAAA", + wild_type_xmer="AAAAAAALAAAAAAAA", patient_identifier="12345", rna_expression=0.12345, ) diff --git a/neofox/tests/unit_tests/test_priority_score.py b/neofox/tests/unit_tests/test_priority_score.py index eb246206..c6803443 100755 --- a/neofox/tests/unit_tests/test_priority_score.py +++ b/neofox/tests/unit_tests/test_priority_score.py @@ -26,7 +26,7 @@ def setUp(self): def test_priority(self): 
result = self.priority_calculator.calc_priority_score( - vaf_tumor=0.35, + vaf_dna=0.35, vaf_rna=0.33, transcript_expr=12, no_mismatch=1, @@ -36,7 +36,7 @@ def test_priority(self): ) self.assertGreater(result, 0) result = self.priority_calculator.calc_priority_score( - vaf_tumor=None, + vaf_dna=None, vaf_rna=0.33, transcript_expr=12, no_mismatch=1, @@ -46,7 +46,7 @@ def test_priority(self): ) self.assertGreater(result, 0) result = self.priority_calculator.calc_priority_score( - vaf_tumor=0.35, + vaf_dna=0.35, vaf_rna=None, transcript_expr=12, no_mismatch=1, @@ -56,7 +56,7 @@ def test_priority(self): ) self.assertGreater(result, 0) result = self.priority_calculator.calc_priority_score( - vaf_tumor=None, + vaf_dna=None, vaf_rna=-1, transcript_expr=12, no_mismatch=1, @@ -66,7 +66,7 @@ def test_priority(self): ) self.assertEqual(result, None) result = self.priority_calculator.calc_priority_score( - vaf_tumor=0.35, + vaf_dna=0.35, vaf_rna=0.33, transcript_expr=None, no_mismatch=1, @@ -76,7 +76,7 @@ def test_priority(self): ) self.assertEqual(result, None) result = self.priority_calculator.calc_priority_score( - vaf_tumor=0.35, + vaf_dna=0.35, vaf_rna=0.33, transcript_expr=None, no_mismatch=1, diff --git a/neofox/tests/unit_tests/test_runner.py b/neofox/tests/unit_tests/test_runner.py index 76156a52..d8cdb6b5 100755 --- a/neofox/tests/unit_tests/test_runner.py +++ b/neofox/tests/unit_tests/test_runner.py @@ -28,7 +28,7 @@ def setUp(self): def test_runner(self): output, errors = self.runner.run_command(cmd=["python", "-V"]) - self.assertTrue("Python 3.7" in output or "Python 3.6" in output or "Python 3.8" in output) + self.assertTrue("Python 3.7" in output or "Python 3.8" in output, "Output:" + output) self.assertTrue(len(errors) == 0) def test_runner_failure(self): diff --git a/neofox/tests/unit_tests/test_synthetic_data.py b/neofox/tests/unit_tests/test_synthetic_data.py index dafee04a..664fe29a 100644 --- a/neofox/tests/unit_tests/test_synthetic_data.py +++ 
b/neofox/tests/unit_tests/test_synthetic_data.py @@ -4,7 +4,7 @@ from faker import Faker import neofox -from neofox.model.neoantigen import Zygosity +from neofox.model.neoantigen import Zygosity, Neoantigen from neofox.tests.fake_classes import FakeHlaDatabase from neofox.tests.synthetic_data.factories import PatientProvider, NeoantigenProvider @@ -14,17 +14,18 @@ class TestFactories(TestCase): def setUp(self) -> None: faker = Faker() self.patient_provider = PatientProvider(faker, - ["HLA-A*01:01", "HLA-A*01:02", "HLA-A*01:03", "HLA-B*01:01", "HLA-B*01:02", "HLA-B*01:03", - "HLA-C*01:01", "HLA-C*01:02", "HLA-C*01:03"], - ["DRB10101", "DRB10102", "DRB10103", "DRB10104", "HLA-DPA10101-DPB10101", "HLA-DPA10102-DPB10102", "HLA-DPA10103-DPB10103", - "HLA-DPA10104-DPB10104" - "HLA-DQA10101-DQB10101", "HLA-DQA10102-DQB10102", "HLA-DQA10103-DQB10103", "HLA-DQA10104-DQB10104"], + ["HLA-A*74:18", "HLA-A*01:141", "HLA-A*01:12", "HLA-B*07:02", "HLA-B*07:05", "HLA-B*07:06", + "HLA-C*01:02", "HLA-C*02:10", "HLA-C*03:03"], + ["DRB1*01:01", "DRB1*01:02", "DRB1*01:03", "DRB1*01:04", "HLA-DPA1*01:01-DPB1*01:01", + "HLA-DPA1*01:02-DPB1*01:02", "HLA-DPA1*01:03-DPB1*01:03", + "HLA-DPA1*01:04-DPB1*01:04" + "HLA-DQA1*01:01-DQB1*01:01", "HLA-DQA1*01:02-DQB1*01:02", + "HLA-DQA1*01:03-DQB1*01:03", "HLA-DQA1*01:04-DQB1*01:04"], FakeHlaDatabase() ) self.neoantigen_provider = NeoantigenProvider(faker, proteome_fasta=pkg_resources.resource_filename( neofox.tests.__name__, "resources/proteome_test.fa")) - @skip def test_patient(self): patient1 = self.patient_provider.patient() patient2 = self.patient_provider.patient() @@ -45,15 +46,14 @@ def test_neoantigen(self): neoantigen = self.neoantigen_provider.neoantigen() self._assert_neoantigen(neoantigen) - def _assert_neoantigen(self, neoantigen): + def _assert_neoantigen(self, neoantigen: Neoantigen): self.assertIsNotNone(neoantigen.patient_identifier) - self.assertTrue(len(neoantigen.mutation.mutated_xmer) == 27) - 
self.assertTrue(len(neoantigen.mutation.wild_type_xmer) == 27) - self.assertNotEqual(neoantigen.mutation.wild_type_xmer, neoantigen.mutation.mutated_xmer) - self.assertEqual(neoantigen.mutation.wild_type_xmer[0:13], neoantigen.mutation.mutated_xmer[0:13]) - self.assertEqual(neoantigen.mutation.wild_type_xmer[14:], neoantigen.mutation.mutated_xmer[14:]) + self.assertTrue(len(neoantigen.mutated_xmer) == 27) + self.assertTrue(len(neoantigen.wild_type_xmer) == 27) + self.assertNotEqual(neoantigen.wild_type_xmer, neoantigen.mutated_xmer) + self.assertEqual(neoantigen.wild_type_xmer[0:13], neoantigen.mutated_xmer[0:13]) + self.assertEqual(neoantigen.wild_type_xmer[14:], neoantigen.mutated_xmer[14:]) - @skip def test_neoantigen_and_patient(self): patient = self.patient_provider.patient() neoantigen = self.neoantigen_provider.neoantigen(patient_identifier=patient.identifier) diff --git a/neofox/tests/unit_tests/test_tcell_predictor.py b/neofox/tests/unit_tests/test_tcell_predictor.py index 49d34a95..3ad1a908 100755 --- a/neofox/tests/unit_tests/test_tcell_predictor.py +++ b/neofox/tests/unit_tests/test_tcell_predictor.py @@ -17,6 +17,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see .# from unittest import TestCase + +from neofox.model.neoantigen import PredictedEpitope from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import ( TcellPrediction, ) @@ -26,75 +28,56 @@ class TestTCellPredictor(TestCase): def setUp(self) -> None: self.tcell_predictor = TcellPrediction() - # TODO: Franzis maybe you can add some more sensible tests here? + # TODO: Franzi maybe you can add some more sensible tests here? 
def test_non_existing_gene(self): - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( + result = TcellPrediction().calculate_tcell_predictor_score( gene="BLAH", - substitution="blaaaah", - epitope="BLAHBLAH", - score=5 + epitope=PredictedEpitope(wild_type_peptide="BLAHBLAH", mutated_peptide="blaaaah", affinity_mutated=5) ) self.assertEqual(None, result) def test_empty_gene(self): - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( + result = TcellPrediction().calculate_tcell_predictor_score( gene=None, - substitution="blaaaah", - epitope="BLAHBLAH", - score=5 + epitope=PredictedEpitope(wild_type_peptide="BLAHBLAH", mutated_peptide="blaaaah", affinity_mutated=5) ) self.assertEqual(None, result) - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( + result = TcellPrediction().calculate_tcell_predictor_score( gene="", - substitution="blaaaah", - epitope="BLAHBLAH", - score=5 + epitope=PredictedEpitope(wild_type_peptide="BLAHBLAH", mutated_peptide="blaaaah", affinity_mutated=5) ) self.assertEqual(None, result) - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( + result = TcellPrediction().calculate_tcell_predictor_score( gene=" ", - substitution="blaaaah", - epitope="BLAHBLAH", - score=5 + epitope=PredictedEpitope(wild_type_peptide="BLAHBLAH", mutated_peptide="blaaaah", affinity_mutated=5) ) self.assertEqual(None, result) def test_existing_gene_with_too_short_epitope(self): - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( - gene="BRCA2", substitution="C", epitope="CCCCCC", score=5 + result = TcellPrediction().calculate_tcell_predictor_score( + gene="BRCA2", + epitope=PredictedEpitope(wild_type_peptide="CCCCCC", mutated_peptide="CCVCCC", affinity_mutated=5) ) self.assertEqual(None, result) def test_existing_gene_with_too_long_epitope(self): - result = 
TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( - gene="BRCA2", substitution="C", epitope="CCCCCCCCCC", score=5 + result = TcellPrediction().calculate_tcell_predictor_score( + gene="BRCA2", + epitope=PredictedEpitope(wild_type_peptide="CCCCCCCCCC", mutated_peptide="CCCCCVCCCC", affinity_mutated=5) ) self.assertEqual(None, result) def test_existing_gene(self): - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( + result = TcellPrediction().calculate_tcell_predictor_score( gene="BRCA2", - substitution="CCCCVCCCC", - epitope="CCCCCCCCC", - score=5 + epitope=PredictedEpitope(wild_type_peptide="CCCCCCCCC", mutated_peptide="CCCCVCCCC", affinity_mutated=5) ) - self.assertAlmostEqual(0.2453409331088489, float(result)) + self.assertAlmostEqual(0.30162944008956233, float(result)) def test_rare_aminoacid(self): - result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score( - gene="BRCA2", - substitution="CU", - epitope="CCCCUCCCC", - score=5 - ) - self.assertIsNone(result) - - def test_affinity_threshold(self): - result = TcellPrediction(affinity_threshold=1)._calculate_tcell_predictor_score( + result = TcellPrediction().calculate_tcell_predictor_score( gene="BRCA2", - substitution="CCCCVCCCC", - epitope="CCCCCCCCC", - score=5 + epitope=PredictedEpitope(wild_type_peptide="CCCCCCCCC", mutated_peptide="CCCCUCCCC", affinity_mutated=5) ) self.assertIsNone(result) diff --git a/neofox/tests/unit_tests/test_validation.py b/neofox/tests/unit_tests/test_validation.py index a546831d..39f38d5b 100644 --- a/neofox/tests/unit_tests/test_validation.py +++ b/neofox/tests/unit_tests/test_validation.py @@ -12,7 +12,7 @@ Mhc2Name, Mhc2GeneName, Mhc2Gene, - Mhc2Isoform, + Mhc2Isoform, PredictedEpitope, MhcAllele, ) from neofox.model.validation import ModelValidator from neofox.references.references import ORGANISM_HOMO_SAPIENS, ORGANISM_MUS_MUSCULUS @@ -708,3 +708,157 @@ def test_bad_is_rna_available(self): 
Patient(identifier="123", is_rna_available="False"), ORGANISM_HOMO_SAPIENS ) + + def test_validate_neoepitope_mhci(self): + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ) + ModelValidator.validate_neoepitope(neoepitope, ORGANISM_HOMO_SAPIENS) + + def test_validate_neoepitope_mhci_without_wt(self): + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ) + ModelValidator.validate_neoepitope(neoepitope, ORGANISM_HOMO_SAPIENS) + + def test_validate_neoepitope_mhci_bad_length(self): + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + mutated_peptide="DILVT", # 5 aa < min 8 aa + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + ORGANISM_HOMO_SAPIENS + ) + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + mutated_peptide="DILVTAAAAAAAAAAAAAAAAA", # 22 aa > max 14 aa + wild_type_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + ORGANISM_HOMO_SAPIENS + ) + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + wild_type_peptide="DILVT", # 5 aa < min 8 aa + mutated_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + ORGANISM_HOMO_SAPIENS + ) + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + wild_type_peptide="DILVTAAAAAAAAAAAAAAAAA", # 22 aa > max 14 aa + mutated_peptide="DILVIDQTR", + allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'), + ), + ORGANISM_HOMO_SAPIENS + ) + + def test_validate_neoepitope_mhcii(self): + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + 
isoform_mhc_i_i=self._get_test_mhcii_isoform('DRB1*01:01'), + ) + ModelValidator.validate_neoepitope(neoepitope, ORGANISM_HOMO_SAPIENS) + + def test_validate_neoepitope_mhcii_without_wt(self): + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform('DRB1*01:01'), + + ) + ModelValidator.validate_neoepitope(neoepitope, ORGANISM_HOMO_SAPIENS) + + def test_validate_neoepitope_mhcii_bad_length(self): + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + mutated_peptide="DILVT", # 5 aa < min 8 aa + wild_type_peptide="DILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform('DRB1*01:01'), + ), + ORGANISM_HOMO_SAPIENS + ) + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + wild_type_peptide="DILVT", # 5 aa < min 8 aa + mutated_peptide="DILVIDQTR", + isoform_mhc_i_i=self._get_test_mhcii_isoform('DRB1*01:01'), + ), + ORGANISM_HOMO_SAPIENS + ) + + def test_validate_neoepitope_with_patient_id(self): + neoepitope = PredictedEpitope( + mutated_peptide="DILVTDQTR", + wild_type_peptide="DILVIDQTR", + patient_identifier="123", + ) + ModelValidator.validate_neoepitope(neoepitope, ORGANISM_HOMO_SAPIENS) + + def test_validate_neoepitope_with_patient_id_bad_length(self): + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + mutated_peptide="DILVT", # 5 aa < min 8 aa + wild_type_peptide="DILVIDQTR", + patient_identifier="123", + ), + ORGANISM_HOMO_SAPIENS + ) + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + wild_type_peptide="DILVT", # 5 aa < min 8 aa + mutated_peptide="DILVIDQTR", + patient_identifier="123", + ), + ORGANISM_HOMO_SAPIENS + ) + + def test_validate_neoepitope_mhci_bad_allele(self): + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + 
PredictedEpitope( + mutated_peptide="DILVTDQTR", + allele_mhc_i=MhcAllele(name="something"), + ), + ORGANISM_HOMO_SAPIENS + ) + + def test_validate_neoepitope_mhcii_bad_isoform(self): + self.assertRaises( + NeofoxDataValidationException, + ModelValidator.validate_neoepitope, + PredictedEpitope( + mutated_peptide="DILVTDQTR", + isoform_mhc_i_i=Mhc2Isoform(name="something"), + ), + ORGANISM_HOMO_SAPIENS + ) + + def _get_test_mhci_allele(self, allele) -> MhcAllele: + + return self.hla_parser.parse_mhc_allele(allele) + + def _get_test_mhcii_isoform(self, isoform) -> Mhc2Isoform: + return self.hla_parser.parse_mhc2_isoform(isoform) diff --git a/requirements.txt b/requirements.txt index 0a64df35..57ef210c 100755 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ betterproto~=1.2.5 pysam~=0.19.1 dask[complete]>=2021.10.0 distributed>=2021.10.0 -faker~=6.6.2 +python-dotenv==0.12.0 +faker~=13.13.0 orjson~=3.5.2 xmltodict~=0.12.0 diff --git a/setup.py b/setup.py index d0284b27..ea4290f5 100755 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ entry_points={ "console_scripts": [ "neofox=neofox.command_line:neofox_cli", + "neofox-epitope=neofox.command_line:neofox_epitope_cli", "neofox-configure=neofox.command_line:neofox_configure", ], },