diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 2102670c5..0a21020ca 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -8,7 +8,7 @@ comment = The contents of this file cannot be merged with that of setup.cfg unti search = __version__ = "{current_version}" replace = __version__ = "{new_version}" -[bumpversion:file:setup.cfg] +[bumpversion:file:pyproject.toml] search = version = {current_version} replace = version = {new_version} diff --git a/.github/actions/install-python-and-package/action.yml b/.github/actions/install-python-and-package/action.yml index 4887709d1..510da80a7 100644 --- a/.github/actions/install-python-and-package/action.yml +++ b/.github/actions/install-python-and-package/action.yml @@ -6,8 +6,8 @@ inputs: python-version: required: false - description: "The Python version to use. Specify major and minor version, e.g. '3.9'." - default: "3.9" + description: "The Python version to use. Specify major and minor version, e.g. '3.10'." + default: "3.10" extras-require: required: false @@ -24,12 +24,6 @@ runs: with: access_token: ${{ github.token }} - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ inputs.python-version }} - cache: 'pip' - cache-dependency-path: setup.cfg - - name: Setup conda uses: s-weigand/setup-conda@v1 with: @@ -38,29 +32,70 @@ runs: conda-channels: anaconda - run: conda --version shell: bash {0} - - run: which python - shell: bash {0} - - - name: Upgrade pip and install dependencies + - name: Python info + shell: bash -e {0} + run: | + which python3 + python3 --version + - name: Install dependencies on Linux shell: bash {0} env: CMAKE_INSTALL_PREFIX: .local + if: runner.os == 'Linux' run: | - python3 -m pip install --upgrade pip setuptools wheel - # Install dssp + # Install dependencies not handled by setuptools + ## DSSP sudo apt-get install -y dssp - # Only way to install msms is through conda + ## MSMS conda install -c bioconda msms - # Safest way to install PyTorch and 
PyTorch Geometric is through conda - conda install pytorch==2.0.0 torchvision=0.15.0 torchaudio=2.0.0 cpuonly -c pytorch + ## PyTorch, PyG, PyG adds + ### Installing for CPU only on the CI + conda install pytorch torchvision torchaudio cpuonly -c pytorch conda install pyg -c pyg - # Install optional pyg dependencies - python3 -m pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-$(python3 -c "import torch; print(torch.__version__)")+cpu.html - # In the future, release h5xplorer on PyPI - pip install git+https://github.com/DeepRank/h5xplorer.git@master - - - name: Install the package + pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-$(python3 -c "import torch; print(torch.__version__)")+cpu.html + - name: Install dependencies on MacOS shell: bash {0} - run: python3 -m pip install .[${{ inputs.extras-require }}] env: - CONDA_PREFIX: /usr/share/miniconda + CMAKE_INSTALL_PREFIX: .local + if: runner.os == 'macOS' + run: | + # Install dependencies not handled by setuptools + ## DSSP + git clone https://github.com/PDB-REDO/libcifpp.git --recurse-submodules + cd libcifpp + cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release + cmake --build build + cmake --install build + ####### + git clone https://github.com/mhekkel/libmcfp.git + cd libmcfp + mkdir build + cd build + cmake .. + cmake --build . + cmake --install . + ####### + git clone https://github.com/PDB-REDO/dssp.git + cd dssp + mkdir build + cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release + cmake --build build + cmake --install build + ## MSMS + cd /tmp/ + wget http://mgltools.scripps.edu/downloads/tars/releases/MSMSRELEASE/REL2.6.1/msms_i86Linux2_2.6.1.tar.gz + sudo mkdir /usr/local/lib/msms + cd /usr/local/lib/msms + sudo tar zxvf /tmp/msms_i86Linux2_2.6.1.tar.gz + sudo ln -s /usr/local/lib/msms/msms.i86Linux2.2.6.1 /usr/local/bin/msms + sudo ln -s /usr/local/lib/msms/pdb_to_xyzr* /usr/local/bin + ## PyTorch, PyG, PyG adds + ### Installing for CPU only on the CI + conda install pytorch torchvision torchaudio cpuonly -c pytorch + pip install torch_geometric + pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-$(python3 -c "import torch; print(torch.__version__)")+cpu.html + # PyTables via conda only for MacOS + conda install pytables + - name: Install the package + shell: bash {0} + run: pip install .'[${{ inputs.extras-require }}]' diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b62565144..6b1ad6ee3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,15 +37,15 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest'] - python-version: ['3.9', '3.10'] + python-version: ['3.10', '3.11'] steps: - uses: actions/checkout@v3 - uses: ./.github/actions/install-python-and-package + with: + python-version: ${{ matrix.python-version }} + extras-require: test, publishing - name: Run unit tests - env: - CONDA_PREFIX: /usr/share/miniconda - run: | - pytest -v + run: pytest -v - name: Verify that we can build the package - run: python3 setup.py sdist bdist_wheel \ No newline at end of file + run: python3 -m build diff --git a/.github/workflows/coveralls.yml b/.github/workflows/coveralls.yml index cdec5534e..a59459c20 100644 --- a/.github/workflows/coveralls.yml +++ b/.github/workflows/coveralls.yml @@ -37,11 +37,14 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest"] - python-version: [3.9] + python-version: 
['3.10'] steps: - uses: actions/checkout@v3 - uses: ./.github/actions/install-python-and-package + with: + python-version: ${{ matrix.python-version }} + extras-require: test - name: Run unit tests with coverage run: pytest --cov --cov-append --cov-report xml --cov-report term --cov-report html - name: Coveralls @@ -49,4 +52,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.github_token }} COVERALLS_FLAG_NAME: python-${{ matrix.os }} run: | - coveralls --service=github \ No newline at end of file + coveralls --service=github diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index c750b6dc4..4c0e1c9f4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -37,10 +37,13 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest"] - python-version: [3.9] + python-version: ['3.10'] steps: - uses: actions/checkout@v3 - uses: ./.github/actions/install-python-and-package + with: + python-version: ${{ matrix.python-version }} + extras-require: test - name: Check style against standards using prospector - run: prospector --die-on-tool-error \ No newline at end of file + run: prospector --die-on-tool-error diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1baa5437b..e5c3d6350 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,12 +15,13 @@ jobs: fail-fast: false matrix: os: ['ubuntu-latest'] - python-version: [3.9] + python-version: ['3.10'] steps: - uses: actions/checkout@v3 - uses: ./.github/actions/install-python-and-package with: + python-version: ${{ matrix.python-version }} extras-require: publishing - name: Build wheel and source distribution run: python -m build diff --git a/.gitignore b/.gitignore index 6a2314b46..827d85d1a 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,6 @@ docs/apidocs #other *_.ipynb -Untitled* \ No newline at end of file +Untitled* +data_raw/ +data_processed/ diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 
9734d1a84..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include LICENSE -include README.md -recursive-include deeprank2 * \ No newline at end of file diff --git a/README.md b/README.md index d9d7bc275..c82bd6915 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ | **fairness** | [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/6403/badge)](https://bestpractices.coreinfrastructure.org/projects/6403) | | **package** | [![PyPI version](https://badge.fury.io/py/deeprank2.svg)](https://badge.fury.io/py/deeprank2) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/f3f98b2d1883493ead50e3acaa23f2cc)](https://app.codacy.com/gh/DeepRank/deeprank2?utm_source=github.com&utm_medium=referral&utm_content=DeepRank/deeprank2&utm_campaign=Badge_Grade) | | **docs** | [![Documentation Status](https://readthedocs.org/projects/deeprank2/badge/?version=latest)](https://deeprank2.readthedocs.io/en/latest/?badge=latest) [![DOI](https://zenodo.org/badge/450496579.svg)](https://zenodo.org/badge/latestdoi/450496579) | -| **tests** | [![Build Status](https://github.com/DeepRank/deeprank2/actions/workflows/build.yml/badge.svg)](https://github.com/DeepRank/deeprank2/actions) ![Linting status](https://github.com/DeepRank/deeprank2/actions/workflows/linting.yml/badge.svg?branch=main) [![Coverage Status](https://coveralls.io/repos/github/DeepRank/deeprank2/badge.svg?branch=main)](https://coveralls.io/github/DeepRank/deeprank2?branch=main) | +| **tests** | [![Build Status](https://github.com/DeepRank/deeprank2/actions/workflows/build.yml/badge.svg)](https://github.com/DeepRank/deeprank2/actions) ![Linting status](https://github.com/DeepRank/deeprank2/actions/workflows/linting.yml/badge.svg?branch=main) [![Coverage 
Status](https://coveralls.io/repos/github/DeepRank/deeprank2/badge.svg?branch=main)](https://coveralls.io/github/DeepRank/deeprank2?branch=main) ![Python](https://img.shields.io/badge/python-3.10-blue.svg) ![Python](https://img.shields.io/badge/python-3.11-blue.svg) | +| **running on** | ![Ubuntu](https://img.shields.io/badge/Ubuntu-E95420?style=for-the-badge&logo=ubuntu&logoColor=white) | | **license** | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/license/apache-2-0/) | - ## Overview ![alt-text](./deeprank2.png) @@ -46,35 +46,28 @@ DeepRank2 extensive documentation can be found [here](https://deeprank2.rtfd.io/ - [GraphDataset](#graphdataset) - [GridDataset](#griddataset) - [Training](#training) - - [h5x support](#h5x-support) - [Package development](#package-development) ## Installation +The package officially supports ubuntu-latest OS only, whose functioning is widely tested through the continuous integration workflows. + ### Dependencies -Before installing deeprank2 you need to install some dependencies. We advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) with Python >= 3.9 installed. - -* [msms](https://ssbio.readthedocs.io/en/latest/instructions/msms.html): `conda install -c bioconda msms`. *For MacOS with M1 chip users*: you can follow [these instructions](https://ssbio.readthedocs.io/en/latest/instructions/msms.html). 
-* [PyTorch](https://pytorch.org/): - * CPU only: `conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 cpuonly -c pytorch` - * if using GPU: `conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 pytorch-cuda=11.7 -c pytorch -c nvidia` -* [pytorch-geometric](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html): `conda install pyg -c pyg` -* [Dependencies for pytorch geometric from wheels](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html#installation-from-wheels): `pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html`. - - Here, `${TORCH}` and `${CUDA}` should be replaced by the pytorch and CUDA versions installed. You can find these using: - - `python -c "import torch; print(torch.__version__)"` and - - `python -c "import torch; print(torch.version.cuda)"` - - if this returns `None`, use `cpu` instead - - For example: `pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html` -* Check if [DSSP 4](https://swift.cmbi.umcn.nl/gv/dssp/) is installed: `dssp --version` - * if this gives an error or shows a version lower than 4: - * on ubuntu 22.04 or newer: `sudo apt-get install dssp`. - * If the package cannot be located, first run `sudo apt-get update`. - * on older versions of ubuntu or on mac or lacking sudo priviliges: install from [here](https://github.com/pdb-redo/dssp), following the instructions listed. -* Check if gcc is installed: `gcc --version`. - * if this gives an error, run `sudo apt-get install gcc`. - -* For MacOS with M1 chip (otherwise ignore this): `conda install pytables` +Before installing deeprank2 you need to install some dependencies. We advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) with Python >= 3.10 installed. 
The following dependency installation instructions are updated as of 14/09/2023, but in case of issues during installation always refer to the official documentation which is linked below: + +* [MSMS](https://anaconda.org/bioconda/msms): `conda install -c bioconda msms`. + * [Here](https://ssbio.readthedocs.io/en/latest/instructions/msms.html) for MacOS with M1 chip users. +* [PyTorch](https://pytorch.org/get-started/locally/) + * We support torch's CPU library as well as CUDA. +* [PyG](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) and its optional dependencies: `torch_scatter`, `torch_sparse`, `torch_cluster`, `torch_spline_conv`. +* [DSSP 4](https://swift.cmbi.umcn.nl/gv/dssp/) + * Check if `dssp` is installed: `dssp --version`. If this gives an error or shows a version lower than 4: + * on ubuntu 22.04 or newer: `sudo apt-get install dssp`. If the package cannot be located, first run `sudo apt-get update`. + * on older versions of ubuntu or on mac or lacking sudo privileges: install from [here](https://github.com/pdb-redo/dssp), following the instructions listed. Alternatively, follow [this](https://github.com/PDB-REDO/libcifpp/issues/49) thread. +* [GCC](https://gcc.gnu.org/install/) + * Check if gcc is installed: `gcc --version`. If this gives an error, run `sudo apt-get install gcc`. +* For MacOS with M1 chip users only: install [the conda version of PyTables](https://www.pytables.org/usersguide/installation.html). 
### Deeprank2 Package @@ -84,26 +77,29 @@ Once the dependencies are installed, you can install the latest stable release o pip install deeprank2 ``` -Alternatively, get all the new developments by cloning the repo and installing the code with +Alternatively, get all the new developments by cloning the repo and installing the editable version of the package with: ```bash git clone https://github.com/DeepRank/deeprank2 cd deeprank2 -pip install -e ./ +pip install -e .'[test]' ``` +The `test` extra is optional, and can be used to install test-related dependencies useful during the development. + ### Test installation If you have installed the package from a cloned repository (second option above), you can check that all components were installed correctly, using pytest. The quick test should be sufficient to ensure that the software works, while the full test (a few minutes) will cover a much broader range of settings to ensure everything is correct. -First, install [pytest](https://docs.pytest.org/): `pip install pytest`. -Then run `pytest tests/test_integration.py` for the quick test or just `pytest` for the full test (expect a few minutes to run). +Run `pytest tests/test_integration.py` for the quick test or just `pytest` for the full test (expect a few minutes to run). ### Contributing + If you would like to contribute to the package in any way, please see [our guidelines](CONTRIBUTING.rst). ## Quick start + The following section serves as a first guide to start using the package, using Protein-Protein Interface (PPI) queries as example. For an enhanced learning experience, we provide in-depth [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials) for generating PPI data, generating variants data, and for the training pipeline. For more details, see the [extended documentation](https://deeprank2.rtfd.io/). 
@@ -318,10 +314,6 @@ trainer.test() ``` -## h5x support - -After installing `h5xplorer` (https://github.com/DeepRank/h5xplorer), you can execute the python file `deeprank2/h5x/h5x.py` to explorer the connection graph used by deeprank2. The context menu (right click on the name of the structure) allows to automatically plot the graphs using `plotly`. - ## Package development - Branching diff --git a/deeprank2/features/components.py b/deeprank2/features/components.py index 53f3a2b59..e7cb90d2f 100644 --- a/deeprank2/features/components.py +++ b/deeprank2/features/components.py @@ -2,12 +2,11 @@ from typing import Optional import numpy as np -from deeprank2.molstruct.atom import Atom -from deeprank2.molstruct.residue import Residue -from deeprank2.molstruct.variant import SingleResidueVariant -from deeprank2.utils.graph import Graph from deeprank2.domain import nodestorage as Nfeat +from deeprank2.molstruct.atom import Atom +from deeprank2.molstruct.residue import Residue, SingleResidueVariant +from deeprank2.utils.graph import Graph from deeprank2.utils.parsing import atomic_forcefield _log = logging.getLogger(__name__) diff --git a/deeprank2/features/conservation.py b/deeprank2/features/conservation.py index ed3fb9365..4a2c46f43 100644 --- a/deeprank2/features/conservation.py +++ b/deeprank2/features/conservation.py @@ -1,14 +1,13 @@ from typing import Optional import numpy as np + +from deeprank2.domain import nodestorage as Nfeat from deeprank2.domain.aminoacidlist import amino_acids from deeprank2.molstruct.atom import Atom -from deeprank2.molstruct.residue import Residue -from deeprank2.molstruct.variant import SingleResidueVariant +from deeprank2.molstruct.residue import Residue, SingleResidueVariant from deeprank2.utils.graph import Graph -from deeprank2.domain import nodestorage as Nfeat - def add_features( # pylint: disable=unused-argument pdb_path: str, graph: Graph, diff --git a/deeprank2/features/contact.py b/deeprank2/features/contact.py index 
ba3d8a961..e8eaa2e83 100644 --- a/deeprank2/features/contact.py +++ b/deeprank2/features/contact.py @@ -9,7 +9,7 @@ from deeprank2.domain import edgestorage as Efeat from deeprank2.molstruct.atom import Atom from deeprank2.molstruct.pair import AtomicContact, ResidueContact -from deeprank2.molstruct.variant import SingleResidueVariant +from deeprank2.molstruct.residue import SingleResidueVariant from deeprank2.utils.graph import Graph from deeprank2.utils.parsing import atomic_forcefield diff --git a/deeprank2/features/exposure.py b/deeprank2/features/exposure.py index d05d0b308..8d6be16cf 100644 --- a/deeprank2/features/exposure.py +++ b/deeprank2/features/exposure.py @@ -9,12 +9,11 @@ from Bio.PDB.HSExposure import HSExposureCA from Bio.PDB.PDBParser import PDBParser from Bio.PDB.ResidueDepth import get_surface, residue_depth -from deeprank2.molstruct.atom import Atom -from deeprank2.molstruct.residue import Residue -from deeprank2.molstruct.variant import SingleResidueVariant -from deeprank2.utils.graph import Graph from deeprank2.domain import nodestorage as Nfeat +from deeprank2.molstruct.atom import Atom +from deeprank2.molstruct.residue import Residue, SingleResidueVariant +from deeprank2.utils.graph import Graph _log = logging.getLogger(__name__) diff --git a/deeprank2/features/irc.py b/deeprank2/features/irc.py index f3f101437..f28215885 100644 --- a/deeprank2/features/irc.py +++ b/deeprank2/features/irc.py @@ -3,15 +3,14 @@ from typing import Dict, List, Optional, Tuple import pdb2sql + +from deeprank2.domain import nodestorage as Nfeat from deeprank2.domain.aminoacidlist import amino_acids from deeprank2.molstruct.aminoacid import Polarity from deeprank2.molstruct.atom import Atom -from deeprank2.molstruct.residue import Residue -from deeprank2.molstruct.variant import SingleResidueVariant +from deeprank2.molstruct.residue import Residue, SingleResidueVariant from deeprank2.utils.graph import Graph -from deeprank2.domain import nodestorage as Nfeat - 
_log = logging.getLogger(__name__) diff --git a/deeprank2/features/secondary_structure.py b/deeprank2/features/secondary_structure.py index dd86b87a7..d6b4a258d 100644 --- a/deeprank2/features/secondary_structure.py +++ b/deeprank2/features/secondary_structure.py @@ -5,12 +5,11 @@ import numpy as np from Bio.PDB import PDBParser from Bio.PDB.DSSP import DSSP -from deeprank2.molstruct.atom import Atom -from deeprank2.molstruct.residue import Residue -from deeprank2.molstruct.variant import SingleResidueVariant -from deeprank2.utils.graph import Graph from deeprank2.domain import nodestorage as Nfeat +from deeprank2.molstruct.atom import Atom +from deeprank2.molstruct.residue import Residue, SingleResidueVariant +from deeprank2.utils.graph import Graph class DSSPError(Exception): diff --git a/deeprank2/features/surfacearea.py b/deeprank2/features/surfacearea.py index 758064872..d36ffe0bc 100644 --- a/deeprank2/features/surfacearea.py +++ b/deeprank2/features/surfacearea.py @@ -3,12 +3,11 @@ import freesasa import numpy as np -from deeprank2.molstruct.atom import Atom -from deeprank2.molstruct.residue import Residue -from deeprank2.molstruct.variant import SingleResidueVariant -from deeprank2.utils.graph import Graph from deeprank2.domain import nodestorage as Nfeat +from deeprank2.molstruct.atom import Atom +from deeprank2.molstruct.residue import Residue, SingleResidueVariant +from deeprank2.utils.graph import Graph # pylint: disable=c-extension-no-member diff --git a/deeprank2/molstruct/aminoacid.py b/deeprank2/molstruct/aminoacid.py index 9ac4edf96..88c615a87 100644 --- a/deeprank2/molstruct/aminoacid.py +++ b/deeprank2/molstruct/aminoacid.py @@ -4,7 +4,7 @@ class Polarity(Enum): - "a value to express a residue's polarity" + """One-hot encoding of the amino acid polarity.""" NONPOLAR = 0 POLAR = 1 @@ -15,12 +15,11 @@ class Polarity(Enum): def onehot(self): t = np.zeros(4) t[self.value] = 1.0 - return t class AminoAcid: - "a value to represent one of the amino 
acids" + """An amino acid represents the type of `Residue` in a `PDBStructure`.""" def __init__( # pylint: disable=too-many-arguments self, @@ -112,24 +111,25 @@ def hydrogen_bond_acceptors(self) -> int: def onehot(self) -> np.ndarray: if self._index is None: raise ValueError( - "amino acid {self._name} index is not set, thus no onehot can be computed" + f"Amino acid {self._name} index is not set, thus no onehot can be computed." ) # 20 canonical amino acids # selenocysteine and pyrrolysine are indexed as cysteine and lysine, respectively a = np.zeros(20) a[self._index] = 1.0 - return a @property def index(self) -> int: return self._index - def __hash__(self): + def __hash__(self) -> hash: return hash(self.name) - def __eq__(self, other): - return isinstance(other, type(self)) and other.name == self.name + def __eq__(self, other) -> bool: + if isinstance(other, AminoAcid): + return other.name == self.name + return NotImplemented - def __repr__(self): + def __repr__(self) -> str: return self._three_letter_code diff --git a/deeprank2/molstruct/atom.py b/deeprank2/molstruct/atom.py index d933ce5f9..56ee7e10e 100644 --- a/deeprank2/molstruct/atom.py +++ b/deeprank2/molstruct/atom.py @@ -1,12 +1,14 @@ +from __future__ import annotations + from enum import Enum import numpy as np + from deeprank2.molstruct.residue import Residue class AtomicElement(Enum): - "value to represent the type of pdb atoms" - + """One-hot encoding of the atomic element (or atom type).""" C = 1 O = 2 # noqa: pycodestyle N = 3 @@ -22,7 +24,7 @@ def onehot(self) -> np.array: class Atom: - """Represents a pdb atom.""" + """One atom in a PDBStructure.""" def __init__( # pylint: disable=too-many-arguments self, @@ -50,11 +52,10 @@ def __init__( # pylint: disable=too-many-arguments self._occupancy = occupancy def __eq__(self, other) -> bool: - return ( - isinstance(self, type(other)) - and self._residue == other._residue - and self._name == other._name - ) + if isinstance (other, Atom): + return 
(self._residue == other._residue + and self._name == other._name) + return NotImplemented def __hash__(self) -> hash: return hash((tuple(self._position), self._element, self._name)) @@ -62,9 +63,8 @@ def __hash__(self) -> hash: def __repr__(self) -> str: return f"{self._residue} {self._name}" - def change_altloc(self, alternative_atom): - "replace the atom's location by another atom's location" - + def change_altloc(self, alternative_atom: Atom): + """Replace the atom's location by another atom's location.""" self._position = alternative_atom.position self._occupancy = alternative_atom.occupancy @@ -85,5 +85,5 @@ def position(self) -> np.array: return self._position @property - def residue(self): + def residue(self) -> Residue: return self._residue diff --git a/deeprank2/molstruct/pair.py b/deeprank2/molstruct/pair.py index b57e1b664..ecdb5febf 100644 --- a/deeprank2/molstruct/pair.py +++ b/deeprank2/molstruct/pair.py @@ -6,57 +6,50 @@ class Pair: - """A hashable, comparable object for any set of two inputs where order doesn't matter. - Args: - item1 (Any object): The pair's first object, must be convertable to string. - item2 (Any object): The pair's second object, must be convertable to string. - """ + """A hashable, comparable object for any set of two inputs where order doesn't matter.""" def __init__(self, item1: Any, item2: Any): + """ + Args: + item1 (Any object): The pair's first object, must be convertable to string. + item2 (Any object): The pair's second object, must be convertable to string. + """ self.item1 = item1 self.item2 = item2 - def __hash__(self): - # The hash should be solely based on the two paired items, not on their - # order. - + def __hash__(self) -> hash: + """The hash should be solely based on the two paired items, not on their order.""" s1 = str(self.item1) s2 = str(self.item2) - if s1 < s2: return hash(s1 + s2) return hash(s2 + s1) - def __eq__(self, other): - # Compare the pairs as sets, so the order doesn't matter. 
- - return ( - self.item1 == other.item1 - and self.item2 == other.item2 - or self.item1 == other.item2 - and self.item2 == other.item1 - ) + def __eq__(self, other) -> bool: + """Compare the pairs as sets, so the order doesn't matter.""" + if isinstance(other, Pair): + return (self.item1 == other.item1 and self.item2 == other.item2 + or self.item1 == other.item2 and self.item2 == other.item1) + return NotImplemented def __iter__(self): # Iterate over the two items in the pair. return iter([self.item1, self.item2]) - def __repr__(self): + def __repr__(self) -> str: return (str(self.item1) + str(self.item2)) class Contact(Pair, ABC): - pass + """Parent class to bind `ResidueContact` and `AtomicContact` objects.""" class ResidueContact(Contact): - "A contact between two residues from a structure" + """A contact between two residues from a structure.""" def __init__(self, residue1: Residue, residue2: Residue): - self._residue1 = residue1 self._residue2 = residue2 - super().__init__(residue1, residue2) @property @@ -69,17 +62,11 @@ def residue2(self) -> Residue: class AtomicContact(Contact): - "A contact between two atoms from a structure" - - def __init__( - self, - atom1: Atom, - atom2: Atom - ): + """A contact between two atoms from a structure.""" + def __init__(self, atom1: Atom, atom2: Atom): self._atom1 = atom1 self._atom2 = atom2 - super().__init__(atom1, atom2) @property diff --git a/deeprank2/molstruct/residue.py b/deeprank2/molstruct/residue.py index c59668038..ef8700532 100644 --- a/deeprank2/molstruct/residue.py +++ b/deeprank2/molstruct/residue.py @@ -1,13 +1,25 @@ -from typing import Optional +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional import numpy as np + from deeprank2.molstruct.aminoacid import AminoAcid from deeprank2.molstruct.structure import Chain from deeprank2.utils.pssmdata import PssmRow +if TYPE_CHECKING: + from deeprank2.molstruct.atom import Atom + class Residue: - "represents a pdb residue" + """One 
protein residue in a `PDBStructure`. + + A `Residue` is the basic building block of proteins and protein complex, + here represented by `PDBStructures`. + Each residue is of a certain `AminoAcid` type and consists of multiple + `Atom`s. + """ def __init__( self, @@ -32,26 +44,21 @@ def __init__( self._atoms = [] def __eq__(self, other) -> bool: - return ( - isinstance(self, type(other)) - and self._chain == other._chain - and self._number == other._number - and self._insertion_code == other._insertion_code - ) + if isinstance(other, Residue): + return (self._chain == other._chain + and self._number == other._number + and self._insertion_code == other._insertion_code + ) + return NotImplemented def __hash__(self) -> hash: return hash((self._number, self._insertion_code)) def get_pssm(self) -> PssmRow: - """ - If the residue's chain has pssm info linked to it, - then return the part that belongs to this residue. - """ - + """Load pssm info linked to the residue.""" pssm = self._chain.pssm if pssm is None: raise FileNotFoundError(f'No pssm file found for Chain {self._chain}.') - return pssm[self] @property @@ -59,7 +66,7 @@ def number(self) -> int: return self._number @property - def chain(self): + def chain(self) -> Chain: return self._chain @property @@ -67,23 +74,21 @@ def amino_acid(self) -> AminoAcid: return self._amino_acid @property - def atoms(self): + def atoms(self) -> list[Atom]: return self._atoms @property def number_string(self) -> str: - "contains both the number and the insertion code (if any)" - + """Contains both the number and the insertion code (if any).""" if self._insertion_code is not None: return f"{self._number}{self._insertion_code}" - return str(self._number) @property def insertion_code(self) -> str: return self._insertion_code - def add_atom(self, atom): + def add_atom(self, atom: Atom): self._atoms.append(atom) def __repr__(self) -> str: @@ -93,25 +98,48 @@ def __repr__(self) -> str: def position(self) -> np.array: return 
np.mean([atom.position for atom in self._atoms], axis=0) + def get_center(self) -> np.ndarray: + """Find the center position of a `Residue`. -def get_residue_center(residue: Residue) -> np.ndarray: - """Chooses a center position for a residue. + Center position is found as follows: + 1. find beta carbon + 2. if no beta carbon is found: find alpha carbon + 3. if no alpha carbon is found: take the mean of the atom positions + """ + betas = [atom for atom in self.atoms if atom.name == "CB"] + if len(betas) > 0: + return betas[0].position - Based on the atoms it has: - 1. find beta carbon, if present - 2. find alpha carbon, if present - 3. else take the mean of the atom positions - """ + alphas = [atom for atom in self.atoms if atom.name == "CA"] + if len(alphas) > 0: + return alphas[0].position + + if len(self.atoms) == 0: + raise ValueError(f"cannot get the center position from {self}, because it has no atoms") + + return np.mean([atom.position for atom in self.atoms], axis=0) - betas = [atom for atom in residue.atoms if atom.name == "CB"] - if len(betas) > 0: - return betas[0].position - alphas = [atom for atom in residue.atoms if atom.name == "CA"] - if len(alphas) > 0: - return alphas[0].position +class SingleResidueVariant: + """A single residue mutation of a PDBStructure.""" - if len(residue.atoms) == 0: - raise ValueError(f"cannot get the center position from {residue}, because it has no atoms") + def __init__(self, residue: Residue, variant_amino_acid: AminoAcid): + """ + Args: + residue (Residue): the `Residue` object from the PDBStructure that is mutated. + variant_amino_acid (AminoAcid): the amino acid that the `Residue` is mutated into. 
+ """ + self._residue = residue + self._variant_amino_acid = variant_amino_acid - return np.mean([atom.position for atom in residue.atoms], axis=0) + @property + def residue(self) -> Residue: + return self._residue + + @property + def variant_amino_acid(self) -> AminoAcid: + return self._variant_amino_acid + + @property + def wildtype_amino_acid(self) -> AminoAcid: + return self._residue.amino_acid diff --git a/deeprank2/molstruct/structure.py b/deeprank2/molstruct/structure.py index fdfdd77b9..4b508ab04 100644 --- a/deeprank2/molstruct/structure.py +++ b/deeprank2/molstruct/structure.py @@ -1,10 +1,22 @@ -from typing import Optional +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional from deeprank2.utils.pssmdata import PssmRow +if TYPE_CHECKING: + from deeprank2.molstruct.atom import Atom + from deeprank2.molstruct.residue import Residue + class PDBStructure: - "represents one entire pdb structure" + """A protein or protein complex structure. + + A `PDBStructure` can contain one or multiple `Chains`, i.e. separate + molecular entities (individual proteins). + One PDBStructure consists of a number of `Residue`s, each of which is of a + particular `AminoAcid` type and in turn consists of a number of `Atom`s. 
+ """ def __init__(self, id_: Optional[str] = None): """ @@ -16,7 +28,9 @@ def __init__(self, id_: Optional[str] = None): self._chains = {} def __eq__(self, other) -> bool: - return isinstance(self, type(other)) and self._id == other._id + if isinstance(other, PDBStructure): + return self._id == other._id + return NotImplemented def __hash__(self) -> hash: return hash(self._id) @@ -27,26 +41,24 @@ def __repr__(self) -> str: def has_chain(self, chain_id: str) -> bool: return chain_id in self._chains - def get_chain(self, chain_id: str): + def get_chain(self, chain_id: str) -> Chain: return self._chains[chain_id] - def add_chain(self, chain): + def add_chain(self, chain: Chain): if chain.id in self._chains: raise ValueError(f"duplicate chain: {chain.id}") - self._chains[chain.id] = chain @property - def chains(self): + def chains(self) -> list[Chain]: return list(self._chains.values()) - def get_atoms(self): - "shortcut to list all atoms in this structure" + def get_atoms(self) -> list[Atom]: + """List all atoms in the structure.""" atoms = [] for chain in self._chains.values(): for residue in chain.residues: atoms.extend(residue.atoms) - return atoms @property @@ -55,22 +67,25 @@ def id(self) -> str: class Chain: - "represents one pdb chain" + """One independent molecular entity of a `PDBStructure`. + + In other words: each `Chain` in a `PDBStructure` is a separate molecule. + """ def __init__(self, model: PDBStructure, id_: Optional[str]): - """ - Args: + """One chain of a PDBStructure. + + Args: model (:class:`PDBStructure`): The model that this chain is part of. id_ (str): The pdb identifier of this chain. 
""" - self._model = model self._id = id_ self._residues = {} self._pssm = None # pssm is per chain @property - def model(self): + def model(self) -> PDBStructure: return self._model @property @@ -81,13 +96,13 @@ def pssm(self) -> PssmRow: def pssm(self, pssm: PssmRow): self._pssm = pssm - def add_residue(self, residue): + def add_residue(self, residue: Residue): self._residues[(residue.number, residue.insertion_code)] = residue def has_residue(self, residue_number: int, insertion_code: Optional[str] = None) -> bool: return (residue_number, insertion_code) in self._residues - def get_residue(self, residue_number: int, insertion_code: Optional[str] = None): + def get_residue(self, residue_number: int, insertion_code: Optional[str] = None) -> Residue: return self._residues[(residue_number, insertion_code)] @property @@ -95,23 +110,22 @@ def id(self) -> str: return self._id @property - def residues(self): + def residues(self) -> list[Residue]: return list(self._residues.values()) - def get_atoms(self): + def get_atoms(self) -> list[Atom]: """Shortcut to list all atoms in this chain.""" atoms = [] - for residue in self._residues.values(): + for residue in self.residues: atoms.extend(residue.atoms) return atoms def __eq__(self, other) -> bool: - return ( - isinstance(self, type(other)) - and self._model == other._model - and self._id == other._id - ) + if isinstance(other, Chain): + return (self._model == other._model + and self._id == other._id) + return NotImplemented def __hash__(self) -> hash: return hash(self._id) diff --git a/deeprank2/molstruct/variant.py b/deeprank2/molstruct/variant.py deleted file mode 100644 index 227bd89f4..000000000 --- a/deeprank2/molstruct/variant.py +++ /dev/null @@ -1,22 +0,0 @@ -from deeprank2.molstruct.aminoacid import AminoAcid -from deeprank2.molstruct.residue import Residue - - -class SingleResidueVariant: - "represents an amino acid replacement" - - def __init__(self, residue: Residue, variant_amino_acid: AminoAcid): - 
self._residue = residue - self._variant_amino_acid = variant_amino_acid - - @property - def residue(self) -> Residue: - return self._residue - - @property - def variant_amino_acid(self) -> AminoAcid: - return self._variant_amino_acid - - @property - def wildtype_amino_acid(self) -> AminoAcid: - return self._residue.amino_acid diff --git a/deeprank2/query.py b/deeprank2/query.py index 163964ed7..8f82b4c1d 100644 --- a/deeprank2/query.py +++ b/deeprank2/query.py @@ -298,7 +298,6 @@ def _build_helper(self) -> Graph: #TODO: check if this works with a set instead of a list graph.center = get_residue_center(variant_residue) - return graph diff --git a/deeprank2/tools/plotgraph.py b/deeprank2/tools/plotgraph.py deleted file mode 100644 index be78e727b..000000000 --- a/deeprank2/tools/plotgraph.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python - -import os - -from deeprank2.tools.visualization.h5x_menu import context_menu -from h5xplorer.h5xplorer import h5xplorer - -baseimport = os.path.dirname( - os.path.abspath(__file__)) + "/baseimport.py" -app = h5xplorer(context_menu, - baseimport=baseimport, extended_selection=False) diff --git a/deeprank2/tools/visualization/__init__.py b/deeprank2/tools/visualization/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/deeprank2/tools/visualization/baseimport.py b/deeprank2/tools/visualization/baseimport.py deleted file mode 100644 index 9c825cf4d..000000000 --- a/deeprank2/tools/visualization/baseimport.py +++ /dev/null @@ -1,21 +0,0 @@ -import plotly.offline as py -from deeprank2.tools.visualization.plotting import (hdf5_to_networkx, - plotly_2d, plotly_3d) - - -def tsne_graph(grp, method): - - py.init_notebook_mode(connected=True) - - g = hdf5_to_networkx(grp) - - plotly_2d(g, offline=True, iplot=False, method=method) - - -def graph3d(grp): - - py.init_notebook_mode(connected=True) - - g = hdf5_to_networkx(grp) - - plotly_3d(g, offline=True, iplot=False) diff --git 
a/deeprank2/tools/visualization/embedding.py b/deeprank2/tools/visualization/embedding.py deleted file mode 100644 index e42dcfe19..000000000 --- a/deeprank2/tools/visualization/embedding.py +++ /dev/null @@ -1,17 +0,0 @@ -from sklearn.manifold import MDS, TSNE, SpectralEmbedding - - -def manifold_embedding(pos,method='tsne'): - n_components = 2 - n_neighbors = 100 - - if method == 'tsne': - tsne = TSNE(n_components=n_components, init='pca', random_state=0, perplexity=5.0) - Y = tsne.fit_transform(pos) - elif method == 'spectral': - se = SpectralEmbedding(n_components=n_components,n_neighbors=n_neighbors) - Y = se.fit_transform(pos) - elif method == 'mds': - mds = MDS(n_components, max_iter=100, n_init=1) - Y = mds.fit_transform(pos) - return Y diff --git a/deeprank2/tools/visualization/h5x_menu.py b/deeprank2/tools/visualization/h5x_menu.py deleted file mode 100644 index 5dc72e539..000000000 --- a/deeprank2/tools/visualization/h5x_menu.py +++ /dev/null @@ -1,82 +0,0 @@ -from h5xplorer.menu_plot import plot2d, plot_histogram, plot_line -from h5xplorer.menu_tools import (get_current_hdf5_group, get_current_item, - get_group_data, get_multilevel_actions, - send_dict_to_console) - - -def context_menu(self, treeview, position): # noqa: MC0001 - """Generate a right-click menu for the items.""" - - all_item = get_current_item(self, treeview, single=False) - - if len(all_item) == 1: - - item = all_item[0] - data = get_group_data(get_current_hdf5_group(self, item)) - - if data is None: - list_operations = ['Print attrs', 'tSNE Graph', '3D Plot'] - list_sub = [[], ['Louvain', 'MCL'], []] - - elif data.ndim == 1: - list_operations = ['Print attrs', - '-', 'Plot Hist', 'Plot Line'] - - elif data.ndim == 2: - list_operations = ['Print attrs', - '-', 'Plot Hist', 'Plot Map'] - - else: - list_operations = ['Print attrs'] - - #action,actions = get_actions(treeview,position,list_operations) - action, actions = get_multilevel_actions( - treeview, position, list_operations, 
list_sub) - - if action == actions['Print attrs']: - send_dict_to_console(self, item, treeview) - - if 'Plot Hist' in actions: - if action == actions['Plot Hist']: - plot_histogram(self, item, treeview) - - if 'Plot Line' in actions: - if action == actions['Plot Line']: - plot_line(self, item, treeview) - - if 'Plot Map' in actions: - if action == actions['Plot Map']: - plot2d(self, item, treeview) - - if ('tSNE Graph', 'Louvain') in actions: - if action == actions[('tSNE Graph', 'Louvain')]: - - grp = get_current_hdf5_group(self, item) - data_dict = {'_grp': grp} - treeview.emitDict.emit(data_dict) - - cmd = "tsne_graph(_grp,'louvain')" - data_dict = {'exec_cmd': cmd} - treeview.emitDict.emit(data_dict) - - if ('tSNE Graph', 'MCL') in actions: - if action == actions[('tSNE Graph', 'MCL')]: - - grp = get_current_hdf5_group(self, item) - data_dict = {'_grp': grp} - treeview.emitDict.emit(data_dict) - - cmd = "tsne_graph(_grp,'mcl')" - data_dict = {'exec_cmd': cmd} - treeview.emitDict.emit(data_dict) - - if '3D Plot' in actions: - if action == actions['3D Plot']: - - grp = get_current_hdf5_group(self, item) - data_dict = {'_grp': grp} - treeview.emitDict.emit(data_dict) - - cmd = 'graph3d(_grp)' - data_dict = {'exec_cmd': cmd} - treeview.emitDict.emit(data_dict) diff --git a/deeprank2/tools/visualization/plotting.py b/deeprank2/tools/visualization/plotting.py deleted file mode 100644 index d60395631..000000000 --- a/deeprank2/tools/visualization/plotting.py +++ /dev/null @@ -1,419 +0,0 @@ -import logging -from copy import deepcopy -from typing import Optional - -import community -import h5py -import markov_clustering -import matplotlib.pyplot as plt -import networkx -import numpy as np -import plotly.graph_objs as go - -from deeprank2.domain import edgestorage as Efeat -from deeprank2.domain import nodestorage as Nfeat -from deeprank2.tools.visualization.embedding import manifold_embedding - -_log = logging.getLogger(__name__) - - -def _get_node_key(value): - if 
isinstance(value, str): - return value - - key = "" - for item in value: - if isinstance(item, (bytes, np.bytes_)): - key = item.decode() - - elif isinstance(item, str): - key += item - - else: - key += str(item) - - return key - - -def hdf5_to_networkx(graph_group: h5py.Group) -> networkx.Graph: # pylint: disable=too-many-locals - """Read a hdf5 group into a networkx graph.""" - - graph = networkx.Graph() - - # read nodes - node_features_group = graph_group[Nfeat.NODE] - node_names = [_get_node_key(key) for key in node_features_group[Nfeat.NAME][()]] - node_features = {} - node_feature_names = list(node_features_group.keys()) - for node_feature_name in node_feature_names: - node_features[node_feature_name] = node_features_group[node_feature_name][()] - - for node_index, node_name in enumerate(node_names): - graph.add_node(node_name) - for node_feature_name in node_feature_names: - graph.nodes[node_name][node_feature_name] = node_features[ - node_feature_name - ][node_index] - - # read edges - edge_features_group = graph_group[Efeat.EDGE] - edge_names = edge_features_group[Efeat.NAME][()] - edge_node_indices = edge_features_group[Efeat.INDEX][()] - edge_features = {} - edge_feature_names = list(edge_features_group.keys()) - for edge_feature_name in edge_feature_names: - edge_features[edge_feature_name] = edge_features_group[edge_feature_name][()] - - for edge_index, _ in enumerate(edge_names): - node1_index, node2_index = edge_node_indices[edge_index] - node1_name = node_names[node1_index] - node2_name = node_names[node2_index] - edge_key = (node1_name, node2_name) - - graph.add_edge(node1_name, node2_name) - for edge_feature_name in edge_feature_names: - graph.edges[edge_key][edge_feature_name] = edge_features[edge_feature_name][ - edge_index - ] - - return graph - - -def _get_edge_type_name(value): - if isinstance(value, (bytes, np.bytes_)): - - return value.decode() - - return value - - -def plotly_2d( # noqa - graph: networkx.Graph, - out: Optional[str] = None, 
- offline: bool = False, - iplot: bool = True, - disable_plot: bool = False, - method: str = "louvain", -): - """Plots the interface graph in 2D.""" - - if offline: - import plotly.offline as py # pylint: disable=import-outside-toplevel - else: - import chart_studio.plotly as py # pylint: disable=import-outside-toplevel - - pos = np.array( - [v.tolist() for _, v in networkx.get_node_attributes(graph, Nfeat.POSITION).items()] - ) - pos2d = manifold_embedding(pos) - dict_pos = dict(zip(graph.nodes, pos2d)) - networkx.set_node_attributes(graph, dict_pos, "pos2d") - - # remove interface edges for clustering - gtmp = deepcopy(graph) - ebunch = [] - for e in graph.edges: - if graph.edges[e][Efeat.SAMECHAIN] == 0.0: - ebunch.append(e) - gtmp.remove_edges_from(ebunch) - - if method == "louvain": - cluster = community.best_partition(gtmp) - - elif method == "mcl": - matrix = networkx.to_scipy_sparse_matrix(gtmp) - # run MCL with default parameters - result = markov_clustering.run_mcl(matrix.toarray()) - mcl_clust = markov_clustering.get_clusters(result) # get clusters - cluster = {} - node_key = list(graph.nodes.keys()) - for ic, c in enumerate(mcl_clust): - for node in c: - cluster[node_key[node]] = ic - - # get the colormap for the clsuter line - ncluster = np.max([v for _, v in cluster.items()]) + 1 - cmap = plt.cm.nipy_spectral - N = cmap.N - cmap = [cmap(i) for i in range(N)] - cmap = cmap[:: int(N / ncluster)] - cmap = "plasma" - - edge_trace_list, internal_edge_trace_list = [], [] - - node_connect = {} - for edge in graph.edges: - - same_chain = graph.edges[edge[0], edge[1]][Efeat.SAMECHAIN] - if same_chain == 1.0: # internal - trace = go.Scatter( - x=[], - y=[], - text=[], - mode="lines", - hoverinfo=None, - showlegend=False, - line=go.scatter.Line(color="rgb(110,110,110)", width=3), - ) - - if same_chain == 0.0: # interface - trace = go.Scatter( - x=[], - y=[], - text=[], - mode="lines", - hoverinfo=None, - showlegend=False, - 
line=go.scatter.Line(color="rgb(210,210,210)", width=1), - ) - else: - continue - - x0, y0 = graph.nodes[edge[0]]["pos2d"] - x1, y1 = graph.nodes[edge[1]]["pos2d"] - - trace["x"] += (x0, x1, None) - trace["y"] += (y0, y1, None) - - if same_chain == 1.0: # internal - internal_edge_trace_list.append(trace) - - if same_chain == 0.0: # interface - edge_trace_list.append(trace) - - for i in [0, 1]: - if edge[i] not in node_connect: - node_connect[edge[i]] = 1 - else: - node_connect[edge[i]] += 1 - node_trace_a = go.Scatter( - x=[], - y=[], - text=[], - mode="markers", - hoverinfo="text", - marker=dict( - color="rgb(227,28,28)", - size=[], - line=dict(color=[], width=4, colorscale=cmap), - ), - ) - # 'rgb(227,28,28)' - node_trace_b = go.Scatter( - x=[], - y=[], - text=[], - mode="markers", - hoverinfo="text", - marker=dict( - color="rgb(0,102,255)", - size=[], - line=dict(color=[], width=4, colorscale=cmap), - ), - ) - # 'rgb(0,102,255)' - node_trace = [node_trace_a, node_trace_b] - - for x, node in enumerate(graph.nodes): - - index = 0 - if Nfeat.CHAINID in graph.nodes[node]: - if x == 0: - first_chain = graph.nodes[node][Nfeat.CHAINID] - if graph.nodes[node][Nfeat.CHAINID] != first_chain: # This is not very pythonic, but somehow I'm stuck on how to do this without enumerating - index = 1 - - pos = graph.nodes[node]["pos2d"] - - node_trace[index]["x"] += (pos[0],) - node_trace[index]["y"] += (pos[1],) - node_trace[index]["text"] += ( - "[Clst:" + str(cluster[node]) + "] " + " ".join(node), - ) - - nc = node_connect[node] - node_trace[index]["marker"]["size"] += (5 + 15 * np.tanh(nc / 5),) - node_trace[index]["marker"]["line"]["color"] += (cluster[node],) - - fig = go.Figure( - data=[*internal_edge_trace_list, *edge_trace_list, *node_trace], - layout=go.Layout( - title="
tSNE connection graph", - titlefont=dict(size=16), - showlegend=False, - hovermode="closest", - margin=dict(b=20, l=5, r=5, t=40), - annotations=[ - dict( - text="", - showarrow=False, - xref="paper", - yref="paper", - x=0.005, - y=-0.002, - ) - ], - xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - ), - ) - - if not disable_plot: - if iplot: - py.iplot(fig, filename=out) - else: - py.plot(fig) - - -def plotly_3d( # pylint: disable=too-many-locals, too-many-branches # noqa: MC0001 - graph: networkx.Graph, - out: Optional[str] = None, - offline: bool = False, - iplot: bool = True, - disable_plot: bool = False, -): - """Plots interface graph in 3D. - - Args: - graph (:class:`networkx.Graph`): The graph to be plotted. - out (str, optional): Defaults to None. - offline (bool, optional): Defaults to False. - iplot (bool, optional): Defaults to True. - disable_plot (bool, optional): Defaults to False. - """ - - if offline: - import plotly.offline as py # pylint: disable=import-outside-toplevel - else: - import chart_studio.plotly as py # pylint: disable=import-outside-toplevel - - edge_trace_list, internal_edge_trace_list = [], [] - node_connect = {} - - for edge in graph.edges: - - same_chain = graph.edges[edge[0], edge[1]][Efeat.SAMECHAIN] - if same_chain == 1.0: # internal - trace = go.Scatter3d( - x=[], - y=[], - z=[], - text=[], - mode="lines", - hoverinfo=None, - showlegend=False, - line=go.scatter3d.Line(color="rgb(110,110,110)", width=5), - ) - - elif same_chain == 0.0: # interface - trace = go.Scatter3d( - x=[], - y=[], - z=[], - text=[], - mode="lines", - hoverinfo=None, - showlegend=False, - line=go.scatter3d.Line(color="rgb(210,210,210)", width=2), - ) - else: - continue - - x0, y0, z0 = graph.nodes[edge[0]][Nfeat.POSITION] - x1, y1, z1 = graph.nodes[edge[1]][Nfeat.POSITION] - - trace["x"] += (x0, x1, None) - trace["y"] += (y0, y1, None) - trace["z"] += (z0, z1, None) - - if 
same_chain == 1.0: # internal - internal_edge_trace_list.append(trace) - - elif same_chain == 0.0: # interface - edge_trace_list.append(trace) - - for i in [0, 1]: - if edge[i] not in node_connect: - node_connect[edge[i]] = 1 - else: - node_connect[edge[i]] += 1 - - node_trace_a = go.Scatter3d( - x=[], - y=[], - z=[], - text=[], - mode="markers", - hoverinfo="text", - marker=dict( - color="rgb(227,28,28)", - size=[], - symbol="circle", - line=dict(color="rgb(50,50,50)", width=2), - ), - ) - - node_trace_b = go.Scatter3d( - x=[], - y=[], - z=[], - text=[], - mode="markers", - hoverinfo="text", - marker=dict( - color="rgb(0,102,255)", - size=[], - symbol="circle", - line=dict(color="rgb(50,50,50)", width=2), - ), - ) - - node_trace = [node_trace_a, node_trace_b] - - for x, node in enumerate(graph.nodes): - - index = 0 - if Nfeat.CHAINID in graph.nodes[node]: - if x == 0: - first_chain = graph.nodes[node][Nfeat.CHAINID] - if graph.nodes[node][Nfeat.CHAINID] != first_chain: # This is not very puythonic, but somehow I'm stuck on how to do this without enumerating - index = 1 - - pos = graph.nodes[node][Nfeat.POSITION] - - node_trace[index]["x"] += (pos[0],) - node_trace[index]["y"] += (pos[1],) - node_trace[index]["z"] += (pos[2],) - node_trace[index]["text"] += (" ".join(node),) - - nc = node_connect[node] - node_trace[index]["marker"]["size"] += (5 + 15 * np.tanh(nc / 5),) - - fig = go.Figure( - data=[*node_trace, *internal_edge_trace_list, *edge_trace_list], - layout=go.Layout( - title="
Connection graph", - titlefont=dict(size=16), - showlegend=False, - hovermode="closest", - margin=dict(b=20, l=5, r=5, t=40), - annotations=[ - dict( - text="", - showarrow=False, - xref="paper", - yref="paper", - x=0.005, - y=-0.002, - ) - ], - xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - ), - ) - - if not disable_plot: - if iplot: - py.iplot(fig, filename=out) - else: - py.plot(fig) diff --git a/deeprank2/utils/graph.py b/deeprank2/utils/graph.py index 06412f518..01d533371 100644 --- a/deeprank2/utils/graph.py +++ b/deeprank2/utils/graph.py @@ -391,8 +391,8 @@ def build_residue_graph( # pylint: disable=too-many-locals graph = Graph(graph_id) for residue1_index, residue2_index in residue_index_pairs: - residue1 = residues[residue1_index] - residue2 = residues[residue2_index] + residue1: Residue = residues[residue1_index] + residue2: Residue = residues[residue2_index] if residue1 != residue2: @@ -402,8 +402,8 @@ def build_residue_graph( # pylint: disable=too-many-locals node2 = Node(residue2) edge = Edge(contact) - node1.features[Nfeat.POSITION] = get_residue_center(residue1) - node2.features[Nfeat.POSITION] = get_residue_center(residue2) + node1.features[Nfeat.POSITION] = residue1.get_center() + node2.features[Nfeat.POSITION] = residue2.get_center() # The same residue will be added multiple times as a node, # but the Graph class fixes this. diff --git a/docs/conf.py b/docs/conf.py index 2961b86e8..a3255c1f2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,16 +18,9 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
import os +import toml import sys -config = configparser.ConfigParser() -config.read('./../setup.cfg') -CONFIG = {} -for section in config.sections(): - CONFIG[section] = {} - for option in config.options(section): - CONFIG[section][option] = config.get(section, option) - autodoc_mock_imports = [ 'numpy', 'scipy', @@ -107,7 +100,9 @@ # built documents. # # The short X.Y version. -version = CONFIG['metadata']['version'] +with open('./../pyproject.toml', 'r') as f: + toml_file = toml.load(f) + version = toml_file['project']['version'] # The full version, including alpha/beta/rc tags. release = version diff --git a/docs/features.md b/docs/features.md index cf7b099c5..0fac1f047 100644 --- a/docs/features.md +++ b/docs/features.md @@ -10,7 +10,7 @@ Users can add custom features by creating a new module and placing it in `deepra ```python from typing import Optional -from deeprank2.molstruct.variant import SingleResidueVariant +from deeprank2.molstruct.residue import SingleResidueVariant from deeprank2.utils.graph import Graph diff --git a/docs/getstarted.md b/docs/getstarted.md index 987997c7e..be471b1df 100644 --- a/docs/getstarted.md +++ b/docs/getstarted.md @@ -411,7 +411,3 @@ fig.update_layout( title='Loss vs epochs' ) ``` - -## h5x support - -After installing `h5xplorer` (https://github.com/DeepRank/h5xplorer), you can execute the python file `deeprank2/h5x/h5x.py` to explorer the connection graph used by deeprank2. The context menu (right click on the name of the structure) allows to automatically plot the graphs using `plotly`. diff --git a/docs/installation.md b/docs/installation.md index 086e41d30..7d1b0af9b 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,53 +1,50 @@ # Installation +The package officially supports ubuntu-latest OS only, whose functioning is widely tested through the continuous integration workflows. + ## Dependencies -Before installing deeprank2 you need to install some dependencies. 
We advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) with Python >= 3.9 installed. - -* [msms](https://ssbio.readthedocs.io/en/latest/instructions/msms.html): `conda install -c bioconda msms`. *For MacOS with M1 chip users*: you can follow [these instructions](https://ssbio.readthedocs.io/en/latest/instructions/msms.html). -* [PyTorch](https://pytorch.org/): - * CPU only: `conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 cpuonly -c pytorch` - * if using GPU: `conda install pytorch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0 pytorch-cuda=11.7 -c pytorch -c nvidia` -* [pytorch-geometric](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html): `conda install pyg -c pyg` -* [Dependencies for pytorch geometric from wheels](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html#installation-from-wheels): `pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html`. - - Here, `${TORCH}` and `${CUDA}` should be replaced by the pytorch and CUDA versions installed. You can find these using: - - `python -c "import torch; print(torch.__version__)"` and - - `python -c "import torch; print(torch.version.cuda)"` - - if this returns `None`, use `cpu` instead - - For example: `pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html` -* Check if [DSSP 4](https://swift.cmbi.umcn.nl/gv/dssp/) is installed: `dssp --version` - * if this gives an error or shows a version lower than 4: - * on ubuntu 22.04 or newer: `sudo apt-get install dssp`. - * If the package cannot be located, first run `sudo apt-get update`. - * on older versions of ubuntu or on mac or lacking sudo priviliges: install from [here](https://github.com/pdb-redo/dssp), following the instructions listed. 
-* Check if gcc is installed: `gcc --version`. - * if this gives an error, run `sudo apt-get install gcc`. - -* For MacOS with M1 chip (otherwise ignore this): `conda install pytables` - -## DeepRank2 Package - -Once the dependencies installed, you can install the latest release of deeprank2 using the PyPi package manager: +Before installing deeprank2 you need to install some dependencies. We advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) with Python >= 3.10 installed. The following dependency installation instructions are updated as of 14/09/2023, but in case of issues during installation always refer to the official documentation which is linked below: + +* [MSMS](https://anaconda.org/bioconda/msms): `conda install -c bioconda msms`. + * [Here](https://ssbio.readthedocs.io/en/latest/instructions/msms.html) for MacOS with M1 chip users. +* [PyTorch](https://pytorch.org/get-started/locally/) + * We support torch's CPU library as well as CUDA. +* [PyG](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) and its optional dependencies: `torch_scatter`, `torch_sparse`, `torch_cluster`, `torch_spline_conv`. +* [DSSP 4](https://swift.cmbi.umcn.nl/gv/dssp/) + * Check if `dssp` is installed: `dssp --version`. If this gives an error or shows a version lower than 4: + * on ubuntu 22.04 or newer: `sudo apt-get install dssp`. If the package cannot be located, first run `sudo apt-get update`. + * on older versions of ubuntu or on mac or lacking sudo priviliges: install from [here](https://github.com/pdb-redo/dssp), following the instructions listed. +* [GCC](https://gcc.gnu.org/install/) + * Check if gcc is installed: `gcc --version`. If this gives an error, run `sudo apt-get install gcc`. +* For MacOS with M1 chip users only install [the conda version of PyTables](https://www.pytables.org/usersguide/installation.html). 
+ + +## Deeprank2 Package + +Once the dependencies are installed, you can install the latest stable release of deeprank2 using the PyPi package manager: ```bash pip install deeprank2 ``` -Alternatively, get all the new developments by cloning the repo and installing the code with +Alternatively, get all the new developments by cloning the repo and installing the editable version of the package with: ```bash git clone https://github.com/DeepRank/deeprank2 cd deeprank2 -pip install -e ./ +pip install -e .'[test]' ``` +The `test` extra is optional, and can be used to install test-related dependencies useful during the development. + ## Test installation If you have installed the package from a cloned repository (second option above), you can check that all components were installed correctly, using pytest. The quick test should be sufficient to ensure that the software works, while the full test (a few minutes) will cover a much broader range of settings to ensure everything is correct. -First, install [pytest](https://docs.pytest.org/): `pip install pytest`. -Then run `pytest tests/test_integration.py` for the quick test or just `pytest` for the full test (expect a few minutes to run). +Run `pytest tests/test_integration.py` for the quick test or just `pytest` for the full test (expect a few minutes to run). ## Contributing + If you would like to contribute to the package in any way, please see [our guidelines](CONTRIBUTING.rst). 
diff --git a/docs/requirements.txt b/docs/requirements.txt index 8410c4c5a..3558cc33a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ sphinx==5.3.0 sphinx_rtd_theme==1.1.1 readthedocs-sphinx-search==0.1.1 -myst-parser \ No newline at end of file +myst-parser +toml \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b56b80cf8..f6dd8d8f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,84 @@ [build-system] -requires = ["setuptools", "wheel"] +requires = ["setuptools"] build-backend = "setuptools.build_meta" -[tool.pytest.ini_options] -testpaths = ["tests"] \ No newline at end of file +[project] +name = "deeprank2" +version = "2.0.0" +description = "DeepRank2 is an open-source deep learning framework for data mining of protein-protein interfaces or single-residue missense variants." +readme = "README.md" +requires-python = ">=3.10" +keywords = [ + "graph neural network", + "convolutional neural network", + "protein-protein interfaces", + "missense variants", + "deep learning", + "pytorch"] +authors = [ + {name = "Giulia Crocioni", email = "g.crocioni@esciencecenter.nl"}, + {name = "Coos Baakman", email = "coos.baakman@radboudumc.nl"}, + {name = "Dani Bodor", email = "d.bodor@esciencecenter.nl"}, + {name = "Daniel Rademaker"}, + {name = "Gayatri Ramakrishnan"}, + {name = "Sven van der Burg"}, + {name = "Li Xue"}, + {name = "Daniil Lepikhov"}, + ] +license = {text = "Apache-2.0 license"} +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10" +] +dependencies = [ + "tables >= 3.8.0", + "numpy >= 1.21.5", + "scipy >= 1.11.2", + "h5py >= 3.6.0", + "networkx >= 2.6.3", + "matplotlib >= 3.5.1", + "pdb2sql >= 0.5.1", + "scikit-learn >= 1.0.2", + "chart-studio >= 1.1.0", + "biopython >= 1.81", + "python-louvain >= 0.16", + 
"markov-clustering >= 0.0.6.dev0", + "tqdm >= 4.63.0", + "freesasa >= 2.1.0", + "tensorboard >= 0.9.0", + "protobuf >= 3.20.1" +] + +[project.optional-dependencies] +# development dependency groups +test = [ + "pytest >= 7.4.0", + "pylint <= 2.15.3", + "prospector[with_pyroma] <= 1.7.7", + "bump2version", + "coverage", + "pycodestyle", + "pytest-cov", + "pytest-runner", + "coveralls", +] +publishing = [ + "build", + "twine", + "wheel", +] + +[project.urls] +Documentation = "https://deeprank2.readthedocs.io/en/latest/?badge=latest" +Repository = "https://github.com/DeepRank/deeprank2" +Changelog = "https://github.com/DeepRank/deeprank2/blob/main/CHANGELOG.md" + +[tool.coverage.run] +branch = true +source = ["deeprank2"] + +[tool.setuptools] +packages = ["deeprank2"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2fb747254..000000000 --- a/setup.cfg +++ /dev/null @@ -1,89 +0,0 @@ -[metadata] -name = deeprank2 -authors = [ - { name = "Giulia Crocioni", email = "g.crocioni@esciencecenter.nl"}, - { name = "Coos Baakman", email = "coos.baakman@radboudumc.nl"}, - { name = "Dani Bodor", email = "d.bodor@esciencecenter.nl"}, - { name = "Daniel Rademaker"}, - { name = "Gayatri Ramakrishnan"}, - { name = "Sven van der Burg"}, - { name = "Li Xue"}, - { name = "Daniil Lepikhov"}, - ] - -classifiers = - Intended Audience :: Developers - License :: OSI Approved :: Apache Software License - Natural Language :: English - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 -description = deeprank2 allows to train graph neural networks to classify protein-protein interfaces with a greater flexibility for the user. 
-keywords = - graph neural network - protein-protein interface - pytorch -long_description = file: README.md -long_description_content_type = text/markdown -project_urls = - Bug Tracker = https://github.com/DeepRank/deeprank2/issues -url = https://github.com/DeepRank/deeprank2 -version = 2.0.0 - -[options] -zip_safe = False -include_package_data = True -packages = find_namespace: -python_requires = >= 3.9 -install_requires = - tables >= 3.7 - numpy >= 1.21.5 - scipy >= 1.11.2 - h5py >= 3.6.0 - networkx >= 2.6.3 - matplotlib >= 3.5.1 - pdb2sql >= 0.5.1 - scikit-learn >= 1.0.2 - chart-studio >= 1.1.0 - biopython >= 1.81 - python-louvain >= 0.16 - markov-clustering >= 0.0.6.dev0 - tqdm >= 4.63.0 - freesasa == 2.1.0 - tensorboard >= 2.9.0 - protobuf <= 3.20.1 - -[options.extras_require] -dev = - yapf -doc = - myst-parser - sphinx - sphinx_rtd_theme -test = - pytest - pylint <= 2.15.3 - prospector[with_pyroma] <= 1.7.7 - bump2version - coverage - pycodestyle - pytest-cov - pytest-runner - coveralls -publishing = - build - twine - wheel -tutorials = - notebook - pytest - plotly - jupyterlab - -[options.packages.find] -include = deeprank2, deeprank2.* -exclude = tests, tests.* - -[coverage:run] -branch = True -source = deeprank2 diff --git a/setup.py b/setup.py deleted file mode 100644 index 5b5b22c42..000000000 --- a/setup.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python -from setuptools import setup - -# see setup.cfg -setup() \ No newline at end of file diff --git a/tests/features/__init__.py b/tests/features/__init__.py index 909abbf1e..06fa65a88 100644 --- a/tests/features/__init__.py +++ b/tests/features/__init__.py @@ -1,17 +1,17 @@ from pathlib import Path from typing import Optional, Tuple, Union +from pdb2sql import pdb2sql + from deeprank2.molstruct.aminoacid import AminoAcid -from deeprank2.molstruct.residue import Residue +from deeprank2.molstruct.residue import Residue, SingleResidueVariant from deeprank2.molstruct.structure import Chain, PDBStructure 
-from deeprank2.molstruct.variant import SingleResidueVariant from deeprank2.utils.buildgraph import (get_residue_contact_pairs, get_structure, get_surrounding_residues) from deeprank2.utils.graph import (Graph, build_atomic_graph, build_residue_graph) from deeprank2.utils.parsing.pssm import parse_pssm -from pdb2sql import pdb2sql def _get_residue(chain: Chain, number: int) -> Residue: diff --git a/tests/tools/test_visualization.py b/tests/tools/test_visualization.py deleted file mode 100644 index 00d720a3c..000000000 --- a/tests/tools/test_visualization.py +++ /dev/null @@ -1,34 +0,0 @@ -import unittest -import warnings - -import h5py -import numpy as np -from deeprank2.tools.visualization.embedding import manifold_embedding -from deeprank2.tools.visualization.plotting import (hdf5_to_networkx, - plotly_2d, plotly_3d) - - -class TestGraph(unittest.TestCase): - def setUp(self): - with h5py.File("tests/data/hdf5/1ATN_ppi.hdf5", "r") as f5: - self.networkx_graph = hdf5_to_networkx(f5["residue-ppi-1ATN_1w:A-B"]) - - self.pdb_path = "tests/data/pdb/1ATN/1ATN_1w.pdb" - self.reference_path = "tests/data/pdb/1ATN/1ATN_2w.pdb" - - def test_plot_2d(self): - with warnings.catch_warnings(record=FutureWarning): - plotly_2d(self.networkx_graph, "1ATN", disable_plot=True) - - def test_plot_3d(self): - plotly_3d(self.networkx_graph, "1ATN", disable_plot=True) - - def test_embedding(self): - pos = np.random.rand(110, 3) - for method in ["tsne", "spectral", "mds"]: - with warnings.catch_warnings(record=FutureWarning): - _ = manifold_embedding(pos, method=method) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/utils/test_graph.py b/tests/utils/test_graph.py index 44ad5671e..a413026b8 100644 --- a/tests/utils/test_graph.py +++ b/tests/utils/test_graph.py @@ -6,11 +6,6 @@ import h5py import numpy as np import pytest -from deeprank2.molstruct.pair import ResidueContact -from deeprank2.molstruct.residue import get_residue_center -from deeprank2.utils.buildgraph 
import get_structure -from deeprank2.utils.graph import Edge, Graph, Node -from deeprank2.utils.grid import Augmentation, GridSettings, MapMethod from pdb2sql import pdb2sql from pdb2sql.transform import get_rot_axis_angle @@ -18,6 +13,10 @@ from deeprank2.domain import gridstorage from deeprank2.domain import nodestorage as Nfeat from deeprank2.domain import targetstorage as Target +from deeprank2.molstruct.pair import ResidueContact +from deeprank2.utils.buildgraph import get_structure +from deeprank2.utils.graph import Edge, Graph, Node +from deeprank2.utils.grid import Augmentation, GridSettings, MapMethod entry_id = "test" node_feature_narray = "node_feat1" @@ -58,8 +57,8 @@ def graph(): node1.features[node_feature_singleton] = 0.0 # set node positions, for the grid mapping - node0.features[Nfeat.POSITION] = get_residue_center(residue0) - node1.features[Nfeat.POSITION] = get_residue_center(residue1) + node0.features[Nfeat.POSITION] = residue0.get_center() + node1.features[Nfeat.POSITION] = residue1.get_center() # init the graph graph = Graph(structure.id)