From cbb4098edf9649d4d31fd5921460e732e24922d9 Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Wed, 13 Mar 2024 15:07:44 +0100
Subject: [PATCH 01/14] minor formatting improvements

---
 deeprank2/features/exposure.py | 7 ++-----
 deeprank2/features/irc.py      | 6 +++++-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/deeprank2/features/exposure.py b/deeprank2/features/exposure.py
index 88d512ad7..b5d4aa71f 100644
--- a/deeprank2/features/exposure.py
+++ b/deeprank2/features/exposure.py
@@ -1,11 +1,9 @@
 import logging
 import signal
 import sys
-import warnings
 from typing import NoReturn
 
 import numpy as np
-from Bio.PDB.Atom import PDBConstructionWarning
 from Bio.PDB.HSExposure import HSExposureCA
 from Bio.PDB.PDBParser import PDBParser
 from Bio.PDB.ResidueDepth import get_surface, residue_depth
@@ -42,9 +40,8 @@ def add_features(  # noqa:D103
     signal.signal(signal.SIGINT, handle_sigint)
     signal.signal(signal.SIGALRM, handle_timeout)
 
-    with warnings.catch_warnings(record=PDBConstructionWarning):
-        parser = PDBParser()
-        structure = parser.get_structure("_tmp", pdb_path)
+    parser = PDBParser(QUIET=True)
+    structure = parser.get_structure("_tmp", pdb_path)
     bio_model = structure[0]
 
     try:
diff --git a/deeprank2/features/irc.py b/deeprank2/features/irc.py
index f1d0f4c07..249cbd1bd 100644
--- a/deeprank2/features/irc.py
+++ b/deeprank2/features/irc.py
@@ -40,7 +40,11 @@ def __init__(self, residue: tuple[str, int, str], polarity: Polarity):
         self.connections["all"] = []
 
 
-def get_IRCs(pdb_path: str, chains: list[str], cutoff: float = 5.5) -> dict[str, _ContactDensity]:
+def get_IRCs(
+    pdb_path: str,
+    chains: list[str],
+    cutoff: float = 5.5,
+) -> dict[str, _ContactDensity]:
     """Get all close contact residues from the opposite chain.
 
     Args:

From e09fe6b521a1d73314d0fd1496ea1f5ec979066d Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Wed, 13 Mar 2024 15:31:30 +0100
Subject: [PATCH 02/14] add dependencies to env files

---
 env/deeprank2-docker.yml | 5 +++++
 env/deeprank2.yml        | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/env/deeprank2-docker.yml b/env/deeprank2-docker.yml
index 440daf9c3..6f43c592e 100644
--- a/env/deeprank2-docker.yml
+++ b/env/deeprank2-docker.yml
@@ -41,5 +41,10 @@ dependencies:
   - ruff>=0.3.0
   - dill>=0.3.8
   - pyarrow>=15.0.0
+  - openmm>=8.0.0
+  - chardet>=4.0.0
   - pip:
+      - pdb-tools==2.5.0
+      - Pras-Server==1.2.1
+      - pdb2pqr==3.6.2
       - --requirement requirements-docker.txt
diff --git a/env/deeprank2.yml b/env/deeprank2.yml
index 6127fcb66..1c237710e 100644
--- a/env/deeprank2.yml
+++ b/env/deeprank2.yml
@@ -41,3 +41,9 @@ dependencies:
   - ruff>=0.3.0
   - dill>=0.3.8
   - pyarrow>=15.0.0
+  - openmm>=8.0.0
+  - chardet>=4.0.0
+  - pip:
+      - pdb-tools==2.5.0
+      - Pras-Server==1.2.1
+      - pdb2pqr==3.6.2

From 088c4edfb794a881fd4e1c2d728d505e5c62eb2d Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 15:50:19 +0100
Subject: [PATCH 03/14] style: update ruff settings for TODOs

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 7978fdbfa..ab22e8330 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -87,6 +87,10 @@ ignore = [
     "S311",   # insecure random generators
     "PT011",  # pytest-raises-too-broad
     "SIM108", # Use ternary operator
+    # TODO formatting
+    "TD002",  # Missing TODO author
+    "TD003",  # Missing TODO link
+    "FIX002", # Consider resolving the issue instead
     # Unwanted docstrings
     "D100", # Missing module docstring
     "D104", # Missing public package docstring

From 3368f72a3189056c2905941a75ed02005b7e14da Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Wed, 13 Mar 2024 12:28:40 +0100
Subject: [PATCH 04/14] mimic do_pdbtools from pdbprep

---
 deeprank2/tools/pdbprep/__init__.py   |  0
 deeprank2/tools/pdbprep/preprocess.py | 72 +++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 deeprank2/tools/pdbprep/__init__.py
 create mode 100644 deeprank2/tools/pdbprep/preprocess.py

diff --git a/deeprank2/tools/pdbprep/__init__.py b/deeprank2/tools/pdbprep/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/deeprank2/tools/pdbprep/preprocess.py b/deeprank2/tools/pdbprep/preprocess.py
new file mode 100644
index 000000000..0b5f4d2c4
--- /dev/null
+++ b/deeprank2/tools/pdbprep/preprocess.py
@@ -0,0 +1,72 @@
+from pdbtools import pdb_delresname, pdb_fixinsert, pdb_keepcoord, pdb_reatom, pdb_reres, pdb_rplresname, pdb_selaltloc, pdb_sort, pdb_tidy
+
+from deeprank2.domain.aminoacidlist import amino_acids_by_code, amino_acids_by_letter
+
+
+def _run_pdb_tools(
+    pdb_str: str,
+    rename_residues: dict[str, str] | None = None,
+) -> str:
+    """Preprocesses pdb files using pdb-tools (Bonvin lab).
+
+    Files undergo a number of pruning steps:
+        1. Scrape non-atomic records.
+        2. Scrape water molecules.
+        3. Replace non-standard residue names with their standard counterparts.
+            A default library is used for this, which can be replaced using the `rename_residues` argument.
+        4. Scrape lower occupancy atoms in case of alternate locations.
+            Note that in case of equal occupancy, the first record is always used.
+        5. Delete insertion codes and shift the residue numbering of downstream residues.
+        6. Sort records by chain and residues.
+        7. Renumber residues on each chain from 1.
+        8. Renumber atoms from 1.
+        9. Tidy up to somewhat adhere to pdb format specifications.
+
+    Args:
+        pdb_str: string representation of pdb file.
+        rename_residues: dictionary mapping non-standard residue names (keys) to their standard names. Defaults to:
+            {
+                "MSE": "MET",
+                "HIP": "HIS",
+                "HIE": "HIS",
+                "HID": "HIS",
+                "HSE": "HIS",
+                "HSD": "HIS",
+            }
+
+    Raises:
+        ValueError: if an invalid amino acid (3-letter or 1-letter) code is given as a value to rename_residues.
+
+    Returns:
+        str: updated pdb
+    """
+    if not rename_residues:
+        rename_residues = {
+            "MSE": "MET",
+            "HIP": "HIS",
+            "HIE": "HIS",
+            "HID": "HIS",
+            "HSE": "HIS",
+            "HSD": "HIS",
+        }
+    else:
+        for new_res in rename_residues.values():
+            if new_res not in amino_acids_by_code and new_res not in amino_acids_by_letter:
+                msg = f"{new_res} is not a valid amino-acid code."
+                raise ValueError(msg)
+
+    # sequentially run individual tools from pdb-tools
+    new_pdb = pdb_keepcoord.run(pdb_str)  # Scrape non-atomic records
+    new_pdb = pdb_delresname.run(new_pdb, ("HOH",))  # Scrape water molecules
+
+    for old, new in rename_residues.items():
+        new_pdb = pdb_rplresname.run(new_pdb, old, new)  # Replace non-standard residue names with their standard counterparts
+
+    new_pdb = pdb_selaltloc.run(new_pdb)  # Scrape lower occupancy atoms in case of alternate locations
+    new_pdb = pdb_fixinsert.run(new_pdb, [])  # Delete insertion codes and shift the residue numbering of downstream residues.
+    new_pdb = pdb_sort.run(new_pdb, "CR")  # Sort records by chain and residues
+    new_pdb = pdb_reres.run(new_pdb, 1)  # Renumber residues on each chain from 1
+    new_pdb = pdb_reatom.run(new_pdb, 1)  # Renumber atoms from 1
+    new_pdb = pdb_tidy.run(new_pdb)  # Tidy up to somewhat adhere to pdb format specifications
+
+    return "".join(list(new_pdb))

From ae462b42f7a180d1936acf45a0978ca42a0f4f8f Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Wed, 13 Mar 2024 15:07:56 +0100
Subject: [PATCH 05/14] start testing pdbtools

---
 tests/tools/test_pdbprep.py | 55 +++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 tests/tools/test_pdbprep.py

diff --git a/tests/tools/test_pdbprep.py b/tests/tools/test_pdbprep.py
new file mode 100644
index 000000000..f4f3dede5
--- /dev/null
+++ b/tests/tools/test_pdbprep.py
@@ -0,0 +1,55 @@
+from pathlib import Path
+
+import pytest
+
+from deeprank2.tools.pdbprep.preprocess import preprocess_pdbs
+
+
+@pytest.fixture(scope="module")
+def pdb_file() -> Path:
+    return Path("tests/data/pdb/3C8P/3C8P.pdb")
+    # with Path("tests/data/pdb/3C8P/3C8P.pdb").open('r') as f:
+    #     records =
+
+
+def test_pdbtools(pdb_file: Path) -> None:
+    processed = preprocess_pdbs(pdb_file).splitlines()
+
+    with pdb_file.open("r") as pdb:
+        original = pdb.read().splitlines()
+
+    resname_cols = slice(17, 20)
+    altloc_cols = slice(16, 17)  # noqa: F841
+    coordinate_cols = slice(31, 54)  # noqa: F841
+
+    # check that only atomic records were preserved
+    original_openings = [r.split()[0] for r in original]
+    processed_openings = [r.split()[0] for r in processed]
+
+    scraped_record_types = ("HEADER", "TITLE", "COMPND", "REMARK")
+    kept_record_types = ("ATOM",)
+
+    for record in scraped_record_types:
+        assert record in original_openings
+        assert record not in processed_openings
+
+    for record in kept_record_types:
+        assert record in original_openings
+        assert record in processed_openings
+
+    # check that no water remains
+    original_resnames = [r[resname_cols] for r in original]
+    processed_resnames = [r[resname_cols] for r in processed]
+    assert "HOH" in original_resnames
+    assert "HOH" not in processed_resnames
+
+    # untested (but confirmed in Jupyter notebook):
+    # - select altloc (this file)
+    # - residue renumbering (this file)
+    # - atom renumbering (file 1ak4)
+    # - replace residue names (with dummy names)
+    #
+    # untested and no good test data:
+    # - fix insertion codes
+    # - sort
+    # - tidy (not sure what it does)

From b57e15dab091adf33de715154f2397af8bb17f07 Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Wed, 13 Mar 2024 17:25:34 +0100
Subject: [PATCH 06/14] create function to run PRAS

PRAS only accepts files as input and output, so the function body is wrapped in `TemporaryFile` context managers to allow variable inputs and outputs.
---
 deeprank2/tools/pdbprep/pras.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 deeprank2/tools/pdbprep/pras.py

diff --git a/deeprank2/tools/pdbprep/pras.py b/deeprank2/tools/pdbprep/pras.py
new file mode 100644
index 000000000..72b417b1e
--- /dev/null
+++ b/deeprank2/tools/pdbprep/pras.py
@@ -0,0 +1,26 @@
+from pathlib import Path
+from tempfile import TemporaryFile
+
+from Pras_Server.RunType import InitRunType as PRAS
+
+
+def add_missing_heavy_atoms(pdb_str: str) -> str:
+    """Add missing heavy atoms (usually many) using PRAS.
+
+    PRAS can only use files (no strings) as input and output, which is why this function is wrapped inside
+    TemporaryFile context managers.
+
+    Args:
+        pdb_str: string representation of pdb file.
+
+    Returns:
+        str: updated pdb
+    """
+    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
+        input_pdb.write(pdb_str)
+
+        fixing = PRAS(ofname=output_pdb)
+        fixing.fname = input_pdb
+        fixing.ProcessOther()  # write to specified filename
+
+        return output_pdb.read()

From afec0d4b6a10751326da6b1fbee87dc83119702c Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 14:05:21 +0100
Subject: [PATCH 07/14] add pdb2pqr functionality

---
 deeprank2/tools/pdbprep/pras.py | 90 ++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 1 deletion(-)

diff --git a/deeprank2/tools/pdbprep/pras.py b/deeprank2/tools/pdbprep/pras.py
index 72b417b1e..b66a78df9 100644
--- a/deeprank2/tools/pdbprep/pras.py
+++ b/deeprank2/tools/pdbprep/pras.py
@@ -1,6 +1,8 @@
-from pathlib import Path
+from dataclasses import dataclass
 from tempfile import TemporaryFile
 
+from pdb2pqr.config import FORCE_FIELDS
+from pdb2pqr.main import main_driver as pdb2pqr
 from Pras_Server.RunType import InitRunType as PRAS
 
 
@@ -24,3 +26,89 @@ def add_missing_heavy_atoms(pdb_str: str) -> str:
         fixing.ProcessOther()  # write to specified filename
 
         return output_pdb.read()
+
+
+def calculate_protonation_state(pdb_str: str, forcefield: str = "AMBER") -> str:
+    """Calculate the protonation states using PDB2PQR.
+
+    PDB2PQR can only use files (no strings) as input and output, which is why this function is wrapped inside
+    TemporaryFile context managers.
+
+    Args:
+        pdb_str: string representation of pdb file.
+        forcefield: Which forcefield to use. Defaults to "AMBER".
+
+    Returns:
+        str: updated pdb
+    """
+    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
+        input_pdb.write(pdb_str)
+
+        input_args = _Pdb2pqrArgs(input_pdb, output_pdb, forcefield)
+        pdb2pqr(input_args)
+
+        return output_pdb.read()
+
+
+@dataclass
+class _Pdb2pqrArgs:
+    """Input arguments to `main_driver` function of PDB2PQR.
+
+    These are usually given via CLI using argparse. All arguments, including those kept as default need to be given to
+    `main_driver` if called from script.
+    The argument given to `main_driver` is accessed via dot notation and is iterated over, which is why this is created
+    as a dataclass with an iterator.
+
+    Args:
+        input_path: path of the input file
+        output_pqr: path of the output file
+        ff: which forcefield to use
+        all other arguments should remain untouched.
+
+    Raises:
+        ValueError: if the forcefield is not recognized
+    """
+
+    input_path: str
+    output_pqr: str
+    ff: str = "AMBER"
+
+    # arguments set different from default
+    debump: bool = True
+    keep_chain: bool = True
+    log_level: str = "CRITICAL"
+
+    # arguments kept as default
+    ph: float = 7.0
+    assign_only: bool = False
+    clean: bool = False
+    userff: None = None
+    ffout: None = None
+    usernames: None = None
+    ligand: None = None
+    neutraln: bool = False
+    neutralc: bool = False
+    drop_water: bool = False
+    pka_method: None = None
+    opt: bool = True
+    include_header: bool = False
+    whitespace: bool = False
+    pdb_output: None = None
+    apbs_input: None = None
+
+    def __post_init__(self):
+        self._index = 0
+        if self.ff.lower() not in FORCE_FIELDS:
+            msg = f"Forcefield {self.ff} not recognized. Valid options: {FORCE_FIELDS}."
+            raise ValueError(msg)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        settings = vars(self)
+        if self._index < len(settings):
+            setting = list(settings)[self._index]
+            self._index += 1
+            return setting
+        raise StopIteration

From b21cd0ceb4abbd9ca14c1775328fc6a5d519ef29 Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 14:05:36 +0100
Subject: [PATCH 08/14] rename file

---
 deeprank2/tools/pdbprep/{pras.py => pras_pdb2pqr.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename deeprank2/tools/pdbprep/{pras.py => pras_pdb2pqr.py} (100%)

diff --git a/deeprank2/tools/pdbprep/pras.py b/deeprank2/tools/pdbprep/pras_pdb2pqr.py
similarity index 100%
rename from deeprank2/tools/pdbprep/pras.py
rename to deeprank2/tools/pdbprep/pras_pdb2pqr.py

From d68b01145787656e2dcc6a8f2df2bea95b315bd8 Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Wed, 13 Mar 2024 17:46:47 +0100
Subject: [PATCH 09/14] temporarily add test notebooks

---
 tests/tools/_PRAS.ipynb      | 163 +++++++++++++
 tests/tools/_pdb_tools.ipynb | 436 +++++++++++++++++++++++++++++++++++
 2 files changed, 599 insertions(+)
 create mode 100644 tests/tools/_PRAS.ipynb
 create mode 100644 tests/tools/_pdb_tools.ipynb

diff --git a/tests/tools/_PRAS.ipynb b/tests/tools/_PRAS.ipynb
new file mode 100644
index 000000000..59e44af2a
--- /dev/null
+++ b/tests/tools/_PRAS.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Testing PRAS functionality\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ruff: noqa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "fixed /home/dbodor/git/DeepRank/DeepRank2/tests/Untitled_3C8P_pdbprep_joao.pdb.pdb\n"
+     ]
+    }
+   ],
+   "source": [
+    "from Pras_Server.RunType import InitRunType\n",
+    "\n",
+    "fixing = InitRunType(\n",
+    "    rotamer=\"\",\n",
+    "    mutation=\"\",\n",
+    "    pdb_faspr=\"\",\n",
+    "    keep_ligand=\"\",\n",
+    "    chain_no=\"\",\n",
+    "    addh=False,\n",
+    "    ss=False,\n",
+    "    raman=False,\n",
+    "    ofname=False,\n",
+    "    pdbid=False,\n",
+    "    his_p=False,\n",
+    ")\n",
+    "\n",
+    "pdbs = [\"/home/dbodor/git/DeepRank/DeepRank2/tests/Untitled_3C8P_pdbprep_joao.pdb\"]\n",
+    "fixing.pdbid = pdbs\n",
+    "\n",
+    "fixing.ProcessOther()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "from pprint import pprint\n",
+    "\n",
+    "with Path(\"/home/dbodor/git/DeepRank/DeepRank2/tests/Untitled_3C8P_pdbprep_joao.pdb\").open(\"r\") as f:\n",
+    "    orig = f.read().splitlines()\n",
+    "\n",
+    "with Path(\"/home/dbodor/git/DeepRank/DeepRank2/tests/Untitled_3C8P_pras_local.pdb\").open(\"r\") as f:\n",
+    "    fixed = f.read().splitlines()[1:]\n",
+    "\n",
+    "with Path(\"/home/dbodor/git/DeepRank/DeepRank2/tests/Untitled_3C8P_pras_joao.pdb\").open(\"r\") as f:\n",
+    "    joao = f.read().splitlines()[1:]\n",
+    "\n",
+    "\n",
+    "# fixed[0].strip() == orig[0].strip()\n",
+    "# orig[0].strip()\n",
+    "\n",
+    "for i, x in enumerate(fixed):\n",
+    "    if x.strip() != joao[i].strip():\n",
+    "        print(i)\n",
+    "\n",
+    "# fixed[0].strip() == joao[0].strip()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import time\n",
+    "\n",
+    "from Pras_Server.RunType import InitRunType\n",
+    "\n",
+    "startTime = time.time()\n",
+    "out = [\n",
+    "    \"xxxx.pdb\",\n",
+    "    \"zzzz.pdb\",\n",
+    "    \"ssss.pdb\",\n",
+    "]  # size of this list MUST be equal to the total number of PDB structures in your working directory\n",
+    "k = 0\n",
+    "for i in os.listdir(os.getcwd()):\n",
+    "    if i.endswith(\".pdb\") or i.endswith(\".ent\"):\n",
+    "        try:\n",
+    "            out[k]\n",
+    "        except:\n",
+    "            print(\"Index error. Size of the name list is small\")\n",
+    "            sys.exit()\n",
+    "        fixing = InitRunType(\n",
+    "            rotamer=\"\",\n",
+    "            mutation=\"\",\n",
+    "            pdb_faspr=\"\",\n",
+    "            keep_ligand=\"\",\n",
+    "            chain_no=\"\",\n",
+    "            addh=False,\n",
+    "            ss=False,\n",
+    "            raman=False,\n",
+    "            ofname=out[k],\n",
+    "            pdbid=False,\n",
+    "            his_p=False,\n",
+    "        )\n",
+    "        fixing.fname = i\n",
+    "        fixing.ProcessOther()\n",
+    "        k += 1\n",
+    "print(f\"The program took {time.time() - startTime} second !\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "string = \"fsvfdskl\"\n",
+    "\n",
+    "with open(\"Output.txt\", \"w\") as text_file:\n",
+    "    print(string, file=text_file)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "DR2",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tests/tools/_pdb_tools.ipynb b/tests/tools/_pdb_tools.ipynb
new file mode 100644
index 000000000..0b9ddd335
--- /dev/null
+++ b/tests/tools/_pdb_tools.ipynb
@@ -0,0 +1,436 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Testing pdb-tools functionality\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ruff: noqa"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['101M/101M.pdb',\n",
+       " '1A0Z/1A0Z.pdb',\n",
+       " '1A6B/1A6B.pdb',\n",
+       " '1ATN/1ATN_1w.pdb',\n",
+       " '1ATN/1ATN_2w.pdb',\n",
+       " '1ATN/1ATN_3w.pdb',\n",
+       " '1ATN/1ATN_4w.pdb',\n",
+       " '1CRN/1CRN.pdb',\n",
+       " '1ak4/1ak4.pdb',\n",
+       " '2g98/pdb2g98.pdb',\n",
+       " '3C8P/3C8P.pdb',\n",
+       " '3MRC/3MRC.pdb',\n",
+       " '9api/9api.pdb']"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "all_pdb_files = \"\"\"\n",
+    "101M/101M.pdb\n",
+    "1A0Z/1A0Z.pdb\n",
+    "1A6B/1A6B.pdb\n",
+    "1ATN/1ATN_1w.pdb\n",
+    "1ATN/1ATN_2w.pdb\n",
+    "1ATN/1ATN_3w.pdb\n",
+    "1ATN/1ATN_4w.pdb\n",
+    "1CRN/1CRN.pdb\n",
+    "1ak4/1ak4.pdb\n",
+    "2g98/pdb2g98.pdb\n",
+    "3C8P/3C8P.pdb\n",
+    "3MRC/3MRC.pdb\n",
+    "9api/9api.pdb\n",
+    "\"\"\"\n",
+    "\n",
+    "all_pdb_files = all_pdb_files.splitlines()[1:]\n",
+    "base_folder = \"/home/dbodor/git/DeepRank/DeepRank2/tests/data/pdb//\"\n",
+    "\n",
+    "all_pdb_files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('3C8P/3C8P.pdb', 1488, 'A'),\n",
+       " ('3C8P/3C8P.pdb', 1490, 'B'),\n",
+       " ('3C8P/3C8P.pdb', 1492, 'A'),\n",
+       " ('3C8P/3C8P.pdb', 1494, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 566, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 567, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 570, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 571, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 572, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 573, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 691, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 692, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 695, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 696, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 697, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 698, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 699, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 700, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 701, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 702, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 703, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 704, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 748, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 749, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 752, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 753, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 754, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 755, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 756, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 757, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 805, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 806, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 809, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 810, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 811, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 812, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 813, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 814, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 815, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 816, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 817, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 818, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1060, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1061, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1064, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1065, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1066, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1067, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1068, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1069, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1070, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1071, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1093, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1094, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1097, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1098, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1099, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1100, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1101, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1102, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1103, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1104, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1105, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1106, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1107, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1108, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1109, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1110, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1340, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1341, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1344, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1345, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1346, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1347, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1348, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1349, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1515, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1516, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1519, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1520, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1521, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1522, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1523, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1524, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1525, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1526, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1527, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1528, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1530, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1531, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1534, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1535, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1536, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1537, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1538, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1539, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1540, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1541, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1542, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1543, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1544, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1545, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1546, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1547, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1548, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1549, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1630, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1631, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1634, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1635, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1636, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1637, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1638, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1639, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1640, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1641, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1642, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1643, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1786, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1787, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1790, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1791, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1792, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1793, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1794, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1795, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1796, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1797, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1798, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1799, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1944, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1945, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1948, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1949, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1950, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1951, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 1952, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 1953, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2129, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2130, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2133, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2134, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2135, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2136, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2137, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2138, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2139, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2140, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2141, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2142, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2305, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2306, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2309, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2310, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2311, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2312, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2411, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2412, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2415, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2416, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2417, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2418, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2419, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2420, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2421, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2422, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2423, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2424, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2425, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2426, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2427, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2428, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2523, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2524, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2527, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2528, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2529, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2530, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2531, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2532, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2533, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2534, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2535, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2536, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2747, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2748, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2751, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2752, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2753, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2754, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2755, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2756, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2757, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2758, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2759, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2760, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2761, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2762, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2921, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2922, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2925, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2926, 'B'),\n",
+       " ('3MRC/3MRC.pdb', 2927, 'A'),\n",
+       " ('3MRC/3MRC.pdb', 2928, 'B')]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tell_me = {}\n",
+    "\n",
+    "for file in all_pdb_files:\n",
+    "    with open(base_folder + file, \"r\") as f:\n",
+    "        records = f.read().splitlines()\n",
+    "\n",
+    "    for i, line in enumerate(records):\n",
+    "        if line.startswith((\"ATOM\", \"HETATOM\")):\n",
+    "            ins_code = slice(26, 27)\n",
+    "            if line[ins_code].strip():\n",
+    "                inscode: list = tell_me.setdefault(\"inscode\", [])\n",
+    "                inscode.append((file, i, line[ins_code]))\n",
+    "\n",
+    "            alt_loc = slice(16, 17)\n",
+    "            if line[alt_loc].strip():\n",
+    "                altloc: list = tell_me.setdefault(\"altloc\", [])\n",
+    "                altloc.append((file, i, line[alt_loc]))\n",
+    "\n",
+    "            resname = slice(17, 20)\n",
+    "            if line[resname] == \"HOH\":\n",
+    "                water: list = tell_me.setdefault(\"water\", [])\n",
+    "                water.append((file, i, line[resname]))\n",
+    "\n",
+    "tell_me[\"altloc\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from deeprank2.tools.pdbprep.preprocess import preprocess_pdbs\n",
+    "\n",
+    "fname = \"/home/dbodor/git/DeepRank/DeepRank2/tests/data/pdb/3C8P/3C8P.pdb\"\n",
+    "result = preprocess_pdbs(fname).splitlines()\n",
+    "\n",
+    "file_from_Joao = \"/home/dbodor/git/DeepRank/DeepRank2/tests/Untitled_3C8P_pdbprep.pdb\"\n",
+    "with open(file_from_Joao) as f:\n",
+    "    pdb_joao = f.read().splitlines()\n",
+    "\n",
+    "for i, x in enumerate(pdb_joao):\n",
+    "    if result[i] != x:\n",
+    "        print(i)\n",
+    "        print(result[i])\n",
+    "        print(x)\n",
+    "        break\n",
+    "\n",
+    "result == pdb_joao"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['ATOM      1  N   LYS A   1      30.878  18.880  20.164  1.00 23.18           N  ',\n",
+       " 'ATOM      2  CA  LYS A   1      30.166  17.611  20.202  1.00 19.48           C  ',\n",
+       " 'ATOM      3  C   LYS A   1      30.047  17.111  18.767  1.00 15.10           C  ',\n",
+       " 'ATOM      4  O   LYS A   1      29.712  17.858  17.855  1.00 17.10           O  ',\n",
+       " 'ATOM      5  CB  LYS A   1      28.769  17.677  20.811  1.00 22.38           C  ',\n",
+       " 'ATOM      6  CG  LYS A   1      28.057  16.336  20.971  1.00 25.07           C  ',\n",
+       " 'ATOM      7  CD  LYS A   1      26.866  16.306  21.952  1.00 26.33           C  ',\n",
+       " 'ATOM      8  CE  LYS A   1      26.623  14.928  22.552  1.00 32.03           C  ',\n",
+       " 'ATOM      9  NZ  LYS A   1      25.509  14.800  23.557  1.00 42.05           N  ',\n",
+       " 'ATOM     10  N   SER A   2      30.314  15.859  18.520  1.00 13.53           N  ']"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fname = \"/home/dbodor/git/DeepRank/DeepRank2/tests/data/pdb/3C8P/3C8P.pdb\"\n",
+    "result = preprocess_pdbs(fname).splitlines()\n",
+    "\n",
+    "result[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'101.338  38.470  -1.931'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X = \"ATOM      1  N   VAL A   1     101.338  38.470  -1.931  1.00 53.52           N  \"\n",
+    "Y = \"01234567890123456789012345678901234567890123456789012345678901234567890123456789\"\n",
+    "Z = \"00000000001111111111222222222233333333334444444444555555555566666666667777777777\"\n",
+    "X[31:54]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "DR2",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From f1ef5a7bcadf05bdecaa0561f588f55b0c3f936c Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 15:03:47 +0100
Subject: [PATCH 10/14] add detect protonation functions

---
 deeprank2/tools/pdbprep/detect_protonation.py | 275 ++++++++++++++++++
 1 file changed, 275 insertions(+)
 create mode 100644 deeprank2/tools/pdbprep/detect_protonation.py

diff --git a/deeprank2/tools/pdbprep/detect_protonation.py b/deeprank2/tools/pdbprep/detect_protonation.py
new file mode 100644
index 000000000..4925bd9cd
--- /dev/null
+++ b/deeprank2/tools/pdbprep/detect_protonation.py
@@ -0,0 +1,275 @@
+_MIN_ATOMS_TO_PROTONATE = 5  # TODO: why do we need this check?
+
+
+def detect_protonation_state(pdb_str: str) -> list[str | None]:
+    """Detect protonation states.
+
+    Args:
+        pdb_str (str): string representation of pdb file.
+
+    Returns:
+        list of protonation-specific residue names, which can be used to ... #TODO: finish this sentence
+    """
+    pdb_lines = pdb_str.splitlines()
+    protonable_residues = ("HIS", "ASP", "GLU", "CYS", "LYS")
+
+    # Residue currently being accumulated while scanning the ATOM records.
+    prev_resid = None
+    prev_resname = None
+    atoms_in_residue = set()
+    residues = []
+
+    for line in pdb_lines:
+        if not line.startswith("ATOM"):
+            continue
+
+        resid = line[21:26]  # chain ID + res number
+        resname = line[17:20]
+        # A new residue starts: report the finished one, unless it is still below the atom threshold.
+        if resid != prev_resid and len(atoms_in_residue) >= _MIN_ATOMS_TO_PROTONATE:
+            residues.append(_protonation_resname(prev_resname, atoms_in_residue) if prev_resname in protonable_residues else None)
+            atoms_in_residue.clear()
+
+        atoms_in_residue.add(line[12:16].strip())  # atom name
+        prev_resid = resid
+        prev_resname = resname
+
+    # The last residue has no successor to trigger the report above, so report it after the loop.
+    if atoms_in_residue:
+        residues.append(_protonation_resname(prev_resname, atoms_in_residue) if prev_resname in protonable_residues else None)
+
+    return residues
+
+
+def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str:  # noqa: PLR0911
+    """Returns alternate residue name based on protonation state."""
+    if resname == "HIS":
+        if "HD1" in atoms_in_residue and "HE2" in atoms_in_residue:
+            return "HIP"
+        if "HD1" in atoms_in_residue:
+            return "HID"  # only HD1 present
+        if "HE2" in atoms_in_residue:
+            return "HIE"  # only HE2 present
+        return "HIN"  # neither HD1 nor HE2 present
+
+    if resname == "ASP" and ("HD2" in atoms_in_residue or "HD1" in atoms_in_residue):
+        return "ASH"  # protonated ASP; "ASN" would be asparagine, a different residue
+
+    if resname == "GLU" and ("HE2" in atoms_in_residue or "HE1" in atoms_in_residue):
+        return "GLH"  # GLU carrying an HE1/HE2 hydrogen
+
+    if resname == "LYS" and not all(_a in atoms_in_residue for _a in ("HZ1", "HZ2", "HZ3")):
+        return "LYN"  # at least one of HZ1/HZ2/HZ3 is missing
+
+    if resname == "CYS" and "HG" not in atoms_in_residue:
+        return "CYX"  # no HG hydrogen present
+
+    return resname
+
+
+# This module is modified from https://github.com/DeepRank/pdbprep/blob/main/detect_protonation.py,
+# written by João M.C. Teixeira (https://github.com/joaomcteixeira)
+# publishd under the following license:
+
+
+#                                  Apache License
+#                            Version 2.0, January 2004
+#                         http://www.apache.org/licenses/
+
+#    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+#    1. Definitions.
+
+#       "License" shall mean the terms and conditions for use, reproduction,
+#       and distribution as defined by Sections 1 through 9 of this document.
+
+#       "Licensor" shall mean the copyright owner or entity authorized by
+#       the copyright owner that is granting the License.
+
+#       "Legal Entity" shall mean the union of the acting entity and all
+#       other entities that control, are controlled by, or are under common
+#       control with that entity. For the purposes of this definition,
+#       "control" means (i) the power, direct or indirect, to cause the
+#       direction or management of such entity, whether by contract or
+#       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+#       outstanding shares, or (iii) beneficial ownership of such entity.
+
+#       "You" (or "Your") shall mean an individual or Legal Entity
+#       exercising permissions granted by this License.
+
+#       "Source" form shall mean the preferred form for making modifications,
+#       including but not limited to software source code, documentation
+#       source, and configuration files.
+
+#       "Object" form shall mean any form resulting from mechanical
+#       transformation or translation of a Source form, including but
+#       not limited to compiled object code, generated documentation,
+#       and conversions to other media types.
+
+#       "Work" shall mean the work of authorship, whether in Source or
+#       Object form, made available under the License, as indicated by a
+#       copyright notice that is included in or attached to the work
+#       (an example is provided in the Appendix below).
+
+#       "Derivative Works" shall mean any work, whether in Source or Object
+#       form, that is based on (or derived from) the Work and for which the
+#       editorial revisions, annotations, elaborations, or other modifications
+#       represent, as a whole, an original work of authorship. For the purposes
+#       of this License, Derivative Works shall not include works that remain
+#       separable from, or merely link (or bind by name) to the interfaces of,
+#       the Work and Derivative Works thereof.
+
+#       "Contribution" shall mean any work of authorship, including
+#       the original version of the Work and any modifications or additions
+#       to that Work or Derivative Works thereof, that is intentionally
+#       submitted to Licensor for inclusion in the Work by the copyright owner
+#       or by an individual or Legal Entity authorized to submit on behalf of
+#       the copyright owner. For the purposes of this definition, "submitted"
+#       means any form of electronic, verbal, or written communication sent
+#       to the Licensor or its representatives, including but not limited to
+#       communication on electronic mailing lists, source code control systems,
+#       and issue tracking systems that are managed by, or on behalf of, the
+#       Licensor for the purpose of discussing and improving the Work, but
+#       excluding communication that is conspicuously marked or otherwise
+#       designated in writing by the copyright owner as "Not a Contribution."
+
+#       "Contributor" shall mean Licensor and any individual or Legal Entity
+#       on behalf of whom a Contribution has been received by Licensor and
+#       subsequently incorporated within the Work.
+
+#    2. Grant of Copyright License. Subject to the terms and conditions of
+#       this License, each Contributor hereby grants to You a perpetual,
+#       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+#       copyright license to reproduce, prepare Derivative Works of,
+#       publicly display, publicly perform, sublicense, and distribute the
+#       Work and such Derivative Works in Source or Object form.
+
+#    3. Grant of Patent License. Subject to the terms and conditions of
+#       this License, each Contributor hereby grants to You a perpetual,
+#       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+#       (except as stated in this section) patent license to make, have made,
+#       use, offer to sell, sell, import, and otherwise transfer the Work,
+#       where such license applies only to those patent claims licensable
+#       by such Contributor that are necessarily infringed by their
+#       Contribution(s) alone or by combination of their Contribution(s)
+#       with the Work to which such Contribution(s) was submitted. If You
+#       institute patent litigation against any entity (including a
+#       cross-claim or counterclaim in a lawsuit) alleging that the Work
+#       or a Contribution incorporated within the Work constitutes direct
+#       or contributory patent infringement, then any patent licenses
+#       granted to You under this License for that Work shall terminate
+#       as of the date such litigation is filed.
+
+#    4. Redistribution. You may reproduce and distribute copies of the
+#       Work or Derivative Works thereof in any medium, with or without
+#       modifications, and in Source or Object form, provided that You
+#       meet the following conditions:
+
+#       (a) You must give any other recipients of the Work or
+#           Derivative Works a copy of this License; and
+
+#       (b) You must cause any modified files to carry prominent notices
+#           stating that You changed the files; and
+
+#       (c) You must retain, in the Source form of any Derivative Works
+#           that You distribute, all copyright, patent, trademark, and
+#           attribution notices from the Source form of the Work,
+#           excluding those notices that do not pertain to any part of
+#           the Derivative Works; and
+
+#       (d) If the Work includes a "NOTICE" text file as part of its
+#           distribution, then any Derivative Works that You distribute must
+#           include a readable copy of the attribution notices contained
+#           within such NOTICE file, excluding those notices that do not
+#           pertain to any part of the Derivative Works, in at least one
+#           of the following places: within a NOTICE text file distributed
+#           as part of the Derivative Works; within the Source form or
+#           documentation, if provided along with the Derivative Works; or,
+#           within a display generated by the Derivative Works, if and
+#           wherever such third-party notices normally appear. The contents
+#           of the NOTICE file are for informational purposes only and
+#           do not modify the License. You may add Your own attribution
+#           notices within Derivative Works that You distribute, alongside
+#           or as an addendum to the NOTICE text from the Work, provided
+#           that such additional attribution notices cannot be construed
+#           as modifying the License.
+
+#       You may add Your own copyright statement to Your modifications and
+#       may provide additional or different license terms and conditions
+#       for use, reproduction, or distribution of Your modifications, or
+#       for any such Derivative Works as a whole, provided Your use,
+#       reproduction, and distribution of the Work otherwise complies with
+#       the conditions stated in this License.
+
+#    5. Submission of Contributions. Unless You explicitly state otherwise,
+#       any Contribution intentionally submitted for inclusion in the Work
+#       by You to the Licensor shall be under the terms and conditions of
+#       this License, without any additional terms or conditions.
+#       Notwithstanding the above, nothing herein shall supersede or modify
+#       the terms of any separate license agreement you may have executed
+#       with Licensor regarding such Contributions.
+
+#    6. Trademarks. This License does not grant permission to use the trade
+#       names, trademarks, service marks, or product names of the Licensor,
+#       except as required for reasonable and customary use in describing the
+#       origin of the Work and reproducing the content of the NOTICE file.
+
+#    7. Disclaimer of Warranty. Unless required by applicable law or
+#       agreed to in writing, Licensor provides the Work (and each
+#       Contributor provides its Contributions) on an "AS IS" BASIS,
+#       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+#       implied, including, without limitation, any warranties or conditions
+#       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+#       PARTICULAR PURPOSE. You are solely responsible for determining the
+#       appropriateness of using or redistributing the Work and assume any
+#       risks associated with Your exercise of permissions under this License.
+
+#    8. Limitation of Liability. In no event and under no legal theory,
+#       whether in tort (including negligence), contract, or otherwise,
+#       unless required by applicable law (such as deliberate and grossly
+#       negligent acts) or agreed to in writing, shall any Contributor be
+#       liable to You for damages, including any direct, indirect, special,
+#       incidental, or consequential damages of any character arising as a
+#       result of this License or out of the use or inability to use the
+#       Work (including but not limited to damages for loss of goodwill,
+#       work stoppage, computer failure or malfunction, or any and all
+#       other commercial damages or losses), even if such Contributor
+#       has been advised of the possibility of such damages.
+
+#    9. Accepting Warranty or Additional Liability. While redistributing
+#       the Work or Derivative Works thereof, You may choose to offer,
+#       and charge a fee for, acceptance of support, warranty, indemnity,
+#       or other liability obligations and/or rights consistent with this
+#       License. However, in accepting such obligations, You may act only
+#       on Your own behalf and on Your sole responsibility, not on behalf
+#       of any other Contributor, and only if You agree to indemnify,
+#       defend, and hold each Contributor harmless for any liability
+#       incurred by, or claims asserted against, such Contributor by reason
+#       of your accepting any such warranty or additional liability.
+
+#    END OF TERMS AND CONDITIONS
+
+#    APPENDIX: How to apply the Apache License to your work.
+
+#       To apply the Apache License to your work, attach the following
+#       boilerplate notice, with the fields enclosed by brackets "[]"
+#       replaced with your own identifying information. (Don't include
+#       the brackets!)  The text should be enclosed in the appropriate
+#       comment syntax for the file format. We also recommend that a
+#       file or class name and description of purpose be included on the
+#       same "printed page" as the copyright notice for easier
+#       identification within third-party archives.
+
+#    Copyright [yyyy] [name of copyright owner]
+
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+
+#        http://www.apache.org/licenses/LICENSE-2.0
+
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.

From 16dc41cfb6e9e3b5dcadfd8322591a4af5e81592 Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 18:56:58 +0100
Subject: [PATCH 11/14] add `add_hydrogens` functions

---
 deeprank2/tools/pdbprep/detect_protonation.py | 70 ++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/deeprank2/tools/pdbprep/detect_protonation.py b/deeprank2/tools/pdbprep/detect_protonation.py
index 4925bd9cd..3018d9910 100644
--- a/deeprank2/tools/pdbprep/detect_protonation.py
+++ b/deeprank2/tools/pdbprep/detect_protonation.py
@@ -1,4 +1,71 @@
+from tempfile import TemporaryFile
+
+from openmm import LangevinIntegrator, unit
+from openmm import app as mmapp
+
 _MIN_ATOMS_TO_PROTONATE = 5  # TODO: why do we need this check?
+_TEMPERATURE = 310
+
+
+def add_hydrogens(
+    pdb_str: str,
+    protonated_sequence: list[str | None],
+    max_iterations: int = 100,
+    random_seed: int = 917,
+) -> str:
+    """Add hydrogens."""
+    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
+        input_pdb.write(pdb_str)
+
+        # PARAMETERS
+        forcefield_model = "amber14-all.xml"  #'charmm36.xml'
+        water_model = "amber14/tip3p.xml"  #'charmm36/tip3p-pme-b.xml'
+        platform_properties = {"Threads": str(1)}  # presumably limits the OpenMM platform to one thread; confirm
+
+        # PREPARES MODEL
+        forcefield = mmapp.ForceField(forcefield_model, water_model)
+        structure = mmapp.PDBFile(input_pdb)
+
+        model = mmapp.Modeller(structure.topology, structure.positions)
+        model.addHydrogens(forcefield=forcefield, variants=protonated_sequence)
+
+        structure.positions = model.positions  # carry the hydrogen-completed model over to the PDBFile object
+        structure.topology = model.topology
+
+        system = forcefield.createSystem(structure.topology)
+
+        integrator = LangevinIntegrator(
+            _TEMPERATURE * unit.kelvin,
+            1.0 / unit.picosecond,
+            2.0 * unit.femtosecond,
+        )
+
+        integrator.setRandomNumberSeed(random_seed)
+        integrator.setConstraintTolerance(0.00001)
+
+        simulation = mmapp.Simulation(
+            structure.topology,
+            system,
+            integrator,
+            platformProperties=platform_properties,
+        )
+
+        context = simulation.context
+        context.setPositions(model.positions)
+
+        state = context.getState(getEnergy=True)
+        ini_ene = state.getPotentialEnergy().value_in_unit(unit.kilocalorie_per_mole)  # noqa:F841 TODO: check if this line is needed
+        simulation.minimizeEnergy(maxIterations=max_iterations)
+        structure.positions = context.getState(getPositions=True).getPositions()
+
+        # TODO: second minimization pass (`state` is fetched again but never used); check whether the repeat is intended or a typo.
+        state = context.getState(getEnergy=True)
+        simulation.minimizeEnergy(maxIterations=max_iterations)
+        structure.positions = context.getState(getPositions=True).getPositions()
+
+        mmapp.PDBFile.writeFile(structure.topology, structure.positions, output_pdb)  # NOTE(review): output_pdb is opened with mode="r"; confirm it can be written to
+
+        return output_pdb.read()
 
 
 def detect_protonation_state(pdb_str: str) -> list[str | None]:
@@ -67,7 +134,8 @@ def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str:  # n
     return resname
 
 
-# This module is modified from https://github.com/DeepRank/pdbprep/blob/main/detect_protonation.py,
+# This module is modified from https://github.com/DeepRank/pdbprep/blob/main/
+# original modules names: detect_protonation.py and add_hydrogens.py),
 # written by João M.C. Teixeira (https://github.com/joaomcteixeira)
 # publishd under the following license:
 

From 49f5eda9a5b62af164aaa350d0d08d88a4e5359b Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 19:21:04 +0100
Subject: [PATCH 12/14] add PRAS to preprocess file

---
 deeprank2/tools/pdbprep/preprocess.py | 65 +++++++++++++++++++++------
 1 file changed, 52 insertions(+), 13 deletions(-)

diff --git a/deeprank2/tools/pdbprep/preprocess.py b/deeprank2/tools/pdbprep/preprocess.py
index 0b5f4d2c4..c0cbf68d0 100644
--- a/deeprank2/tools/pdbprep/preprocess.py
+++ b/deeprank2/tools/pdbprep/preprocess.py
@@ -1,8 +1,40 @@
+from pathlib import Path
+from tempfile import TemporaryFile
+
 from pdbtools import pdb_delresname, pdb_fixinsert, pdb_keepcoord, pdb_reatom, pdb_reres, pdb_rplresname, pdb_selaltloc, pdb_sort, pdb_tidy
+from Pras_Server.RunType import InitRunType as PRAS
 
 from deeprank2.domain.aminoacidlist import amino_acids_by_code, amino_acids_by_letter
 
 
+def preprocess_pdbs(
+    pdb_path: str | Path,
+    rename_residues: dict[str, str] | None = None,
+) -> str:
+    """Preprocess a pdb file for adding/fixing hydrogens and return the result as a string.
+
+    Args:
+        pdb_path: Path of pdb file to preprocess.
+        rename_residues: Dictionary mapping of non-standard residue names (keys) to their standard names.
+            Defaults to:
+            {
+                "MSE": "MET",
+                "HIP": "HIS",
+                "HIE": "HIS",
+                "HID": "HIS",
+                "HSE": "HIS",
+                "HSD": "HIS",
+            }
+    """
+    with Path(pdb_path).open("r") as f:
+        pdb_str = f.read()
+
+    pdb_str = _run_pdb_tools(pdb_str, rename_residues)  # clean up the records with pdb-tools
+    pdb_str = _add_missing_heavy_atoms(pdb_str)  # complete missing heavy atoms with PRAS
+
+    return pdb_str  # noqa: RET504
+
+
 def _run_pdb_tools(
     pdb_str: str,
     rename_residues: dict[str, str] | None = None,
@@ -22,23 +54,11 @@ def _run_pdb_tools(
         8. Renumber atoms from 1.
         9. Tidy up to somewhat adhere to pdb format specifications.
 
-    Args:
-        pdb_str: string representation of pdb file.
-        rename_residues: dictionary mapping non-standard residue names (keys) to their standard names. Defaults to:
-            {
-                "MSE": "MET",
-                "HIP": "HIS",
-                "HIE": "HIS",
-                "HID": "HIS",
-                "HSE": "HIS",
-                "HSD": "HIS",
-            }
-
     Raises:
         ValueError: if an invalid amino acid (3-letter or 1-letter) code is given as a value to rename_residues.
 
     Returns:
-        str: updated pdb
+        str: Updated pdb
     """
     if not rename_residues:
         rename_residues = {
@@ -70,3 +90,22 @@ def _run_pdb_tools(
     new_pdb = pdb_tidy.run(new_pdb)  # Tidy up to somewhat adhere to pdb format specifications
 
     return "".join(list(new_pdb))
+
+
+def _add_missing_heavy_atoms(pdb_str: str) -> str:
+    """Add missing heavy atoms (usually many) using PRAS.
+
+    PRAS can only use files (no strings) as input and output, which is why this function is wrapped inside
+    TemporaryFile context managers.
+
+    Returns:
+        str: Updated pdb
+    """
+    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
+        input_pdb.write(pdb_str)  # NOTE(review): handle is write-only and is not flushed or rewound before PRAS runs; confirm
+
+        fixing = PRAS(ofname=output_pdb)  # NOTE(review): receives a file object, not a file name; confirm PRAS accepts this
+        fixing.fname = input_pdb
+        fixing.ProcessOther()  # write to specified filename
+
+        return output_pdb.read()

From 9da6ce479de2281a048c0e4cae448a878ec9bf7a Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 23:22:37 +0100
Subject: [PATCH 13/14] move pdb2pqr function into detect protonation module

---
 deeprank2/tools/pdbprep/detect_protonation.py | 135 +++++++++++++++---
 deeprank2/tools/pdbprep/pras_pdb2pqr.py       | 114 ---------------
 2 files changed, 115 insertions(+), 134 deletions(-)
 delete mode 100644 deeprank2/tools/pdbprep/pras_pdb2pqr.py

diff --git a/deeprank2/tools/pdbprep/detect_protonation.py b/deeprank2/tools/pdbprep/detect_protonation.py
index 3018d9910..40733742f 100644
--- a/deeprank2/tools/pdbprep/detect_protonation.py
+++ b/deeprank2/tools/pdbprep/detect_protonation.py
@@ -1,7 +1,10 @@
+from dataclasses import dataclass
 from tempfile import TemporaryFile
 
 from openmm import LangevinIntegrator, unit
 from openmm import app as mmapp
+from pdb2pqr.config import FORCE_FIELDS
+from pdb2pqr.main import main_driver as pdb2pqr
 
 _MIN_ATOMS_TO_PROTONATE = 5  # TODO: why do we need this check?
 _TEMPERATURE = 310
@@ -9,12 +12,20 @@
 
 def add_hydrogens(
     pdb_str: str,
-    protonated_sequence: list[str | None],
     max_iterations: int = 100,
+    constraint_tolerance: float = 1e-05,
     random_seed: int = 917,
 ) -> str:
-    """Add hydrogens."""
-    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
+    """Add hydrogens to pdb file.
+
+    Args:
+        pdb_str: String representation of pdb file, preprocessed using deeprank2.tools.pdbprep.preprocess.
+        max_iterations: Maximum number of iterations to perform during energy minimization. Defaults to 100.
+        constraint_tolerance: Distance tolerance of LangevinIntegrator within which constraints are maintained, as a
+            fraction of the constrained distance. Defaults to  1e-05.
+        random_seed: Random seed for LangevinIntegrator.
+    """
+    with TemporaryFile(mode="w", suffix="pdb") as input_pdb, TemporaryFile(mode="r") as output_pdb:
         input_pdb.write(pdb_str)
 
         # PARAMETERS
@@ -25,6 +36,7 @@ def add_hydrogens(
         # PREPARES MODEL
         forcefield = mmapp.ForceField(forcefield_model, water_model)
         structure = mmapp.PDBFile(input_pdb)
+        protonated_sequence = _detect_protonation_state(pdb_str)
 
         model = mmapp.Modeller(structure.topology, structure.positions)
         model.addHydrogens(forcefield=forcefield, variants=protonated_sequence)
@@ -35,13 +47,13 @@ def add_hydrogens(
         system = forcefield.createSystem(structure.topology)
 
         integrator = LangevinIntegrator(
-            _TEMPERATURE * unit.kelvin,
-            1.0 / unit.picosecond,
-            2.0 * unit.femtosecond,
+            temperature=_TEMPERATURE * unit.kelvin,
+            frictionCoeff=1.0 / unit.picosecond,
+            stepSize=2.0 * unit.femtosecond,
         )
 
         integrator.setRandomNumberSeed(random_seed)
-        integrator.setConstraintTolerance(0.00001)
+        integrator.setConstraintTolerance(constraint_tolerance)
 
         simulation = mmapp.Simulation(
             structure.topology,
@@ -68,15 +80,10 @@ def add_hydrogens(
         return output_pdb.read()
 
 
-def detect_protonation_state(pdb_str: str) -> list[str | None]:
-    """Detect protonation states.
+def _detect_protonation_state(pdb_str: str) -> list[str | None]:
+    """Detect protonation states and return them as a sequence of alternative residue names."""
+    _calculate_protonation_state(pdb_str)
 
-    Args:
-        pdb_str (str): string representation of pdb file.
-
-    Returns:
-        list of protonation-specific residue names, which can be used to ... #TODO: finish this sentence
-    """
     pdb_lines = pdb_str.splitlines()
     protonable_residues = ("HIS", "ASP", "GLU", "CYS", "LYS")
 
@@ -108,8 +115,11 @@ def detect_protonation_state(pdb_str: str) -> list[str | None]:
     return residues
 
 
-def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str:  # noqa: PLR0911
-    """Returns alternate residue name based on protonation state."""
+def _protonation_resname(  # noqa:PLR0911
+    resname: str,
+    atoms_in_residue: list[str],
+) -> str:
+    """Return alternate residue name based on protonation state."""
     if resname == "HIS":
         if "HD1" in atoms_in_residue and "HE2" in atoms_in_residue:
             return "HIP"
@@ -134,12 +144,97 @@ def _protonation_resname(resname: str, atoms_in_residue: list[str]) -> str:  # n
     return resname
 
 
-# This module is modified from https://github.com/DeepRank/pdbprep/blob/main/
-# original modules names: detect_protonation.py and add_hydrogens.py),
+def _calculate_protonation_state(
+    pdb_str: str,
+    forcefield: str = "AMBER",
+) -> str:
+    """Calculate the protonation states of `pdb_str` using PDB2PQR and return the content of its output file.
+
+    PDB2PQR can only use files (no strings) as input and output, which is why this function is wrapped inside
+    TemporaryFile context managers. `forcefield` is validated by `_Pdb2pqrArgs`.
+    """
+    with TemporaryFile(mode="w", suffix=".pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
+        input_pdb.write(pdb_str)
+        input_pdb.flush()  # empty the write buffer, so the complete content is in the file before it is handed over
+        # NOTE(review): `_Pdb2pqrArgs` annotates both paths as `str`, but file objects are passed here -- confirm.
+        input_args = _Pdb2pqrArgs(input_pdb, output_pdb, forcefield)
+        pdb2pqr(input_args)
+        return output_pdb.read()
+
+
+@dataclass
+class _Pdb2pqrArgs:
+    """Input arguments to `main_driver` function of PDB2PQR.
+
+    These are usually given via CLI using argparse. All arguments, including those kept as default need to be given to
+    `main_driver` if called from script.
+    The argument given to `main_driver` is accessed via dot notation and is iterated over, which is why this is created
+    as a dataclass with an iterator. Iterating yields the names of the settings, and can be repeated.
+
+    Args (all other arguments should remain untouched):
+        input_path: Input file path.
+        output_pqr: Output file path.
+        ff: Name of the selected forcefield.
+
+    Raises:
+        ValueError: if the forcefield is not recognized
+        NotImplementedError: if the forcefield is recognized, but is not AMBER
+    """
+
+    input_path: str
+    output_pqr: str
+    ff: str = "AMBER"
+
+    # arguments set different from default
+    debump: bool = True
+    keep_chain: bool = True
+    log_level: str = "CRITICAL"
+
+    # arguments kept as default
+    ph: float = 7.0
+    assign_only: bool = False
+    clean: bool = False
+    userff: None = None
+    ffout: None = None
+    usernames: None = None
+    ligand: None = None
+    neutraln: bool = False
+    neutralc: bool = False
+    drop_water: bool = False
+    pka_method: None = None
+    opt: bool = True
+    include_header: bool = False
+    whitespace: bool = False
+    pdb_output: None = None
+    apbs_input: None = None
+
+    def __post_init__(self):
+        self._index = 0  # iteration cursor (not a PDB2PQR setting)
+        if self.ff.lower() not in FORCE_FIELDS:
+            msg = f"Forcefield {self.ff} not recognized. Valid options: {FORCE_FIELDS}."
+            raise ValueError(msg)
+        if self.ff.lower() != "amber":
+            msg = f"Forcefield given as {self.ff}. Currently only AMBER forcefield is implemented."
+            raise NotImplementedError(msg)
+
+    def __iter__(self):
+        self._index = 0  # restart, so that the settings can be iterated over more than once
+        return self
+
+    def __next__(self):
+        # `_index` is iteration state rather than an argument of `main_driver`, so it is not yielded
+        settings = [name for name in vars(self) if name != "_index"]
+        if self._index >= len(settings):
+            raise StopIteration
+        self._index += 1
+        return settings[self._index - 1]
+
+
+# Part of this module is modified from https://github.com/DeepRank/pdbprep/blob/main/
+# (original module names: detect_protonation.py and add_hydrogens.py),
 # written by João M.C. Teixeira (https://github.com/joaomcteixeira)
 # publishd under the following license:
 
-
 #                                  Apache License
 #                            Version 2.0, January 2004
 #                         http://www.apache.org/licenses/
diff --git a/deeprank2/tools/pdbprep/pras_pdb2pqr.py b/deeprank2/tools/pdbprep/pras_pdb2pqr.py
deleted file mode 100644
index b66a78df9..000000000
--- a/deeprank2/tools/pdbprep/pras_pdb2pqr.py
+++ /dev/null
@@ -1,114 +0,0 @@
-from dataclasses import dataclass
-from tempfile import TemporaryFile
-
-from pdb2pqr.config import FORCE_FIELDS
-from pdb2pqr.main import main_driver as pdb2pqr
-from Pras_Server.RunType import InitRunType as PRAS
-
-
-def add_missing_heavy_atoms(pdb_str: str) -> str:
-    """Add missing heavy atoms (usually many) using PRAS.
-
-    PRAS can only use files (no strings) as input and output, which is why this function is wrapped inside
-    TemporaryFile context managers.
-
-    Args:
-        pdb_str: string representation of pdb file.
-
-    Returns:
-        str: updated pdb
-    """
-    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
-        input_pdb.write(pdb_str)
-
-        fixing = PRAS(ofname=output_pdb)
-        fixing.fname = input_pdb
-        fixing.ProcessOther()  # write to specified filename
-
-        return output_pdb.read()
-
-
-def calculate_protonation_state(pdb_str: str, forcefield: str = "AMBER") -> str:
-    """Calculate the protonation states using PDB2PQR.
-
-    PDB2PQR can only use files (no strings) as input and output, which is why this function is wrapped inside
-    TemporaryFile context managers.
-
-    Args:
-        pdb_str: string representation of pdb file.
-        forcefield: Which forcefield to use. Defaults to "AMBER".
-
-    Returns:
-        str: updated pdb
-    """
-    with TemporaryFile(mode="w", suffix="pdb", encoding="utf-8") as input_pdb, TemporaryFile(mode="r", encoding="utf-8") as output_pdb:
-        input_pdb.write(pdb_str)
-
-        input_args = _Pdb2pqrArgs(input_pdb, output_pdb, forcefield)
-        pdb2pqr(input_args)
-
-        return output_pdb.read()
-
-
-@dataclass
-class _Pdb2pqrArgs:
-    """Input arguments to `main_driver` function of PDB2PQR.
-
-    These are usually given via CLI using argparse. All arguments, including those kept as default need to be given to
-    `main_driver` if called from script.
-    The argument given to `main_driver` is accessed via dot notation and is iterated over, which is why this is created
-    as a dataclass with an iterator.
-
-    Args:
-        input_path: path of the input file
-        output_pqr: path of the output file
-        ff: which forcefield to use
-        all other arguments should remain untouched.
-
-    Raises:
-        ValueError: if the forcefield is not recognized
-    """
-
-    input_path: str
-    output_pqr: str
-    ff: str = "AMBER"
-
-    # arguments set different from default
-    debump: bool = True
-    keep_chain: bool = True
-    log_level: str = "CRITICAL"
-
-    # arguments kept as default
-    ph: float = 7.0
-    assign_only: bool = False
-    clean: bool = False
-    userff: None = None
-    ffout: None = None
-    usernames: None = None
-    ligand: None = None
-    neutraln: bool = False
-    neutralc: bool = False
-    drop_water: bool = False
-    pka_method: None = None
-    opt: bool = True
-    include_header: bool = False
-    whitespace: bool = False
-    pdb_output: None = None
-    apbs_input: None = None
-
-    def __post_init__(self):
-        self._index = 0
-        if self.ff.lower() not in FORCE_FIELDS:
-            msg = f"Forcefield {self.ff} not recognized. Valid options: {FORCE_FIELDS}."
-            raise ValueError(msg)
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        settings = vars(self)
-        if self._index < len(settings):
-            setting = list(settings)[self._index]
-            self._index += 1
-            return setting
-        raise StopIteration

From 6e708aca37a24f1cc7ea9cbfa744a5dc95024fe2 Mon Sep 17 00:00:00 2001
From: Dani Bodor <d.bodor@esciencecenter.nl>
Date: Thu, 14 Mar 2024 23:45:04 +0100
Subject: [PATCH 14/14] rename hydrogenation

---
 .../tools/pdbprep/{detect_protonation.py => hydrogenation.py}     | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename deeprank2/tools/pdbprep/{detect_protonation.py => hydrogenation.py} (100%)

diff --git a/deeprank2/tools/pdbprep/detect_protonation.py b/deeprank2/tools/pdbprep/hydrogenation.py
similarity index 100%
rename from deeprank2/tools/pdbprep/detect_protonation.py
rename to deeprank2/tools/pdbprep/hydrogenation.py