diff --git a/docs/index.md b/docs/index.md index cc06810..e023afa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,7 +12,7 @@ micromamba install -c conda-forge medchem ## Getting Started -The best way to get started is by going through the tutorials. +The best way to get started is by going through [**the tutorials**](./tutorials/Basic_Concepts.ipynb). ## Usage Notice diff --git a/docs/tutorials/Basic_Concepts.ipynb b/docs/tutorials/Basic_Concepts.ipynb index a4bec12..ff79dfc 100644 --- a/docs/tutorials/Basic_Concepts.ipynb +++ b/docs/tutorials/Basic_Concepts.ipynb @@ -7,7 +7,7 @@ "source": [ "# Basic Concepts\n", "\n", - "This tutorial is an introduction to medicinal chemistry molecular filtering. It will show that applying thoes type of filters and rules systematically and blindly is often not a good idea. While powerful such filtering technics must always be carefully assesed and prototyped before using systematically and at large scale.\n" + "This tutorial is an introduction to medicinal chemistry molecular filtering. It will show that applying those types of filters and rules systematically and blindly is often **not a good idea**. While powerful, such filtering techniques must always be carefully assessed and prototyped before being used systematically and at large scale.\n" ] }, { @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 19, "id": "cc35111a-d3a8-4313-962c-290bbf563f35", "metadata": { "tags": [] @@ -38,6 +38,7 @@ "import matplotlib.colors\n", "import seaborn as sns\n", "\n", + "from rdkit.Chem import PandasTools\n", "import datamol as dm\n", "import pandas as pd\n", "\n", @@ -49,21 +50,23 @@ "id": "da48fd38", "metadata": {}, "source": [ - "## Commercial drugs does not always pass common filters\n", + "## Marketed drugs do not always pass medchem filters\n", "\n", - "_NOTE(hadim): show that some drugs pass and some other does not pass the filters. It's important to consider the whole pipeline. 
At early stage, filters and rules might make sense and during LO and further, small critical modifications will make the drug to fail on certain filters that used to work before_\n" + "The most obvious argument against blindly and systematically applying medicinal chemistry filters, alerts and rules comes from looking at already approved and marketed drugs. The example below shows that, for many common filters, a large proportion of approved drugs do not pass them.\n", + "\n", + "That being said, it's important to keep in mind that all approved drugs have gone through a very long development process in which their structures have been further optimized and fine-tuned. During that process, it's very common to include or exclude particular features that, early in a drug pipeline, could have been seen as unwanted but turned out to be beneficial during the late stages of a program.\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 70, "id": "f1d0b87c", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "39cb21d15edf4e868818779d5a6c717a", + "model_id": "77ad091ceae64effbb7f1bd44c69ed92", "version_major": 2, "version_minor": 0 }, @@ -73,11 +76,111 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0f3859cda7f743dbbd5cf72eb1d7b536", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Common alerts filtering: 0%| | 0/1935 [00:00" ] @@ -182,12 +285,17 @@ " \"filter_lilly_demerit\",\n", "]\n", "\n", + "# Some sorting for a nice plot\n", + "data[\"n_filters_pass\"] = data[filter_columns].sum(axis=1)\n", + "data = data.sort_values(\"n_filters_pass\", ascending=True)\n", + "\n", + "# Plot\n", "\n", "f, ax = plt.subplots(figsize=(14, 4), constrained_layout=True)\n", "\n", "cmap = matplotlib.colors.ListedColormap([\"#EF6262\", \"#1D5B79\"], None)\n", "\n", - "sns.heatmap(\n", + "a = 
sns.heatmap(\n", " data[filter_columns].T,\n", " annot=False,\n", " ax=ax,\n", @@ -196,1283 +304,186 @@ " cbar=True,\n", " cmap=cmap,\n", ")\n", - "# sns.heatmap(data[filter_columns], annot=False, ax=ax, xticklabels=False, yticklabels=False)\n", "\n", "ax.collections[0].colorbar.set_ticks([0.25, 0.75])\n", - "ax.collections[0].colorbar.set_ticklabels([False, True], fontsize=14)\n", + "ax.collections[0].colorbar.set_ticklabels([\"Don't Pass\", \"Pass\"], fontsize=14)\n", "\n", "ax.set_xlabel(f\"Drug Molecules (n={len(data)})\", fontsize=18)\n", - "_ = ax.set_ylabel(\"Medchem Filters\", fontsize=18)\n" - ] - }, - { - "cell_type": "markdown", - "id": "ff59fb42", - "metadata": {}, - "source": [ - "## New drugs versus old drugs\n", - "\n", - "_NOTE(hadim): shows new drugs less likely to pass common filters (cite a paper if possible)_\n" - ] - }, - { - "cell_type": "markdown", - "id": "b2964a0b", - "metadata": {}, - "source": [ - "## General Guidelines\n", - "\n", - "_NOTE(hadim): from Manu \"I think there is some ‘guideline’ about the rules in a csv to understand which rules apply to which class of molecules or stage of discovery. 
It’s mostly heuristic, but could be worth checking properly and highlight to help people getting started.\"_\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d2911b4a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "024abf1a", - "metadata": {}, - "source": [ - "## New drugs versus old drugs\n", + "ax.set_ylabel(\"Medchem Filters\", fontsize=18)\n", "\n", - "_NOTE(hadim): shows new drugs less likely to pass common filters (cite a paper if possible)_\n" + "# Add percentage of passing mols in the y labels\n", + "new_ylabels = []\n", + "for t in ax.yaxis.get_ticklabels():\n", + " perc = data[t.get_text()].sum() / len(data) * 100\n", + " new_ylabels.append(f\"{t.get_text()} ({perc:.0f}%)\")\n", + "_ = ax.yaxis.set_ticklabels(new_ylabels, fontsize=12)\n" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "4023936f", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "id": "0a4a63e7", "metadata": {}, "source": [ - "## Case study: pro-drugs\n", + "## Showcasing pro-drugs\n", + "\n", + "Some drugs are designed as [prodrugs](https://en.wikipedia.org/wiki/Prodrug) that only become active once they have been metabolized in the human body. Those compounds tend to have unwanted molecular features that are often flagged by common alerts and rules.\n", "\n", - "In this example, an example of a pro-drug (Atenovofir for HIV) is shown. _ADD MORE ABOUT THE STORY HERE_\n" + "Below we load a small dataset of 7 drug/prodrug pairs and apply some common medchem rules to them." 
] }, { "cell_type": "code", - "execution_count": 3, - "id": "07a5d772", + "execution_count": 69, + "id": "1a7a37f0", "metadata": {}, "outputs": [ { "data": { - "image/svg+xml": [ - "\n", - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - 
"\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - 
"\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - 
"\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - 
"\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DrugDrug molProdrugProdrug molrule_of_fiverule_of_ghoserule_of_veberrule_of_zinc
0Gabapentin
\"Mol\"/
Gabapentin enacarbil
\"Mol\"/
Ro5
Drug: True
Prodrug: True
Ghose
Drug: False
Prodrug: True
Veber
Drug: True
Prodrug: True
Zinc
Drug: True
Prodrug: True
1Dabigatran
\"Mol\"/
Dabigatran etexilate
\"Mol\"/
Ro5
Drug: True
Prodrug: False
Ghose
Drug: False
Prodrug: False
Veber
Drug: False
Prodrug: False
Zinc
Drug: False
Prodrug: False
2Sofosbuvir
\"Mol\"/
Sofosbuvir
\"Mol\"/
Ro5
Drug: True
Prodrug: False
Ghose
Drug: False
Prodrug: False
Veber
Drug: False
Prodrug: False
Zinc
Drug: False
Prodrug: False
3Tedizolid
\"Mol\"/
Tedizolid phosphate
\"Mol\"/
Ro5
Drug: True
Prodrug: False
Ghose
Drug: True
Prodrug: True
Veber
Drug: True
Prodrug: False
Zinc
Drug: True
Prodrug: False
4Isavuconazole
\"Mol\"/
Isavuconazonium
\"Mol\"/
Ro5
Drug: True
Prodrug: False
Ghose
Drug: True
Prodrug: False
Veber
Drug: True
Prodrug: False
Zinc
Drug: True
Prodrug: False
5Aripiprazole
\"Mol\"/
Aripiprazole lauroxil
\"Mol\"/
Ro5
Drug: True
Prodrug: False
Ghose
Drug: True
Prodrug: False
Veber
Drug: True
Prodrug: False
Zinc
Drug: True
Prodrug: False
6ACT-333679
\"Mol\"/
Selexipag
\"Mol\"/
Ro5
Drug: True
Prodrug: True
Ghose
Drug: True
Prodrug: False
Veber
Drug: False
Prodrug: False
Zinc
Drug: True
Prodrug: True
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Build the dataset\n", - "smiles_list = [\n", - " \"NC1=C2C(N(CC(OCP(O)(O)=O)C)C=N2)=NC=N1\",\n", - " \"NC1=C2C(N(CC(OCP(OCOC(OC(C)C)=O)(OCOC(OC(C)C)=O)=O)C)C=N2)=NC=N1\",\n", - " \"NC1=C2C(N(CC(OCP(OC3=CC=CC=C3)(NC(C)C(OC(C)C)=O)=O)C)C=N2)=NC=N1\",\n", - "]\n", + "# Load the dataset\n", + "data = pd.read_csv(\"./data/Drug_Prodrug_pairs.csv\")\n", "\n", - "data = pd.DataFrame({\"smiles\": smiles_list})\n", - "data[\"mol\"] = data[\"smiles\"].apply(dm.to_mol)\n", + "# Compute mol objects\n", + "data[\"Drug mol\"] = data[\"Drug SMILES\"].apply(dm.to_mol)\n", + "data[\"Prodrug mol\"] = data[\"Prodrug SMILES\"].apply(dm.to_mol)\n", "\n", - "# Apply basic rules\n", - "data[\"rule_of_five\"] = data[\"smiles\"].apply(mc.rules.basic_rules.rule_of_five)\n", - "data[\"rule_of_ghose\"] = data[\"smiles\"].apply(mc.rules.basic_rules.rule_of_ghose)\n", - "data[\"rule_of_veber\"] = data[\"smiles\"].apply(mc.rules.basic_rules.rule_of_veber)\n", - "data[\"rule_of_zinc\"] = data[\"smiles\"].apply(mc.rules.basic_rules.rule_of_zinc)\n", + "# Reorder the columns\n", + "data = data[[\"Drug\", \"Drug mol\", \"Prodrug\", \"Prodrug mol\"]]\n", + "\n", + "\n", + "# Apply a few medchem rules to the drugs and prodrugs\n", + "def _apply_rules(row):\n", + " rule_names = [\n", + " (\"Ro5\", \"rule_of_five\"),\n", + " (\"Ghose\", \"rule_of_ghose\"),\n", + " (\"Veber\", \"rule_of_veber\"),\n", + " (\"Zinc\", \"rule_of_zinc\"),\n", + " ]\n", + "\n", + " for rule_label, rule_name in rule_names:\n", + " rule_fn = getattr(mc.rules.basic_rules, rule_name)\n", + "\n", + " # Apply the rule to the drug and prodrug\n", + " drug_ok = rule_fn(dm.copy_mol(row[\"Drug mol\"]))\n", + " prodrug_ok = rule_fn(dm.copy_mol(row[\"Prodrug mol\"]))\n", + "\n", + " row[rule_name] = f\"{rule_label}\\nDrug: {drug_ok}\\nProdrug: {prodrug_ok}\"\n", "\n", - "# Display the 
molecules\n", - "legends = []\n", - "legends += [f\"Ro5={v}\" for v in data[\"rule_of_five\"]]\n", - "legends += [f\"Ghose={v}\" for v in data[\"rule_of_ghose\"]]\n", - "legends += [f\"Veber={v}\" for v in data[\"rule_of_veber\"]]\n", - "legends += [f\"Zinc={v}\" for v in data[\"rule_of_zinc\"]]\n", + " return row\n", "\n", - "mol_list = data[\"mol\"].tolist() * 4\n", "\n", - "dm.to_image(mol_list, legends=legends, n_cols=3, mol_size=(300, 200))\n" + "data = data.apply(_apply_rules, axis=1)\n", + "\n", + "# Enable drawing in the dataframe\n", + "PandasTools.ChangeMoleculeRendering(data)\n", + "\n", + "# Small hack to display new lines as well as RDKit drawings\n", + "HTML(data.to_html().replace(\"\\\\n\", \"
\"))\n" ] }, { @@ -1480,16 +491,18 @@ "id": "1973324f", "metadata": {}, "source": [ - "_NOTE(hadim): DISCUSS THE RESULTS._\n" + "While this is not systematic, we observe that many drugs passes the rules while the associated prodrugs does not pass them." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "333ca047", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Catalogs.ipynb b/docs/tutorials/Catalogs.ipynb index f0c4a53..dab59bf 100644 --- a/docs/tutorials/Catalogs.ipynb +++ b/docs/tutorials/Catalogs.ipynb @@ -711,6 +711,16 @@ "matches = [custom_catalog.HasMatch(x) for x in mols]\n", "dm.to_image(mols, legends=[f\"Match: {x}\" for x in matches])\n" ] + }, + { + "cell_type": "markdown", + "id": "dcef48d4", + "metadata": {}, + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Chemical_Groups.ipynb b/docs/tutorials/Chemical_Groups.ipynb index a1d1b51..ff8c2ca 100644 --- a/docs/tutorials/Chemical_Groups.ipynb +++ b/docs/tutorials/Chemical_Groups.ipynb @@ -588,12 +588,14 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "4f369694", + "cell_type": "markdown", + "id": "f446e5b2", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Constraints_Filtering.ipynb b/docs/tutorials/Constraints_Filtering.ipynb index fddfa5b..e2aac6d 100644 --- a/docs/tutorials/Constraints_Filtering.ipynb +++ b/docs/tutorials/Constraints_Filtering.ipynb @@ -515,6 +515,16 @@ "\n", "dm.to_image(mols, legends=legends)" ] + }, + { + "cell_type": "markdown", + "id": "2021dfa2", + "metadata": {}, + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Functional_API.ipynb b/docs/tutorials/Functional_API.ipynb index bf0ddb4..1a669c6 100644 --- 
a/docs/tutorials/Functional_API.ipynb +++ b/docs/tutorials/Functional_API.ipynb @@ -9121,12 +9121,12 @@ }, { "cell_type": "markdown", - "id": "51768c29", + "id": "8713cdbb", "metadata": {}, "source": [ "---\n", "\n", - "The End!" + "-- The End :-)" ] } ], diff --git a/docs/tutorials/Medchem_Query_Language.ipynb b/docs/tutorials/Medchem_Query_Language.ipynb index 883c28d..65ec267 100644 --- a/docs/tutorials/Medchem_Query_Language.ipynb +++ b/docs/tutorials/Medchem_Query_Language.ipynb @@ -1135,6 +1135,16 @@ " - OR operator : `OR` or `|` or `||` or `or`\n", " - NOT operator : `NOT` or `!` or `~` or `not`\n" ] + }, + { + "cell_type": "markdown", + "id": "a851c80b", + "metadata": {}, + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Medchem_Rules.ipynb b/docs/tutorials/Medchem_Rules.ipynb index 0acfb9f..17e60dd 100644 --- a/docs/tutorials/Medchem_Rules.ipynb +++ b/docs/tutorials/Medchem_Rules.ipynb @@ -1654,6 +1654,16 @@ "# Apply rule #2\n", "rule_2(mol)" ] + }, + { + "cell_type": "markdown", + "id": "eb05d65b", + "metadata": {}, + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Molecular_Complexity.ipynb b/docs/tutorials/Molecular_Complexity.ipynb index daebd6c..7a36e4a 100644 --- a/docs/tutorials/Molecular_Complexity.ipynb +++ b/docs/tutorials/Molecular_Complexity.ipynb @@ -1466,6 +1466,16 @@ "\n", "dm.to_image(data[\"mol\"].tolist(), legends=legends, mol_size=(300, 200))" ] + }, + { + "cell_type": "markdown", + "id": "165a451d", + "metadata": {}, + "source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/Structural_Filters.ipynb b/docs/tutorials/Structural_Filters.ipynb index c462899..bf564e3 100644 --- a/docs/tutorials/Structural_Filters.ipynb +++ b/docs/tutorials/Structural_Filters.ipynb @@ -3126,6 +3126,16 @@ "\n", "results.head()" ] + }, + { + "cell_type": "markdown", + "id": "3bae7171", + "metadata": {}, + 
"source": [ + "---\n", + "\n", + "-- The End :-)" + ] } ], "metadata": { diff --git a/docs/tutorials/data/Drug_Prodrug_pairs.csv b/docs/tutorials/data/Drug_Prodrug_pairs.csv new file mode 100644 index 0000000..712a2e2 --- /dev/null +++ b/docs/tutorials/data/Drug_Prodrug_pairs.csv @@ -0,0 +1,8 @@ +Drug,Drug SMILES,Prodrug,Prodrug SMILES +Gabapentin,O=C(O)CC1(CN)CCCCC1,Gabapentin enacarbil,CC(C)C(=O)OC(C)OC(=O)NCC1(CCCCC1)CC(=O)O +Dabigatran,CN1C2=C(C=C(C=C2)C(=O)N(CCC(=O)O)C3=CC=CC=N3)N=C1CNC4=CC=C(C=C4)C(=N)N,Dabigatran etexilate,CCCCCCOC(=O)N=C(C1=CC=C(C=C1)NCC2=NC3=C(N2C)C=CC(=C3)C(=O)N(CCC(=O)OCC)C4=CC=CC=N4)N +Sofosbuvir,OP(OC[C@@H]1[C@@H](O)[C@@](C)(F)[C@H](N2C=CC(NC2=O)=O)O1)(O)=O,Sofosbuvir,C[C@@H](C(=O)OC(C)C)N[P@](=O)(OC[C@@H]1[C@H]([C@@]([C@@H](O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3 +Tedizolid,O=C4O[C@H](CN4c3cc(F)c(c1ccc(nc1)c2nn(nn2)C)cc3)CO,Tedizolid phosphate,CN1N=NC(=N1)C1=CC=C(C=N1)C1=CC=C(C=C1F)N1C[C@H](COP(O)(O)=O)OC1=O +Isavuconazole,C[C@H]([C@](c1c(F)ccc(F)c1)(O)Cn2ncnc2)c3scc(c4ccc(C#N)cc4)n3,Isavuconazonium,C[C@H]([C@](c1c(F)ccc(F)c1)(O)Cn2nc[n+](C(OC(N(c3ncccc3COC(CNC)=O)C)=O)C)c2)c4scc(c5ccc(C#N)cc5)n4 +Aripiprazole,Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl,Aripiprazole lauroxil,CCCCCCCCCCCC(=O)OCN1C(=O)CCC2=C1C=C(C=C2)OCCCCN3CCN(CC3)C4=C(C(=CC=C4)Cl)Cl +ACT-333679,CC(N(C1=CN=C(C2=CC=CC=C2)C(C3=CC=CC=C3)=N1)CCCCOCC(O)=O)C,Selexipag,CC(C)N(CCCCOCC(=O)NS(=O)(=O)C)C1=CN=C(C(=N1)C2=CC=CC=C2)C3=CC=CC=C3 \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index ba784f5..d0fd1a8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -14,7 +14,6 @@ strict: true nav: - Overview: index.md - - CLI: cli.md - Tutorials: - Basic Concepts: tutorials/Basic_Concepts.ipynb - Functional API: tutorials/Functional_API.ipynb @@ -25,6 +24,7 @@ nav: - Medchem Query Language: tutorials/Medchem_Query_Language.ipynb - Molecular Complexity: tutorials/Molecular_Complexity.ipynb - Constraints Filtering: tutorials/Constraints_Filtering.ipynb + - CLI: cli.md - 
API: - medchem.rules: api/medchem.rules.md - medchem.structural: api/medchem.structural.md diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py index 97cc21e..6a7e018 100644 --- a/tests/test_notebooks.py +++ b/tests/test_notebooks.py @@ -12,6 +12,10 @@ NOTEBOOK_PATHS = sorted(list(TUTORIALS_DIR.glob("*.ipynb"))) NOTEBOOK_PATHS = list(filter(lambda x: x.name not in DISABLE_NOTEBOOKS, NOTEBOOK_PATHS)) +# Discard some notebooks +NOTEBOOKS_TO_DISCARD = ["Basic_Concepts.ipynb"] +NOTEBOOK_PATHS = list(filter(lambda x: x.name not in NOTEBOOKS_TO_DISCARD, NOTEBOOK_PATHS)) + @pytest.mark.parametrize("nb_path", NOTEBOOK_PATHS, ids=[str(n.name) for n in NOTEBOOK_PATHS]) def test_notebook(nb_path):