diff --git a/ipython/Conformer Generation Workflow.ipynb b/ipython/Conformer Generation Workflow.ipynb new file mode 100644 index 00000000..c10c1b30 --- /dev/null +++ b/ipython/Conformer Generation Workflow.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stable species conformer search\n", + "\n", + "Leverage ETKDG and GeoMol as 3D geometry embedder for stochastic conformer generation\n", + "\n", + "The idea is to have modular methods for each step, which are currently hardcoded. This includes:\n", + "- initial conformer embedding (ETKDG, GeoMol)\n", + "- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)\n", + "- pruning (torsion fingerprints, CREGEN)\n", + "- convergence metrics (conformational entropy/partition function)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3a45eb9b", + "metadata": {}, + "outputs": [], + "source": [ + "from rdmc.conformer_generation.embedders import *\n", + "from rdmc.conformer_generation.optimizers import *\n", + "from rdmc.conformer_generation.pruners import *\n", + "from rdmc.conformer_generation.metrics import *\n", + "from rdmc.conformer_generation.generators import StochasticConformerGenerator\n", + "\n", + "from rdmc import RDKitMol\n", + "from rdmc.view import mol_viewer, interactive_conformer_viewer, conformer_viewer\n", + "\n", + "T = 298 # K\n", + "R = 0.0019872 # kcal/(K*mol)\n", + "HARTREE_TO_KCAL_MOL = 627.503\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "id": "e0216632", + "metadata": {}, + "source": [ + "## 1. Test embedder" + ] + }, + { + "cell_type": "markdown", + "id": "79826cc2", + "metadata": {}, + "source": [ + "Create the 3D geometry for the molecule specified by the SMILES (`smi`). 
Currently it has no 3D conformer embedded, therefore the visualization returns a 2D illustration of the molecule" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "61a6eb92", + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smi = \"[C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\" # example 1\n", + "smi = \"CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\" # example 2\n", + "\n", + "mol_viewer(RDKitMol.FromSmiles(smi))" + ] + }, + { + "cell_type": "markdown", + "id": "7b022193", + "metadata": {}, + "source": [ + "### 1.1 ETKDG embedder" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "886ca5f8", + "metadata": {}, + "outputs": [], + "source": [ + "n_confs = 10 # Number of conformers to create\n", + "\n", + "embedder = ETKDGEmbedder() # Initialize conformer embedder\n", + "unique_mol_data = embedder(smi, n_confs) # Embed molecule 3D geometries with ETKDG\n", + "mol = dict_to_mol(unique_mol_data) # Convert raw data to a molecule object" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f8b9af06", + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "visualize_conf_id = 2\n", + "\n", + "mol_viewer(mol, confId=visualize_conf_id) # visualize the molecule" + ] + }, + { + "cell_type": "markdown", + "id": "7424c20e", + "metadata": {}, + "source": [ + "### 1.2 GeoMol Conformer" + ] + }, + { + "cell_type": "markdown", + "id": "49022146", + "metadata": {}, + "source": [ + "Supported options:\n", + "- `dataset`: `drugs` or `qm9`\n", + "- `device`: `cpu`, or `cuda` (or specific cuda device like `cuda:0`)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9431fbc8", + "metadata": {}, + "outputs": [], + "source": [ + "n_confs = 10 # Number of conformers to create\n", + "dataset = \"drugs\"\n", + "device = \"cuda\"\n", + "\n", + "embedder = GeoMolEmbedder(dataset=dataset, track_stats=True, temp_schedule=\"none\", device=device) # Initialize conformer embedder\n", + "unique_mol_data = embedder(smi, n_confs) # Embed molecule 3D geometries with GeoMol\n", + "mol = dict_to_mol(unique_mol_data) # Convert raw data to a molecule object" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9ac12a75", + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "visualize_conf_id = 2\n", + "\n", + "mol_viewer(mol, confId=visualize_conf_id) # visualize the molecule" + ] + }, + { + "cell_type": "markdown", + "id": "46be5c44", + "metadata": {}, + "source": [ + "## 2. Create a conformer generation workflow" + ] + }, + { + "cell_type": "markdown", + "id": "a35cd583", + "metadata": {}, + "source": [ + "### 2.1 Choose each component\n", + "- embedder\n", + "- optimizer\n", + "- pruner\n", + "- metric\n", + "\n", + "You can also use a default configuration by providing `config` to the generator. Open a new cell and run `StochasticConformerGenerator.set_config?` to see what the default configuration is." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "92bfdbc4", + "metadata": {}, + "outputs": [], + "source": [ + "# embedder = ETKDGEmbedder(track_stats=True)\n", + "embedder = GeoMolEmbedder(dataset=\"drugs\", track_stats=True, temp_schedule=\"none\", device=\"cpu\") # Initialize conformer embedder\n", + "optimizer = XTBOptimizer()\n", + "pruner = TorsionPruner(max_chk_threshold=30)\n", + "metric = SCGMetric(metric=\"entropy\", window=5, threshold=0.005)" + ] + }, + { + "cell_type": "markdown", + "id": "0522a7ff", + "metadata": {}, + "source": [ + "### 2.2 Conformer generation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9f099b43", + "metadata": {}, + "outputs": [ + { + "data": { + "application/3dmoljs_load.v0": "
\n

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n
\n", + "text/html": [ + "
\n", + "

3Dmol.js failed to load for some reason. Please check your browser console for error messages.

\n", + "
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smi = \"CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\"\n", + "\n", + "mol_viewer(RDKitMol.FromSmiles(smi))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e72b2651", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024/03/14 04:33:34 PM | StochasticConformerGenerator | INFO: Generating conformers for CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\n", + "2024/03/14 04:33:34 PM | StochasticConformerGenerator | INFO: \n", + "Iteration 1: embedding 100 initial guesses...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024/03/14 04:33:35 PM | StochasticConformerGenerator | INFO: Iteration 1: optimizing initial guesses...\n", + "2024/03/14 04:33:39 PM | StochasticConformerGenerator | INFO: Iteration 1: pruning conformers...\n", + "2024/03/14 04:33:40 PM | StochasticConformerGenerator | INFO: Iteration 1: kept 12 unique conformers\n", + "2024/03/14 04:33:40 PM | StochasticConformerGenerator | INFO: \n", + "Iteration 2: embedding 100 initial guesses...\n", + "2024/03/14 04:33:40 PM | StochasticConformerGenerator | INFO: Iteration 2: optimizing initial guesses...\n", + "2024/03/14 04:33:45 PM | StochasticConformerGenerator | INFO: Iteration 2: pruning conformers...\n", + "2024/03/14 04:33:45 PM | StochasticConformerGenerator | INFO: Iteration 2: kept 12 unique conformers\n", + "2024/03/14 04:33:45 PM | StochasticConformerGenerator | INFO: Iteration 2: stop crietria reached\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of conformers: 12\n", + "Metric: 4.938e-03\n" + ] + } + ], + "source": [ + "n_conformers_per_iter = 100\n", + "min_iters = 2\n", + "max_iters = 5\n", + "\n", + "scg = 
StochasticConformerGenerator(\n", + " smiles=smi,\n", + " embedder=embedder,\n", + " optimizer=optimizer,\n", + " pruner=pruner,\n", + " metric=metric,\n", + " min_iters=min_iters,\n", + " max_iters=max_iters,\n", + ")\n", + "\n", + "unique_mol_data = scg(n_conformers_per_iter)\n", + "print(\n", + " f\"Number of conformers: {len(unique_mol_data)}\\n\"\n", + " f\"Metric: {scg.metric.metric_history[-1]:.3e}\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "GeoMol", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ipython/Generate Atommapped SMILES.ipynb b/ipython/Generate Atommapped SMILES.ipynb index 83e9f39c..1572a373 100644 --- a/ipython/Generate Atommapped SMILES.ipynb +++ b/ipython/Generate Atommapped SMILES.ipynb @@ -838,7 +838,7 @@ "source": [ "new_rxn = Reaction(r_complex, p_complex)\n", "display(new_rxn)\n", - "print(rxn.to_smiles())" + "print(new_rxn.to_smiles())" ] }, { @@ -870,7 +870,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.9.18" }, "vscode": { "interpreter": { diff --git a/ipython/stochastic_conf_pipeline_LP.ipynb b/ipython/stochastic_conf_pipeline_LP.ipynb deleted file mode 100644 index 3b1f782d..00000000 --- a/ipython/stochastic_conf_pipeline_LP.ipynb +++ /dev/null @@ -1,327 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Stable species conformer search\n", - "Leverage ETKDG for stochastic conformer generation\n", - "\n", - "Use this as a base for ML conformer generation\n", - "\n", - "The idea is to have modular methods for each step, which are currently hardcoded. 
This includes:\n", - "- initial conformer embedding (ETKDG, GeoMol)\n", - "- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)\n", - "- pruning (torsion fingerprints, CREGEN)\n", - "- convergence metrics (conformational entropy/partition function)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from rdmc.conformer_generation.embedders import *\n", - "from rdmc.conformer_generation.optimizers import *\n", - "from rdmc.conformer_generation.pruners import *\n", - "from rdmc.conformer_generation.metrics import *\n", - "from rdmc.conformer_generation.generators import StochasticConformerGenerator\n", - "\n", - "from rdmc.view import mol_viewer, interactive_conformer_viewer, conformer_viewer\n", - "\n", - "T = 298 # K\n", - "R = 0.0019872 # kcal/(K*mol)\n", - "HARTREE_TO_KCAL_MOL = 627.503" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from rdkit import Chem\n", - "\n", - "smi = \"[C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Config specified: using default settings for normal config\n", - "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Generating conformers for [C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\n", - "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 1: embedding 100 initial guesses...\n", - "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Iteration 1: optimizing initial guesses...\n", - "2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: Iteration 1: pruning conformers...\n", - "2022/04/01 
03:17:45 PM | StochasticConformerGenerator | INFO: Iteration 1: kept 9 unique conformers\n", - "2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 2: embedding 100 initial guesses...\n", - "2022/04/01 03:17:46 PM | StochasticConformerGenerator | INFO: Iteration 2: optimizing initial guesses...\n", - "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: Iteration 2: pruning conformers...\n", - "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: Iteration 2: kept 18 unique conformers\n", - "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 3: embedding 100 initial guesses...\n", - "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: Iteration 3: optimizing initial guesses...\n", - "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: Iteration 3: pruning conformers...\n", - "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: Iteration 3: kept 27 unique conformers\n", - "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 4: embedding 100 initial guesses...\n", - "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: Iteration 4: optimizing initial guesses...\n", - "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: Iteration 4: pruning conformers...\n", - "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: Iteration 4: kept 35 unique conformers\n", - "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 5: embedding 100 initial guesses...\n", - "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: Iteration 5: optimizing initial guesses...\n", - "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: Iteration 5: pruning conformers...\n", - "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: Iteration 5: kept 43 unique conformers\n", - "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 6: embedding 100 initial 
guesses...\n", - "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: Iteration 6: optimizing initial guesses...\n", - "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: Iteration 6: pruning conformers...\n", - "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: Iteration 6: kept 44 unique conformers\n", - "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 7: embedding 100 initial guesses...\n", - "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: Iteration 7: optimizing initial guesses...\n", - "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: Iteration 7: pruning conformers...\n", - "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: Iteration 7: kept 46 unique conformers\n", - "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 8: embedding 100 initial guesses...\n", - "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: Iteration 8: optimizing initial guesses...\n", - "2022/04/01 03:18:08 PM | StochasticConformerGenerator | INFO: Iteration 8: pruning conformers...\n", - "2022/04/01 03:18:08 PM | StochasticConformerGenerator | INFO: Iteration 8: kept 52 unique conformers\n", - "2022/04/01 03:18:08 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 9: embedding 100 initial guesses...\n", - "2022/04/01 03:18:09 PM | StochasticConformerGenerator | INFO: Iteration 9: optimizing initial guesses...\n", - "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: Iteration 9: pruning conformers...\n", - "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: Iteration 9: kept 50 unique conformers\n", - "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 10: embedding 100 initial guesses...\n", - "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: Iteration 10: optimizing initial guesses...\n", - "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: Iteration 10: 
pruning conformers...\n", - "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: Iteration 10: kept 49 unique conformers\n", - "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 11: embedding 100 initial guesses...\n", - "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: Iteration 11: optimizing initial guesses...\n", - "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: Iteration 11: pruning conformers...\n", - "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: Iteration 11: kept 54 unique conformers\n", - "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 12: embedding 100 initial guesses...\n", - "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: Iteration 12: optimizing initial guesses...\n", - "2022/04/01 03:18:21 PM | StochasticConformerGenerator | INFO: Iteration 12: pruning conformers...\n", - "2022/04/01 03:18:22 PM | StochasticConformerGenerator | INFO: Iteration 12: kept 52 unique conformers\n", - "2022/04/01 03:18:22 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 13: embedding 100 initial guesses...\n", - "2022/04/01 03:18:22 PM | StochasticConformerGenerator | INFO: Iteration 13: optimizing initial guesses...\n", - "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: Iteration 13: pruning conformers...\n", - "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: Iteration 13: kept 59 unique conformers\n", - "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 14: embedding 100 initial guesses...\n", - "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: Iteration 14: optimizing initial guesses...\n", - "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: Iteration 14: pruning conformers...\n", - "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: Iteration 14: kept 66 unique conformers\n", - "2022/04/01 03:18:28 PM | StochasticConformerGenerator | 
INFO: \n", - "Iteration 15: embedding 100 initial guesses...\n", - "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: Iteration 15: optimizing initial guesses...\n", - "2022/04/01 03:18:31 PM | StochasticConformerGenerator | INFO: Iteration 15: pruning conformers...\n", - "2022/04/01 03:18:31 PM | StochasticConformerGenerator | INFO: Iteration 15: kept 74 unique conformers\n", - "2022/04/01 03:18:31 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 16: embedding 100 initial guesses...\n", - "2022/04/01 03:18:32 PM | StochasticConformerGenerator | INFO: Iteration 16: optimizing initial guesses...\n", - "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: Iteration 16: pruning conformers...\n", - "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: Iteration 16: kept 76 unique conformers\n", - "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 17: embedding 100 initial guesses...\n", - "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: Iteration 17: optimizing initial guesses...\n", - "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: Iteration 17: pruning conformers...\n", - "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: Iteration 17: kept 72 unique conformers\n", - "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 18: embedding 100 initial guesses...\n", - "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: Iteration 18: optimizing initial guesses...\n", - "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: Iteration 18: pruning conformers...\n", - "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: Iteration 18: kept 68 unique conformers\n", - "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 19: embedding 100 initial guesses...\n", - "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: Iteration 19: optimizing initial guesses...\n", - "2022/04/01 
03:18:44 PM | StochasticConformerGenerator | INFO: Iteration 19: pruning conformers...\n", - "2022/04/01 03:18:44 PM | StochasticConformerGenerator | INFO: Iteration 19: kept 65 unique conformers\n", - "2022/04/01 03:18:44 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 20: embedding 100 initial guesses...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022/04/01 03:18:45 PM | StochasticConformerGenerator | INFO: Iteration 20: optimizing initial guesses...\n", - "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 20: pruning conformers...\n", - "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 20: kept 66 unique conformers\n", - "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 21: embedding 100 initial guesses...\n", - "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 21: optimizing initial guesses...\n", - "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 21: pruning conformers...\n", - "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 21: kept 65 unique conformers\n", - "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 22: embedding 100 initial guesses...\n", - "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 22: optimizing initial guesses...\n", - "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: Iteration 22: pruning conformers...\n", - "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: Iteration 22: kept 63 unique conformers\n", - "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 23: embedding 100 initial guesses...\n", - "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: Iteration 23: optimizing initial guesses...\n", - "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: Iteration 23: pruning conformers...\n", - "2022/04/01 03:18:58 PM | 
StochasticConformerGenerator | INFO: Iteration 23: kept 63 unique conformers\n", - "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 24: embedding 100 initial guesses...\n", - "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: Iteration 24: optimizing initial guesses...\n", - "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: Iteration 24: pruning conformers...\n", - "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: Iteration 24: kept 65 unique conformers\n", - "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: \n", - "Iteration 25: embedding 100 initial guesses...\n", - "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: Iteration 25: optimizing initial guesses...\n", - "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Iteration 25: pruning conformers...\n", - "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Iteration 25: kept 63 unique conformers\n", - "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Iteration 25: stop crietria reached\n", - "\n", - "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Calling CRESTPruner\n", - "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Calling XTBOptimizer\n", - "2022/04/01 03:19:13 PM | StochasticConformerGenerator | INFO: Calling CRESTPruner\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "9 0.008233205857910033\n" - ] - } - ], - "source": [ - "embedder = ETKDGEmbedder(track_stats=True)\n", - "# embedder = GeoMolEmbedder(\"../rdmc/external/GeoMol/trained_models/both/\", dataset=\"drugs\", track_stats=True, temp_schedule=\"none\")\n", - "optimizer = XTBOptimizer()\n", - "pruner = TorsionPruner(max_chk_threshold=30)\n", - "metric = SCGMetric(metric=\"entropy\", window=5, threshold=0.005)\n", - "n_conformers_per_iter = 100\n", - "\n", - "scg = StochasticConformerGenerator(\n", - " smiles=smi,\n", - " config=\"normal\",\n", - " 
embedder=embedder,\n", - " min_iters=5\n", - ")\n", - "\n", - "unique_mol_data = scg(n_conformers_per_iter)\n", - "print(len(unique_mol_data), scg.metric.metric_history[-1])" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a006a637bd1943c0b5db28b99366abe4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "interactive(children=(IntSlider(value=0, description='confId', max=8), Output()), _dom_classes=('widget-intera…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - ".(confId)>" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mol = dict_to_mol(unique_mol_data)\n", - "interactive_conformer_viewer(mol)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_smiles = [\n", - " \"C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O\",\n", - " \"CC[C@@H]1CCCC[C@@H]1C\",\n", - " \"CCCCCCCCCC\",\n", - " \"CC1=CCCC(C1/C=C/C(=O)C)(C)C\",\n", - " \"C[C@H]1CC[C@H](CC2=C1CC[C@@H]2C)C(C)(C)O\",\n", - " \"N[C@@H](Cc1ccccc1)C(=O)N[C@@H](Cc2ccccc2)C(O)=O\",\n", - " \"C1COCC(=O)N1C2=CC=C(C=C2)N3C[C@@H](OC3=O)CNC(=O)C4=CC=C(S4)Cl\",\n", - " \"CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@H]2[C@@H]([C@@H]([C@H](O2)C)O)O\",\n", - " \"CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)OCC\",\n", - " \"C[C@]12CC[C@@H](CC1=CC[C@@H]3[C@@H]2CC[C@]4([C@H]3CC=C4C5=CN=CC=C5)C)O\",\n", - " \"C1CCN(C1)CCOC2=C3COC/C=C/COCC4=CC(=CC=C4)C5=NC(=NC=C5)NC(=C3)C=C2\",\n", - " \"C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O\",\n", - " \"CC(C)C[C@@H](C1=C(C(=C(C(=C1O)C=O)O)C=O)O)[C@]2(CC[C@@H]3[C@@H]2[C@H]4[C@H](C4(C)C)CC[C@@]3(C)O)C\",\n", - " 
\"C[C@H](N)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)O\",\n", - " \"CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@H]3[C@@H]([C@@](C2(C)C)(C[C@@H]1OC(=O)[C@@H]([C@H](C5=CC=CC=C5)NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C\",\n", - " \"C[C@@H]1C[C@H]2CC[C@H](O2)[C@@H](C(=O)O[C@H](C[C@@H]3CC[C@@H](O3)[C@H](C(=O)O[C@@H](C[C@H]4CC[C@H](O4)[C@@H](C(=O)O[C@H](C[C@@H]5CC[C@@H](O5)[C@H](C(=O)O1)C)C)C)C)C)C)C\",\n", - " \"CC[C@@]1(C[C@H]2C[C@@](C3=C(CCN(C2)C1)C4=CC=CC=C4N3)(C5=C(C=C6C(=C5)[C@]78CCN9[C@H]7[C@@](C=CC9)([C@H]([C@@]([C@@H]8N6C)(C(=O)OC)O)OC(=O)C)CC)OC)C(=O)OC)O\",\n", - " \"CC(C)[C@@H](C(=O)N1CC2(CC2)C[C@H]1C3=NC=C(N3)C4=CC5=C(C=C4)C6=C(C5(F)F)C=C(C=C6)C7=CC8=C(C=C7)N=C(N8)[C@@H]9[C@H]1CC[C@H](C1)N9C(=O)[C@H](C(C)C)NC(=O)OC)NC(=O)OC\",\n", - " \"C[C@@H]1CC[C@H]2C[C@@H](/C(=C/C=C/C=C/[C@H](C[C@H](C(=O)[C@@H]([C@@H](/C(=C/[C@H](C(=O)C[C@H](OC(=O)[C@@H]3CCCCN3C(=O)C(=O)[C@@]1(O2)O)[C@H](C)C[C@@H]4CC[C@H]([C@@H](C4)OC)OCCO)C)/C)O)OC)C)C)/C)OC\",\n", - " \"C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O\"\n", - "]\n", - "\n", - "import csv\n", - "\n", - "with open(\"./../rdmc/conformer_exps/organic_conf_gen/smiles.csv\", \"w\") as f:\n", - " writer = csv.writer(f, delimiter=\"\\n\")\n", - " writer.writerow(test_smiles)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:rdmc_env] *", - "language": "python", - "name": "conda-env-rdmc_env-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/rdmc/conformer_generation/embedders.py b/rdmc/conformer_generation/embedders.py index 211eb3c2..ff9c548c 100644 --- a/rdmc/conformer_generation/embedders.py +++ b/rdmc/conformer_generation/embedders.py @@ -5,27 +5,27 @@ Modules for providing initial guess geometries """ +from pathlib import Path + from rdmc import RDKitMol -import os.path as osp -import yaml + import numpy as np from time import time - -try: - import torch - from torch_geometric.data import Batch -except ImportError: - pass - from .utils import * +# GeoMol relevant imports try: import torch - from rdmc.external.GeoMol.model.model import GeoMol - from rdmc.external.GeoMol.model.featurization import featurize_mol_from_smiles - from rdmc.external.GeoMol.model.inference import construct_conformers -except ImportError: + from geomol.model import GeoMol + from geomol.featurization import featurize_mol_from_smiles, from_data_list + from geomol.inference import construct_conformers + from geomol.utils import model_path as geomol_model_path + import yaml # only used to load GeoMol parameters +except ImportError as e: + GeoMol = None + print(e) print("No GeoMol installation detected. Skipping import...") + print("Please install the GeoMol fork at https://github.com/xiaoruiDong/GeoMol") class ConfGenEmbedder: @@ -58,8 +58,10 @@ def update_mol(self, smiles: str): # Copy the graph but remove conformers self.mol = self.mol.Copy(quickCopy=True) - def embed_conformers(self, - n_conformers: int): + def embed_conformers( + self, + n_conformers: int + ): """ Embed conformers according to the molecule graph. @@ -71,10 +73,11 @@ def embed_conformers(self, """ raise NotImplementedError - def update_stats(self, - n_trials: int, - time: float = 0. - ) -> dict: + def update_stats( + self, + n_trials: int, + time: float = 0. 
+ ) -> dict: """ Update the statistics of the conformer generation. @@ -88,10 +91,12 @@ def update_stats(self, n_success = self.mol.GetNumConformers() self.n_success = n_success self.percent_success = n_success / n_trials * 100 - stats = {"iter": self.iter, - "time": time, - "n_success": self.n_success, - "percent_success": self.percent_success} + stats = { + "iter": self.iter, + "time": time, + "n_success": self.n_success, + "percent_success": self.percent_success + } self.stats.append(stats) return stats @@ -104,9 +109,11 @@ def write_mol_data(self): """ return mol_to_dict(self.mol, copy=False, iter=self.iter) - def __call__(self, - smiles: str, - n_conformers: int): + def __call__( + self, + smiles: str, + n_conformers: int + ): """ Embed conformers according to the molecule graph. @@ -137,28 +144,36 @@ class GeoMolEmbedder(ConfGenEmbedder): Embed conformers using GeoMol. Args: - trained_model_dir (str): Directory of the trained model. + trained_model_dir (str, optional): Directory of the trained model. If not provided, the models distributed with the package will be used. dataset (str, optional): Dataset used for training. Defaults to ``"drugs"``. temp_schedule (str, optional): Temperature schedule. Defaults to ``"linear"``. track_stats (bool, optional): Whether to track the statistics of the conformer generation. Defaults to ``False``. """ - def __init__(self, - trained_model_dir: str, - dataset: str = "drugs", - temp_schedule: str = "linear", - track_stats: bool = False): + def __init__( + self, + trained_model_dir: str = None, + dataset: str = "drugs", + temp_schedule: str = "linear", + track_stats: bool = False, + device: str = 'cpu', + ): + if GeoMol is None: + raise ImportError("No GeoMol installation detected. 
Please install the GeoMol fork at https://github.com/xiaoruiDong/GeoMol.") super(GeoMolEmbedder, self).__init__(track_stats) # TODO: add option of pre-pruning geometries using alpha values - # TODO: inverstigate option of changing "temperature" each iteration to sample diverse geometries + # TODO: investigate option of changing "temperature" each iteration to sample diverse geometries + self.device = device - with open(osp.join(trained_model_dir, "model_parameters.yml")) as f: + trained_model_dir = geomol_model_path / dataset if trained_model_dir is None else Path(trained_model_dir) + with open(trained_model_dir / "model_parameters.yml") as f: model_parameters = yaml.full_load(f) model = GeoMol(**model_parameters) - state_dict = torch.load(osp.join(trained_model_dir, "best_model.pt"), map_location=torch.device('cpu')) + state_dict = torch.load(trained_model_dir / "best_model.pt", map_location=torch.device(device)) model.load_state_dict(state_dict, strict=True) + model.to(self.device) model.eval() self.model = model self.tg_data = None @@ -166,8 +181,14 @@ def __init__(self, self.temp_schedule = temp_schedule self.dataset = dataset - def embed_conformers(self, - n_conformers: int): + def to(self, device: str): + self.device = device + self.model.to(device) + + def embed_conformers( + self, + n_conformers: int + ): """ Embed conformers according to the molecule graph. 
@@ -186,11 +207,11 @@ def embed_conformers(self, # featurize data and run GeoMol if self.tg_data is None: self.tg_data = featurize_mol_from_smiles(self.smiles, dataset=self.dataset) - data = Batch.from_data_list([self.tg_data]) # need to run this bc of dumb internal GeoMol processing + data = from_data_list([self.tg_data]).to(self.device) # need to run this bc of dumb internal GeoMol processing self.model(data, inference=True, n_model_confs=n_conformers) # process predictions - model_coords = construct_conformers(data, self.model).double().cpu().detach().numpy() + model_coords = construct_conformers(data, self.model, self.device).double().cpu().detach().numpy() split_model_coords = np.split(model_coords, n_conformers, axis=1) # package in mol and return diff --git a/rdmc/conformer_generation/optimizers.py b/rdmc/conformer_generation/optimizers.py index 09118d14..e90cf744 100644 --- a/rdmc/conformer_generation/optimizers.py +++ b/rdmc/conformer_generation/optimizers.py @@ -192,7 +192,7 @@ def optimize_conformers(self, positions = opt_mol.GetPositions() conf = new_mol.GetConformer(id=c_id) conf.SetPositions(positions) - energy = float(opt_mol.GetProp('total energy / Eh')) # * HARTREE_TO_KCAL_MOL # kcal/mol (TODO: check) + energy = props['total energy'] mol_data[c_id].update({"positions": positions, # issues if not all opts succeeded? 
"conf": conf, "energy": energy}) diff --git a/rdmc/external/logparser/base.py b/rdmc/external/logparser/base.py index 017593a7..a164be3d 100644 --- a/rdmc/external/logparser/base.py +++ b/rdmc/external/logparser/base.py @@ -19,7 +19,7 @@ try: from ipywidgets import interact, IntSlider, Dropdown, FloatLogSlider except ImportError: - pass + interact = None class BaseLog(object): @@ -270,6 +270,12 @@ def get_scf_energies(self, # sub2 stores the energies for subsequent jobs e.g., multiple sps if 'opt' in self.job_type or 'scan' in self.job_type: sub1 = scf_energies[:num_opt_geoms][self.get_converged_geom_idx()] + elif 'irc' in self.job_type: + # If taking corrector steps and job failed due to corrector fails + # There is one more energy value compared to the number of geometries + sub1 = scf_energies[: len(self.cclib_results.optstatus)][ + self.get_converged_geom_idx() + ] else: sub1 = scf_energies[self.get_converged_geom_idx()] if 'scan' not in self.job_type: @@ -700,6 +706,9 @@ def interact_opt(self, Returns: interact """ + if interact is None: + raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.') + mol = self.get_mol(converged=False, sanitize=sanitize, backend=backend) xyzs = self.get_xyzs(converged=False) sdfs = [mol.ToMolBlock(confId=i) for i in range(mol.GetNumConformers())] @@ -838,6 +847,9 @@ def view_freq(self, Returns: interact """ + if interact is None: + raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.') + xyz = self.get_xyzs(converged=True)[0] lines = xyz.splitlines() vib_xyz_list = lines[0:2] @@ -851,6 +863,9 @@ def interact_freq(self): """ Create a IPython interactive widget to investigate the frequency calculation. """ + if interact is None: + raise ImportError('interact is not installed. 
Please install it by `pip install ipywidgets`.') + dropdown = Dropdown( options=self.freqs, value=self.freqs[0], @@ -1018,6 +1033,9 @@ def interact_irc(self, Returns: interact """ + if interact is None: + raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.') + mol = self._process_irc_mol(sanitize=sanitize, converged=converged, backend=backend, bothway=bothway) sdfs = [mol.ToMolBlock(confId=i) for i in range(mol.GetNumConformers())] xyzs = self.get_xyzs(converged=converged) @@ -1188,6 +1206,9 @@ def interact_scan(self, Returns: interact """ + if interact is None: + raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.') + mol = self._process_scan_mol(align_scan=align_scan, align_frag_idx=align_frag_idx, sanitize=sanitize, diff --git a/rdmc/external/logparser/gaussian.py b/rdmc/external/logparser/gaussian.py index 04b639c9..082ccaa2 100644 --- a/rdmc/external/logparser/gaussian.py +++ b/rdmc/external/logparser/gaussian.py @@ -116,7 +116,7 @@ def _update_schemes(self): scheme_str = ''.join(line.strip('\n')[1:] for line in scheme_lines[1:]) try: - self._schemes = scheme_to_dict(scheme_str) + self._schemes = scheme_to_dict(scheme_str.lower()) except Exception as e: print(f'Calculation scheme parser encounters a problem. 
\nGot: {e}\n' f'Feel free to raise an issue about this error at RDMC\'s Github Repo.') diff --git a/rdmc/external/xtb_tools/opt.py b/rdmc/external/xtb_tools/opt.py index e0235872..df02ae9c 100644 --- a/rdmc/external/xtb_tools/opt.py +++ b/rdmc/external/xtb_tools/opt.py @@ -7,7 +7,7 @@ """ import json -import os +from pathlib import Path from shutil import rmtree import subprocess import tempfile @@ -25,7 +25,7 @@ TS_PATH_INP, ) -XTB_INPUT_FILE = os.path.join(UTILS_PATH, "xtb.inp") +XTB_INPUT_FILE = Path(UTILS_PATH) / "xtb.inp" def read_xtb_json(json_file, mol): @@ -48,6 +48,7 @@ def read_xtb_json(json_file, mol): atoms = [ATOMNUM_TO_ELEM[atom.GetAtomicNum()] for atom in mol.GetAtoms()] atomic_energy = sum([ATOM_ENERGIES_XTB[atom] for atom in atoms]) props = { + "total energy": data["total energy"], "E_form": data["total energy"] - atomic_energy, # already in Hartree "E_homo": E_homo * EV_TO_HARTREE, "E_lumo": E_lumo * EV_TO_HARTREE, @@ -130,19 +131,20 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve method = "--" + method input_file = TS_PATH_INP if job == "--path" else "" - temp_dir = os.path.abspath(save_dir) if save_dir else tempfile.mkdtemp() - logfile = os.path.join(temp_dir, "xtb.log") - xtb_out = os.path.join(temp_dir, "xtbout.json") - xtb_wbo = os.path.join(temp_dir, "wbo") - xtb_g98 = os.path.join(temp_dir, "g98.out") - xtb_ts = os.path.join(temp_dir, "xtbpath_ts.xyz") + temp_dir = Path(save_dir).absolute() if save_dir else Path(tempfile.mkdtemp()).absolute() + logfile = temp_dir / "xtb.log" + xtb_out = temp_dir / "xtbout.json" + xtb_wbo = temp_dir / "wbo" + xtb_g98 = temp_dir / "g98.out" + xtb_ts = temp_dir / "xtbpath_ts.xyz" - sdf_path = os.path.join(temp_dir, "mol.sdf") - mol.ToSDFFile(sdf_path, confId=confId) + sdf_path = temp_dir / "mol.sdf" + mol.ToSDFFile(str(sdf_path), confId=confId) + update_rdkit_mol_format(sdf_path) command = [ XTB_BINARY, - sdf_path, + str(sdf_path), xtb_command, method, level, @@ -156,9 
+158,10 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve ] if job == "--path": - p_sdf_path = os.path.join(temp_dir, "pmol.sdf") - pmol.ToSDFFile(p_sdf_path, confId=pconfId) - command.insert(3, p_sdf_path) + p_sdf_path = temp_dir / "pmol.sdf" + pmol.ToSDFFile(str(p_sdf_path), confId=pconfId) + update_rdkit_mol_format(sdf_path) + command.insert(3, str(p_sdf_path)) with open(logfile, "w") as f: xtb_run = subprocess.run( @@ -183,7 +186,7 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve [line for line in log_data if "GEOMETRY OPTIMIZATION CONVERGED AFTER" in line][-1].split()[-3]) except IndexError: # logfile doesn't exist for [H] - if not os.path.exists(os.path.join(temp_dir, "xtbopt.sdf")): + if not (temp_dir / "xtbopt.sdf").exists(): not save_dir and rmtree(temp_dir) raise ValueError(f"xTB calculation failed.") else: @@ -200,7 +203,7 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve if job == "--path": try: - opt_mol = RDKitMol.FromFile(os.path.join(temp_dir, "xtbpath_ts.xyz")) + opt_mol = RDKitMol.FromFile(str(temp_dir / "xtbpath_ts.xyz")) except FileNotFoundError: return (props, None) if return_optmol else props # props.update(read_xtb_json(xtb_out, opt_mol)) @@ -208,13 +211,51 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve return (props, opt_mol) if return_optmol else props if method == "--gff": - opt_mol = RDKitMol.FromFile(os.path.join(temp_dir, "xtbopt.sdf"))[0] + opt_mol = RDKitMol.FromFile(str(temp_dir / "xtbopt.sdf"))[0] + try: + with open(temp_dir / "gfnff_lists.json", "r") as f: + props["total energy"] = json.load(f)["total energy"] + except FileNotFoundError: + props["total energy"] = 0. 
def update_rdkit_mol_format(path):
    """
    Pad the bond block of an RDKit-written Mol/SDF file, in place, so that xTB can read it.

    After xTB changed its parser backend to mctc-lib, it stopped being able to read Mol/SDF
    files generated from RDKit: in the bond block mctc-lib looks for 7 fields per line while
    RDKit only writes 4. xTB doesn't need the extra information, so the missing fields are
    simply appended as 0s.

    Only the first record of the file is patched, and only its first bond line is inspected
    to decide how many fields are missing; the same padding is then applied to every bond
    line of that record. Files without bonds, or whose first bond line already has 7 fields,
    are left untouched.

    Args:
        path (str or Path): Path to the Mol/SDF file to patch.

    Raises:
        ValueError: If the first bond line has more than 7 fields.
    """
    with open(path, "r") as f:
        lines = f.readlines()

    # The V2000 counts line stores the atom and bond counts in two 3-character columns.
    # With a 3-digit bond count no blank is left between them (e.g. " 99105"), so a
    # whitespace split yields one merged token; slice the columns in that case.
    counts_line = lines[3]
    count_tokens = counts_line.split()
    if len(count_tokens[0]) > 3:
        n_atoms, n_bonds = int(counts_line[:3]), int(counts_line[3:6])
    else:
        n_atoms, n_bonds = int(count_tokens[0]), int(count_tokens[1])

    if n_bonds == 0:
        # No bond block: the line following the atoms is a property line or "M  END",
        # which must not be mistaken for a bond line.
        return

    bond_start = 4 + n_atoms
    bond_end = bond_start + n_bonds

    # Check if the file needs to be fixed, only check once
    bond_tokens = lines[bond_start].split()
    n_bond_props = len(bond_tokens)
    if bond_tokens and len(bond_tokens[0]) > 3:
        # Same column merge as above: the two atom indices ran together but are two fields
        n_bond_props += 1
    if n_bond_props > 7:
        raise ValueError("This SDF/Mol file is abnormal, please double check your file")
    if n_bond_props == 7:  # No need to fix
        return

    padding = " 0" * (7 - n_bond_props)
    lines[bond_start:bond_end] = [
        line.rstrip("\n") + padding + "\n"
        for line in lines[bond_start:bond_end]
    ]

    with open(path, "w") as f:
        f.writelines(lines)
""" + if not interact: + raise ImportError("This function requires ipywidgets to be installed. You can install it by pip or conda") + if isinstance(mol, list) or isinstance(mol, tuple): def viewer(confId): return mol_viewer(obj=mol[confId], confId=0, **kwargs)