diff --git a/ipython/Conformer Generation Workflow.ipynb b/ipython/Conformer Generation Workflow.ipynb
new file mode 100644
index 00000000..c10c1b30
--- /dev/null
+++ b/ipython/Conformer Generation Workflow.ipynb
@@ -0,0 +1,561 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Stable species conformer search\n",
+ "\n",
+ "Leverage ETKDG and GeoMol as 3D geometry embedders for stochastic conformer generation.\n",
+ "\n",
+ "The idea is to have modular methods for each step, which are currently hardcoded. This includes:\n",
+ "- initial conformer embedding (ETKDG, GeoMol)\n",
+ "- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)\n",
+ "- pruning (torsion fingerprints, CREGEN)\n",
+ "- convergence metrics (conformational entropy/partition function)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "3a45eb9b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from rdmc.conformer_generation.embedders import *\n",
+ "from rdmc.conformer_generation.optimizers import *\n",
+ "from rdmc.conformer_generation.pruners import *\n",
+ "from rdmc.conformer_generation.metrics import *\n",
+ "from rdmc.conformer_generation.generators import StochasticConformerGenerator\n",
+ "\n",
+ "from rdmc import RDKitMol\n",
+ "from rdmc.view import mol_viewer, interactive_conformer_viewer, conformer_viewer\n",
+ "\n",
+ "T = 298 # K\n",
+ "R = 0.0019872 # kcal/(K*mol)\n",
+ "HARTREE_TO_KCAL_MOL = 627.503\n",
+ "\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e0216632",
+ "metadata": {},
+ "source": [
+ "## 1. Test embedder"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79826cc2",
+ "metadata": {},
+ "source": [
+ "Create the 3D geometry for the molecule specified by the SMILES (`smi`). Currently it has no 3D conformer embedded, so the visualization returns a 2D illustration of the molecule."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "61a6eb92",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/3dmoljs_load.v0": "
\n
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n
\n",
+ "text/html": [
+ "\n",
+ "
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n",
+ "
\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "smi = \"[C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\" # example 1\n",
+ "smi = \"CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\" # example 2\n",
+ "\n",
+ "mol_viewer(RDKitMol.FromSmiles(smi))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7b022193",
+ "metadata": {},
+ "source": [
+ "### 1.1 ETKDG embedder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "886ca5f8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_confs = 10 # Number of conformers to create\n",
+ "\n",
+ "embedder = ETKDGEmbedder() # Initialize conformer embedder\n",
+ "unique_mol_data = embedder(smi, n_confs) # Embed molecule 3D geometries with ETKDG\n",
+ "mol = dict_to_mol(unique_mol_data) # Convert raw data to a molecule object"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "f8b9af06",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/3dmoljs_load.v0": "\n
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n
\n",
+ "text/html": [
+ "\n",
+ "
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n",
+ "
\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "visualize_conf_id = 2\n",
+ "\n",
+ "mol_viewer(mol, confId=visualize_conf_id) # visualize the molecule"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7424c20e",
+ "metadata": {},
+ "source": [
+ "### 1.2 GeoMol embedder"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "49022146",
+ "metadata": {},
+ "source": [
+ "Supported options:\n",
+ "- `dataset`: `drugs` or `qm9`\n",
+ "- `device`: `cpu`, or `cuda` (or specific cuda device like `cuda:0`)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "9431fbc8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "n_confs = 10 # Number of conformers to create\n",
+ "dataset = \"drugs\"\n",
+ "device = \"cuda\"\n",
+ "\n",
+ "embedder = GeoMolEmbedder(dataset=dataset, track_stats=True, temp_schedule=\"none\", device=device) # Initialize conformer embedder\n",
+ "unique_mol_data = embedder(smi, n_confs) # Embed molecule 3D geometries with GeoMol\n",
+ "mol = dict_to_mol(unique_mol_data) # Convert raw data to a molecule object"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9ac12a75",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/3dmoljs_load.v0": "\n
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n
\n",
+ "text/html": [
+ "\n",
+ "
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n",
+ "
\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "visualize_conf_id = 2\n",
+ "\n",
+ "mol_viewer(mol, confId=visualize_conf_id) # visualize the molecule"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "46be5c44",
+ "metadata": {},
+ "source": [
+ "## 2. Create a conformer generation workflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a35cd583",
+ "metadata": {},
+ "source": [
+ "### 2.1 Choose each component\n",
+ "- embedder\n",
+ "- optimizer\n",
+ "- pruner\n",
+ "- metric\n",
+ "\n",
+ "You can also use a default configuration by providing `config` to the generator. To see what the default configuration is, open a new cell and run `StochasticConformerGenerator.set_config?`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "92bfdbc4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# embedder = ETKDGEmbedder(track_stats=True)\n",
+ "embedder = GeoMolEmbedder(dataset=\"drugs\", track_stats=True, temp_schedule=\"none\", device=\"cpu\") # Initialize conformer embedder\n",
+ "optimizer = XTBOptimizer()\n",
+ "pruner = TorsionPruner(max_chk_threshold=30)\n",
+ "metric = SCGMetric(metric=\"entropy\", window=5, threshold=0.005)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0522a7ff",
+ "metadata": {},
+ "source": [
+ "### 2.2 Conformer generation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "9f099b43",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/3dmoljs_load.v0": "\n
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n
\n",
+ "text/html": [
+ "\n",
+ "
3Dmol.js failed to load for some reason. Please check your browser console for error messages.
\n",
+ "
\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "smi = \"CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\"\n",
+ "\n",
+ "mol_viewer(RDKitMol.FromSmiles(smi))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "e72b2651",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024/03/14 04:33:34 PM | StochasticConformerGenerator | INFO: Generating conformers for CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\n",
+ "2024/03/14 04:33:34 PM | StochasticConformerGenerator | INFO: \n",
+ "Iteration 1: embedding 100 initial guesses...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024/03/14 04:33:35 PM | StochasticConformerGenerator | INFO: Iteration 1: optimizing initial guesses...\n",
+ "2024/03/14 04:33:39 PM | StochasticConformerGenerator | INFO: Iteration 1: pruning conformers...\n",
+ "2024/03/14 04:33:40 PM | StochasticConformerGenerator | INFO: Iteration 1: kept 12 unique conformers\n",
+ "2024/03/14 04:33:40 PM | StochasticConformerGenerator | INFO: \n",
+ "Iteration 2: embedding 100 initial guesses...\n",
+ "2024/03/14 04:33:40 PM | StochasticConformerGenerator | INFO: Iteration 2: optimizing initial guesses...\n",
+ "2024/03/14 04:33:45 PM | StochasticConformerGenerator | INFO: Iteration 2: pruning conformers...\n",
+ "2024/03/14 04:33:45 PM | StochasticConformerGenerator | INFO: Iteration 2: kept 12 unique conformers\n",
+ "2024/03/14 04:33:45 PM | StochasticConformerGenerator | INFO: Iteration 2: stop crietria reached\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of conformers: 12\n",
+ "Metric: 4.938e-03\n"
+ ]
+ }
+ ],
+ "source": [
+ "n_conformers_per_iter = 100\n",
+ "min_iters = 2\n",
+ "max_iters = 5\n",
+ "\n",
+ "scg = StochasticConformerGenerator(\n",
+ " smiles=smi,\n",
+ " embedder=embedder,\n",
+ " optimizer=optimizer,\n",
+ " pruner=pruner,\n",
+ " metric=metric,\n",
+ " min_iters=min_iters,\n",
+ " max_iters=max_iters,\n",
+ ")\n",
+ "\n",
+ "unique_mol_data = scg(n_conformers_per_iter)\n",
+ "print(\n",
+ " f\"Number of conformers: {len(unique_mol_data)}\\n\"\n",
+ " f\"Metric: {scg.metric.metric_history[-1]:.3e}\"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "GeoMol",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ipython/Generate Atommapped SMILES.ipynb b/ipython/Generate Atommapped SMILES.ipynb
index 83e9f39c..1572a373 100644
--- a/ipython/Generate Atommapped SMILES.ipynb
+++ b/ipython/Generate Atommapped SMILES.ipynb
@@ -838,7 +838,7 @@
"source": [
"new_rxn = Reaction(r_complex, p_complex)\n",
"display(new_rxn)\n",
- "print(rxn.to_smiles())"
+ "print(new_rxn.to_smiles())"
]
},
{
@@ -870,7 +870,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.12"
+ "version": "3.9.18"
},
"vscode": {
"interpreter": {
diff --git a/ipython/stochastic_conf_pipeline_LP.ipynb b/ipython/stochastic_conf_pipeline_LP.ipynb
deleted file mode 100644
index 3b1f782d..00000000
--- a/ipython/stochastic_conf_pipeline_LP.ipynb
+++ /dev/null
@@ -1,327 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Stable species conformer search\n",
- "Leverage ETKDG for stochastic conformer generation\n",
- "\n",
- "Use this as a base for ML conformer generation\n",
- "\n",
- "The idea is to have modular methods for each step, which are currently hardcoded. This includes:\n",
- "- initial conformer embedding (ETKDG, GeoMol)\n",
- "- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)\n",
- "- pruning (torsion fingerprints, CREGEN)\n",
- "- convergence metrics (conformational entropy/partition function)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from rdmc.conformer_generation.embedders import *\n",
- "from rdmc.conformer_generation.optimizers import *\n",
- "from rdmc.conformer_generation.pruners import *\n",
- "from rdmc.conformer_generation.metrics import *\n",
- "from rdmc.conformer_generation.generators import StochasticConformerGenerator\n",
- "\n",
- "from rdmc.view import mol_viewer, interactive_conformer_viewer, conformer_viewer\n",
- "\n",
- "T = 298 # K\n",
- "R = 0.0019872 # kcal/(K*mol)\n",
- "HARTREE_TO_KCAL_MOL = 627.503"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "from rdkit import Chem\n",
- "\n",
- "smi = \"[C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Config specified: using default settings for normal config\n",
- "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Generating conformers for [C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\n",
- "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 1: embedding 100 initial guesses...\n",
- "2022/04/01 03:17:42 PM | StochasticConformerGenerator | INFO: Iteration 1: optimizing initial guesses...\n",
- "2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: Iteration 1: pruning conformers...\n",
- "2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: Iteration 1: kept 9 unique conformers\n",
- "2022/04/01 03:17:45 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 2: embedding 100 initial guesses...\n",
- "2022/04/01 03:17:46 PM | StochasticConformerGenerator | INFO: Iteration 2: optimizing initial guesses...\n",
- "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: Iteration 2: pruning conformers...\n",
- "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: Iteration 2: kept 18 unique conformers\n",
- "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 3: embedding 100 initial guesses...\n",
- "2022/04/01 03:17:49 PM | StochasticConformerGenerator | INFO: Iteration 3: optimizing initial guesses...\n",
- "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: Iteration 3: pruning conformers...\n",
- "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: Iteration 3: kept 27 unique conformers\n",
- "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 4: embedding 100 initial guesses...\n",
- "2022/04/01 03:17:52 PM | StochasticConformerGenerator | INFO: Iteration 4: optimizing initial guesses...\n",
- "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: Iteration 4: pruning conformers...\n",
- "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: Iteration 4: kept 35 unique conformers\n",
- "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 5: embedding 100 initial guesses...\n",
- "2022/04/01 03:17:55 PM | StochasticConformerGenerator | INFO: Iteration 5: optimizing initial guesses...\n",
- "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: Iteration 5: pruning conformers...\n",
- "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: Iteration 5: kept 43 unique conformers\n",
- "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 6: embedding 100 initial guesses...\n",
- "2022/04/01 03:17:59 PM | StochasticConformerGenerator | INFO: Iteration 6: optimizing initial guesses...\n",
- "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: Iteration 6: pruning conformers...\n",
- "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: Iteration 6: kept 44 unique conformers\n",
- "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 7: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:02 PM | StochasticConformerGenerator | INFO: Iteration 7: optimizing initial guesses...\n",
- "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: Iteration 7: pruning conformers...\n",
- "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: Iteration 7: kept 46 unique conformers\n",
- "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 8: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:05 PM | StochasticConformerGenerator | INFO: Iteration 8: optimizing initial guesses...\n",
- "2022/04/01 03:18:08 PM | StochasticConformerGenerator | INFO: Iteration 8: pruning conformers...\n",
- "2022/04/01 03:18:08 PM | StochasticConformerGenerator | INFO: Iteration 8: kept 52 unique conformers\n",
- "2022/04/01 03:18:08 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 9: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:09 PM | StochasticConformerGenerator | INFO: Iteration 9: optimizing initial guesses...\n",
- "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: Iteration 9: pruning conformers...\n",
- "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: Iteration 9: kept 50 unique conformers\n",
- "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 10: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:12 PM | StochasticConformerGenerator | INFO: Iteration 10: optimizing initial guesses...\n",
- "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: Iteration 10: pruning conformers...\n",
- "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: Iteration 10: kept 49 unique conformers\n",
- "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 11: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:15 PM | StochasticConformerGenerator | INFO: Iteration 11: optimizing initial guesses...\n",
- "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: Iteration 11: pruning conformers...\n",
- "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: Iteration 11: kept 54 unique conformers\n",
- "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 12: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:18 PM | StochasticConformerGenerator | INFO: Iteration 12: optimizing initial guesses...\n",
- "2022/04/01 03:18:21 PM | StochasticConformerGenerator | INFO: Iteration 12: pruning conformers...\n",
- "2022/04/01 03:18:22 PM | StochasticConformerGenerator | INFO: Iteration 12: kept 52 unique conformers\n",
- "2022/04/01 03:18:22 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 13: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:22 PM | StochasticConformerGenerator | INFO: Iteration 13: optimizing initial guesses...\n",
- "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: Iteration 13: pruning conformers...\n",
- "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: Iteration 13: kept 59 unique conformers\n",
- "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 14: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:25 PM | StochasticConformerGenerator | INFO: Iteration 14: optimizing initial guesses...\n",
- "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: Iteration 14: pruning conformers...\n",
- "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: Iteration 14: kept 66 unique conformers\n",
- "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 15: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:28 PM | StochasticConformerGenerator | INFO: Iteration 15: optimizing initial guesses...\n",
- "2022/04/01 03:18:31 PM | StochasticConformerGenerator | INFO: Iteration 15: pruning conformers...\n",
- "2022/04/01 03:18:31 PM | StochasticConformerGenerator | INFO: Iteration 15: kept 74 unique conformers\n",
- "2022/04/01 03:18:31 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 16: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:32 PM | StochasticConformerGenerator | INFO: Iteration 16: optimizing initial guesses...\n",
- "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: Iteration 16: pruning conformers...\n",
- "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: Iteration 16: kept 76 unique conformers\n",
- "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 17: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:35 PM | StochasticConformerGenerator | INFO: Iteration 17: optimizing initial guesses...\n",
- "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: Iteration 17: pruning conformers...\n",
- "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: Iteration 17: kept 72 unique conformers\n",
- "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 18: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:38 PM | StochasticConformerGenerator | INFO: Iteration 18: optimizing initial guesses...\n",
- "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: Iteration 18: pruning conformers...\n",
- "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: Iteration 18: kept 68 unique conformers\n",
- "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 19: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:41 PM | StochasticConformerGenerator | INFO: Iteration 19: optimizing initial guesses...\n",
- "2022/04/01 03:18:44 PM | StochasticConformerGenerator | INFO: Iteration 19: pruning conformers...\n",
- "2022/04/01 03:18:44 PM | StochasticConformerGenerator | INFO: Iteration 19: kept 65 unique conformers\n",
- "2022/04/01 03:18:44 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 20: embedding 100 initial guesses...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2022/04/01 03:18:45 PM | StochasticConformerGenerator | INFO: Iteration 20: optimizing initial guesses...\n",
- "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 20: pruning conformers...\n",
- "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 20: kept 66 unique conformers\n",
- "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 21: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:48 PM | StochasticConformerGenerator | INFO: Iteration 21: optimizing initial guesses...\n",
- "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 21: pruning conformers...\n",
- "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 21: kept 65 unique conformers\n",
- "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 22: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:51 PM | StochasticConformerGenerator | INFO: Iteration 22: optimizing initial guesses...\n",
- "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: Iteration 22: pruning conformers...\n",
- "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: Iteration 22: kept 63 unique conformers\n",
- "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 23: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:54 PM | StochasticConformerGenerator | INFO: Iteration 23: optimizing initial guesses...\n",
- "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: Iteration 23: pruning conformers...\n",
- "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: Iteration 23: kept 63 unique conformers\n",
- "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 24: embedding 100 initial guesses...\n",
- "2022/04/01 03:18:58 PM | StochasticConformerGenerator | INFO: Iteration 24: optimizing initial guesses...\n",
- "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: Iteration 24: pruning conformers...\n",
- "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: Iteration 24: kept 65 unique conformers\n",
- "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: \n",
- "Iteration 25: embedding 100 initial guesses...\n",
- "2022/04/01 03:19:01 PM | StochasticConformerGenerator | INFO: Iteration 25: optimizing initial guesses...\n",
- "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Iteration 25: pruning conformers...\n",
- "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Iteration 25: kept 63 unique conformers\n",
- "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Iteration 25: stop crietria reached\n",
- "\n",
- "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Calling CRESTPruner\n",
- "2022/04/01 03:19:04 PM | StochasticConformerGenerator | INFO: Calling XTBOptimizer\n",
- "2022/04/01 03:19:13 PM | StochasticConformerGenerator | INFO: Calling CRESTPruner\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "9 0.008233205857910033\n"
- ]
- }
- ],
- "source": [
- "embedder = ETKDGEmbedder(track_stats=True)\n",
- "# embedder = GeoMolEmbedder(\"../rdmc/external/GeoMol/trained_models/both/\", dataset=\"drugs\", track_stats=True, temp_schedule=\"none\")\n",
- "optimizer = XTBOptimizer()\n",
- "pruner = TorsionPruner(max_chk_threshold=30)\n",
- "metric = SCGMetric(metric=\"entropy\", window=5, threshold=0.005)\n",
- "n_conformers_per_iter = 100\n",
- "\n",
- "scg = StochasticConformerGenerator(\n",
- " smiles=smi,\n",
- " config=\"normal\",\n",
- " embedder=embedder,\n",
- " min_iters=5\n",
- ")\n",
- "\n",
- "unique_mol_data = scg(n_conformers_per_iter)\n",
- "print(len(unique_mol_data), scg.metric.metric_history[-1])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a006a637bd1943c0b5db28b99366abe4",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "interactive(children=(IntSlider(value=0, description='confId', max=8), Output()), _dom_classes=('widget-intera…"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- ".(confId)>"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "mol = dict_to_mol(unique_mol_data)\n",
- "interactive_conformer_viewer(mol)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "test_smiles = [\n",
- " \"C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O\",\n",
- " \"CC[C@@H]1CCCC[C@@H]1C\",\n",
- " \"CCCCCCCCCC\",\n",
- " \"CC1=CCCC(C1/C=C/C(=O)C)(C)C\",\n",
- " \"C[C@H]1CC[C@H](CC2=C1CC[C@@H]2C)C(C)(C)O\",\n",
- " \"N[C@@H](Cc1ccccc1)C(=O)N[C@@H](Cc2ccccc2)C(O)=O\",\n",
- " \"C1COCC(=O)N1C2=CC=C(C=C2)N3C[C@@H](OC3=O)CNC(=O)C4=CC=C(S4)Cl\",\n",
- " \"CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@H]2[C@@H]([C@@H]([C@H](O2)C)O)O\",\n",
- " \"CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)OCC\",\n",
- " \"C[C@]12CC[C@@H](CC1=CC[C@@H]3[C@@H]2CC[C@]4([C@H]3CC=C4C5=CN=CC=C5)C)O\",\n",
- " \"C1CCN(C1)CCOC2=C3COC/C=C/COCC4=CC(=CC=C4)C5=NC(=NC=C5)NC(=C3)C=C2\",\n",
- " \"C[C@@H]1OC[C@@H]2[C@@H](O1)[C@@H]([C@H]([C@@H](O2)O[C@H]3[C@H]4COC(=O)[C@@H]4[C@@H](C5=CC6=C(C=C35)OCO6)C7=CC(=C(C(=C7)OC)O)OC)O)O\",\n",
- " \"CC(C)C[C@@H](C1=C(C(=C(C(=C1O)C=O)O)C=O)O)[C@]2(CC[C@@H]3[C@@H]2[C@H]4[C@H](C4(C)C)CC[C@@]3(C)O)C\",\n",
- " \"C[C@H](N)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)N[C@@H](C)C(=O)O\",\n",
- " \"CC1=C2[C@H](C(=O)[C@@]3([C@H](C[C@@H]4[C@]([C@H]3[C@@H]([C@@](C2(C)C)(C[C@@H]1OC(=O)[C@@H]([C@H](C5=CC=CC=C5)NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C\",\n",
- " \"C[C@@H]1C[C@H]2CC[C@H](O2)[C@@H](C(=O)O[C@H](C[C@@H]3CC[C@@H](O3)[C@H](C(=O)O[C@@H](C[C@H]4CC[C@H](O4)[C@@H](C(=O)O[C@H](C[C@@H]5CC[C@@H](O5)[C@H](C(=O)O1)C)C)C)C)C)C)C\",\n",
- " \"CC[C@@]1(C[C@H]2C[C@@](C3=C(CCN(C2)C1)C4=CC=CC=C4N3)(C5=C(C=C6C(=C5)[C@]78CCN9[C@H]7[C@@](C=CC9)([C@H]([C@@]([C@@H]8N6C)(C(=O)OC)O)OC(=O)C)CC)OC)C(=O)OC)O\",\n",
- " \"CC(C)[C@@H](C(=O)N1CC2(CC2)C[C@H]1C3=NC=C(N3)C4=CC5=C(C=C4)C6=C(C5(F)F)C=C(C=C6)C7=CC8=C(C=C7)N=C(N8)[C@@H]9[C@H]1CC[C@H](C1)N9C(=O)[C@H](C(C)C)NC(=O)OC)NC(=O)OC\",\n",
- " \"C[C@@H]1CC[C@H]2C[C@@H](/C(=C/C=C/C=C/[C@H](C[C@H](C(=O)[C@@H]([C@@H](/C(=C/[C@H](C(=O)C[C@H](OC(=O)[C@@H]3CCCCN3C(=O)C(=O)[C@@]1(O2)O)[C@H](C)C[C@@H]4CC[C@H]([C@@H](C4)OC)OCCO)C)/C)O)OC)C)C)/C)OC\",\n",
- " \"C[C@H]1[C@H]([C@@](C[C@@H](O1)O[C@@H]2[C@H]([C@@H]([C@H](O[C@H]2OC3=C4C=C5C=C3OC6=C(C=C(C=C6)[C@H]([C@H](C(=O)N[C@H](C(=O)N[C@H]5C(=O)N[C@@H]7C8=CC(=C(C=C8)O)C9=C(C=C(C=C9O)O)[C@H](NC(=O)[C@H]([C@@H](C1=CC(=C(O4)C=C1)Cl)O)NC7=O)C(=O)O)CC(=O)N)NC(=O)[C@@H](CC(C)C)NC)O)Cl)CO)O)O)(C)N)O\"\n",
- "]\n",
- "\n",
- "import csv\n",
- "\n",
- "with open(\"./../rdmc/conformer_exps/organic_conf_gen/smiles.csv\", \"w\") as f:\n",
- " writer = csv.writer(f, delimiter=\"\\n\")\n",
- " writer.writerow(test_smiles)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python [conda env:rdmc_env] *",
- "language": "python",
- "name": "conda-env-rdmc_env-py"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/rdmc/conformer_generation/embedders.py b/rdmc/conformer_generation/embedders.py
index 211eb3c2..ff9c548c 100644
--- a/rdmc/conformer_generation/embedders.py
+++ b/rdmc/conformer_generation/embedders.py
@@ -5,27 +5,27 @@
Modules for providing initial guess geometries
"""
+from pathlib import Path
+
from rdmc import RDKitMol
-import os.path as osp
-import yaml
+
import numpy as np
from time import time
-
-try:
- import torch
- from torch_geometric.data import Batch
-except ImportError:
- pass
-
from .utils import *
+# GeoMol relevant imports
try:
import torch
- from rdmc.external.GeoMol.model.model import GeoMol
- from rdmc.external.GeoMol.model.featurization import featurize_mol_from_smiles
- from rdmc.external.GeoMol.model.inference import construct_conformers
-except ImportError:
+ from geomol.model import GeoMol
+ from geomol.featurization import featurize_mol_from_smiles, from_data_list
+ from geomol.inference import construct_conformers
+ from geomol.utils import model_path as geomol_model_path
+ import yaml # only used to load GeoMol parameters
+except ImportError as e:
+ GeoMol = None
+ print(e)
print("No GeoMol installation detected. Skipping import...")
+ print("Please install the GeoMol fork at https://github.com/xiaoruiDong/GeoMol")
class ConfGenEmbedder:
@@ -58,8 +58,10 @@ def update_mol(self, smiles: str):
# Copy the graph but remove conformers
self.mol = self.mol.Copy(quickCopy=True)
- def embed_conformers(self,
- n_conformers: int):
+ def embed_conformers(
+ self,
+ n_conformers: int
+ ):
"""
Embed conformers according to the molecule graph.
@@ -71,10 +73,11 @@ def embed_conformers(self,
"""
raise NotImplementedError
- def update_stats(self,
- n_trials: int,
- time: float = 0.
- ) -> dict:
+ def update_stats(
+ self,
+ n_trials: int,
+ time: float = 0.
+ ) -> dict:
"""
Update the statistics of the conformer generation.
@@ -88,10 +91,12 @@ def update_stats(self,
n_success = self.mol.GetNumConformers()
self.n_success = n_success
self.percent_success = n_success / n_trials * 100
- stats = {"iter": self.iter,
- "time": time,
- "n_success": self.n_success,
- "percent_success": self.percent_success}
+ stats = {
+ "iter": self.iter,
+ "time": time,
+ "n_success": self.n_success,
+ "percent_success": self.percent_success
+ }
self.stats.append(stats)
return stats
@@ -104,9 +109,11 @@ def write_mol_data(self):
"""
return mol_to_dict(self.mol, copy=False, iter=self.iter)
- def __call__(self,
- smiles: str,
- n_conformers: int):
+ def __call__(
+ self,
+ smiles: str,
+ n_conformers: int
+ ):
"""
Embed conformers according to the molecule graph.
@@ -137,28 +144,36 @@ class GeoMolEmbedder(ConfGenEmbedder):
Embed conformers using GeoMol.
Args:
- trained_model_dir (str): Directory of the trained model.
+ trained_model_dir (str, optional): Directory of the trained model. If not provided, the models distributed with the package will be used.
dataset (str, optional): Dataset used for training. Defaults to ``"drugs"``.
temp_schedule (str, optional): Temperature schedule. Defaults to ``"linear"``.
track_stats (bool, optional): Whether to track the statistics of the conformer generation. Defaults to ``False``.
"""
- def __init__(self,
- trained_model_dir: str,
- dataset: str = "drugs",
- temp_schedule: str = "linear",
- track_stats: bool = False):
+ def __init__(
+ self,
+ trained_model_dir: str = None,
+ dataset: str = "drugs",
+ temp_schedule: str = "linear",
+ track_stats: bool = False,
+ device: str = 'cpu',
+ ):
+ if GeoMol is None:
+ raise ImportError("No GeoMol installation detected. Please install the GeoMol fork at https://github.com/xiaoruiDong/GeoMol.")
super(GeoMolEmbedder, self).__init__(track_stats)
# TODO: add option of pre-pruning geometries using alpha values
- # TODO: inverstigate option of changing "temperature" each iteration to sample diverse geometries
+ # TODO: investigate option of changing "temperature" each iteration to sample diverse geometries
+ self.device = device
- with open(osp.join(trained_model_dir, "model_parameters.yml")) as f:
+ trained_model_dir = geomol_model_path / dataset if trained_model_dir is None else Path(trained_model_dir)
+ with open(trained_model_dir / "model_parameters.yml") as f:
model_parameters = yaml.full_load(f)
model = GeoMol(**model_parameters)
- state_dict = torch.load(osp.join(trained_model_dir, "best_model.pt"), map_location=torch.device('cpu'))
+ state_dict = torch.load(trained_model_dir / "best_model.pt", map_location=torch.device(device))
model.load_state_dict(state_dict, strict=True)
+ model.to(self.device)
model.eval()
self.model = model
self.tg_data = None
@@ -166,8 +181,14 @@ def __init__(self,
self.temp_schedule = temp_schedule
self.dataset = dataset
- def embed_conformers(self,
- n_conformers: int):
+ def to(self, device: str):  # relocate the embedder: record `device` and move the GeoMol model onto it
+ self.device = device  # read later when the input batch is placed and conformers are constructed
+ self.model.to(device)
+
+ def embed_conformers(
+ self,
+ n_conformers: int
+ ):
"""
Embed conformers according to the molecule graph.
@@ -186,11 +207,11 @@ def embed_conformers(self,
# featurize data and run GeoMol
if self.tg_data is None:
self.tg_data = featurize_mol_from_smiles(self.smiles, dataset=self.dataset)
- data = Batch.from_data_list([self.tg_data]) # need to run this bc of dumb internal GeoMol processing
+ data = from_data_list([self.tg_data]).to(self.device) # need to run this bc of dumb internal GeoMol processing
self.model(data, inference=True, n_model_confs=n_conformers)
# process predictions
- model_coords = construct_conformers(data, self.model).double().cpu().detach().numpy()
+ model_coords = construct_conformers(data, self.model, self.device).double().cpu().detach().numpy()
split_model_coords = np.split(model_coords, n_conformers, axis=1)
# package in mol and return
diff --git a/rdmc/conformer_generation/optimizers.py b/rdmc/conformer_generation/optimizers.py
index 09118d14..e90cf744 100644
--- a/rdmc/conformer_generation/optimizers.py
+++ b/rdmc/conformer_generation/optimizers.py
@@ -192,7 +192,7 @@ def optimize_conformers(self,
positions = opt_mol.GetPositions()
conf = new_mol.GetConformer(id=c_id)
conf.SetPositions(positions)
- energy = float(opt_mol.GetProp('total energy / Eh')) # * HARTREE_TO_KCAL_MOL # kcal/mol (TODO: check)
+ energy = props['total energy']
mol_data[c_id].update({"positions": positions, # issues if not all opts succeeded?
"conf": conf,
"energy": energy})
diff --git a/rdmc/external/logparser/base.py b/rdmc/external/logparser/base.py
index 017593a7..a164be3d 100644
--- a/rdmc/external/logparser/base.py
+++ b/rdmc/external/logparser/base.py
@@ -19,7 +19,7 @@
try:
from ipywidgets import interact, IntSlider, Dropdown, FloatLogSlider
except ImportError:
- pass
+ interact = None
class BaseLog(object):
@@ -270,6 +270,12 @@ def get_scf_energies(self,
# sub2 stores the energies for subsequent jobs e.g., multiple sps
if 'opt' in self.job_type or 'scan' in self.job_type:
sub1 = scf_energies[:num_opt_geoms][self.get_converged_geom_idx()]
+ elif 'irc' in self.job_type:
+ # When corrector steps are taken and the job fails in a corrector step,
+ # there is one more energy value than there are geometries
+ sub1 = scf_energies[: len(self.cclib_results.optstatus)][
+ self.get_converged_geom_idx()
+ ]
else:
sub1 = scf_energies[self.get_converged_geom_idx()]
if 'scan' not in self.job_type:
@@ -700,6 +706,9 @@ def interact_opt(self,
Returns:
interact
"""
+ if interact is None:
+ raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.')
+
mol = self.get_mol(converged=False, sanitize=sanitize, backend=backend)
xyzs = self.get_xyzs(converged=False)
sdfs = [mol.ToMolBlock(confId=i) for i in range(mol.GetNumConformers())]
@@ -838,6 +847,9 @@ def view_freq(self,
Returns:
interact
"""
+ if interact is None:
+ raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.')
+
xyz = self.get_xyzs(converged=True)[0]
lines = xyz.splitlines()
vib_xyz_list = lines[0:2]
@@ -851,6 +863,9 @@ def interact_freq(self):
"""
Create a IPython interactive widget to investigate the frequency calculation.
"""
+ if interact is None:
+ raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.')
+
dropdown = Dropdown(
options=self.freqs,
value=self.freqs[0],
@@ -1018,6 +1033,9 @@ def interact_irc(self,
Returns:
interact
"""
+ if interact is None:
+ raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.')
+
mol = self._process_irc_mol(sanitize=sanitize, converged=converged, backend=backend, bothway=bothway)
sdfs = [mol.ToMolBlock(confId=i) for i in range(mol.GetNumConformers())]
xyzs = self.get_xyzs(converged=converged)
@@ -1188,6 +1206,9 @@ def interact_scan(self,
Returns:
interact
"""
+ if interact is None:
+ raise ImportError('interact is not installed. Please install it by `pip install ipywidgets`.')
+
mol = self._process_scan_mol(align_scan=align_scan,
align_frag_idx=align_frag_idx,
sanitize=sanitize,
diff --git a/rdmc/external/logparser/gaussian.py b/rdmc/external/logparser/gaussian.py
index 04b639c9..082ccaa2 100644
--- a/rdmc/external/logparser/gaussian.py
+++ b/rdmc/external/logparser/gaussian.py
@@ -116,7 +116,7 @@ def _update_schemes(self):
scheme_str = ''.join(line.strip('\n')[1:] for line in scheme_lines[1:])
try:
- self._schemes = scheme_to_dict(scheme_str)
+ self._schemes = scheme_to_dict(scheme_str.lower())
except Exception as e:
print(f'Calculation scheme parser encounters a problem. \nGot: {e}\n'
f'Feel free to raise an issue about this error at RDMC\'s Github Repo.')
diff --git a/rdmc/external/xtb_tools/opt.py b/rdmc/external/xtb_tools/opt.py
index e0235872..df02ae9c 100644
--- a/rdmc/external/xtb_tools/opt.py
+++ b/rdmc/external/xtb_tools/opt.py
@@ -7,7 +7,7 @@
"""
import json
-import os
+from pathlib import Path
from shutil import rmtree
import subprocess
import tempfile
@@ -25,7 +25,7 @@
TS_PATH_INP,
)
-XTB_INPUT_FILE = os.path.join(UTILS_PATH, "xtb.inp")
+XTB_INPUT_FILE = Path(UTILS_PATH) / "xtb.inp"
def read_xtb_json(json_file, mol):
@@ -48,6 +48,7 @@ def read_xtb_json(json_file, mol):
atoms = [ATOMNUM_TO_ELEM[atom.GetAtomicNum()] for atom in mol.GetAtoms()]
atomic_energy = sum([ATOM_ENERGIES_XTB[atom] for atom in atoms])
props = {
+ "total energy": data["total energy"],
"E_form": data["total energy"] - atomic_energy, # already in Hartree
"E_homo": E_homo * EV_TO_HARTREE,
"E_lumo": E_lumo * EV_TO_HARTREE,
@@ -130,19 +131,20 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve
method = "--" + method
input_file = TS_PATH_INP if job == "--path" else ""
- temp_dir = os.path.abspath(save_dir) if save_dir else tempfile.mkdtemp()
- logfile = os.path.join(temp_dir, "xtb.log")
- xtb_out = os.path.join(temp_dir, "xtbout.json")
- xtb_wbo = os.path.join(temp_dir, "wbo")
- xtb_g98 = os.path.join(temp_dir, "g98.out")
- xtb_ts = os.path.join(temp_dir, "xtbpath_ts.xyz")
+ temp_dir = Path(save_dir).absolute() if save_dir else Path(tempfile.mkdtemp()).absolute()
+ logfile = temp_dir / "xtb.log"
+ xtb_out = temp_dir / "xtbout.json"
+ xtb_wbo = temp_dir / "wbo"
+ xtb_g98 = temp_dir / "g98.out"
+ xtb_ts = temp_dir / "xtbpath_ts.xyz"
- sdf_path = os.path.join(temp_dir, "mol.sdf")
- mol.ToSDFFile(sdf_path, confId=confId)
+ sdf_path = temp_dir / "mol.sdf"
+ mol.ToSDFFile(str(sdf_path), confId=confId)
+ update_rdkit_mol_format(sdf_path)
command = [
XTB_BINARY,
- sdf_path,
+ str(sdf_path),
xtb_command,
method,
level,
@@ -156,9 +158,10 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve
]
if job == "--path":
- p_sdf_path = os.path.join(temp_dir, "pmol.sdf")
- pmol.ToSDFFile(p_sdf_path, confId=pconfId)
- command.insert(3, p_sdf_path)
+        p_sdf_path = temp_dir / "pmol.sdf"
+        pmol.ToSDFFile(str(p_sdf_path), confId=pconfId)
+        update_rdkit_mol_format(p_sdf_path)  # patch the product file just written; the reactant file was patched above
+        command.insert(3, str(p_sdf_path))
with open(logfile, "w") as f:
xtb_run = subprocess.run(
@@ -183,7 +186,7 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve
[line for line in log_data if "GEOMETRY OPTIMIZATION CONVERGED AFTER" in line][-1].split()[-3])
except IndexError:
# logfile doesn't exist for [H]
- if not os.path.exists(os.path.join(temp_dir, "xtbopt.sdf")):
+ if not (temp_dir / "xtbopt.sdf").exists():
not save_dir and rmtree(temp_dir)
raise ValueError(f"xTB calculation failed.")
else:
@@ -200,7 +203,7 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve
if job == "--path":
try:
- opt_mol = RDKitMol.FromFile(os.path.join(temp_dir, "xtbpath_ts.xyz"))
+ opt_mol = RDKitMol.FromFile(str(temp_dir / "xtbpath_ts.xyz"))
except FileNotFoundError:
return (props, None) if return_optmol else props
# props.update(read_xtb_json(xtb_out, opt_mol))
@@ -208,13 +211,51 @@ def run_xtb_calc(mol, confId=0, job="", return_optmol=False, method="gfn2", leve
return (props, opt_mol) if return_optmol else props
if method == "--gff":
- opt_mol = RDKitMol.FromFile(os.path.join(temp_dir, "xtbopt.sdf"))[0]
+ opt_mol = RDKitMol.FromFile(str(temp_dir / "xtbopt.sdf"))[0]
+ try:
+ with open(temp_dir / "gfnff_lists.json", "r") as f:
+ props["total energy"] = json.load(f)["total energy"]
+ except FileNotFoundError:
+ props["total energy"] = 0.
not save_dir and rmtree(temp_dir)
return (props, opt_mol) if return_optmol else props
props.update(read_xtb_json(xtb_out, mol))
if return_optmol:
- opt_mol = RDKitMol.FromFile(os.path.join(temp_dir, "xtbopt.sdf"))[0]
+ opt_mol = RDKitMol.FromFile(str(temp_dir / "xtbopt.sdf"))[0]
props.update({"wbo": get_wbo(xtb_wbo)})
not save_dir and rmtree(temp_dir)
return (props, opt_mol) if return_optmol else props
+
+
+def update_rdkit_mol_format(path):
+    """
+    Pad the bond block of an RDKit-written V2000 Mol/SDF file to 7 fields per line.
+
+    xTB's mctc-lib reader looks for 7 bond fields while RDKit only writes 4; xTB does
+    not need the extra information, so the missing fields are filled with 0s in place.
+    Counts and bond fields are read as fixed-width 3-character columns, so files with
+    >= 100 atoms and bonds (whose columns touch, e.g. "100100") are parsed correctly.
+
+    Raises:
+        ValueError: If the first bond line already holds more than 7 fields.
+    """
+    with open(path, "r") as f:
+        lines = f.readlines()
+
+    n_atoms, n_bonds = int(lines[3][0:3]), int(lines[3][3:6])
+    if n_bonds == 0:  # no bond block, nothing to pad
+        return
+    start, end = 4 + n_atoms, 4 + n_atoms + n_bonds
+
+    # Check if the file needs to be fixed, only check once (ceil of width / 3 columns)
+    n_bond_props = -(-len(lines[start].rstrip()) // 3)
+    if n_bond_props > 7:
+        raise ValueError("This SDF/Mol file is abnormal, please double check your file")
+    if n_bond_props == 7:  # No need to fix
+        return
+
+    padding = "  0" * (7 - n_bond_props)  # one full 3-character field per missing value
+    lines[start:end] = [line.rstrip() + padding + "\n" for line in lines[start:end]]
+    with open(path, "w") as f:
+        f.writelines(lines)
diff --git a/rdmc/view.py b/rdmc/view.py
index 944bb2b0..63829e02 100644
--- a/rdmc/view.py
+++ b/rdmc/view.py
@@ -11,7 +11,10 @@
from rdmc.mol import RDKitMol
from rdmc.ts import clean_ts
-from ipywidgets import interact, IntSlider
+try:
+ from ipywidgets import interact, IntSlider
+except ImportError:
+ interact = None
def mol_viewer(obj: str,
@@ -162,6 +165,9 @@ def interactive_conformer_viewer(mol, **kwargs):
Returns:
py3Dmol.view: The molecule viewer with slider to view different conformers.
"""
+ if not interact:
+ raise ImportError("This function requires ipywidgets to be installed. You can install it by pip or conda")
+
if isinstance(mol, list) or isinstance(mol, tuple):
def viewer(confId):
return mol_viewer(obj=mol[confId], confId=0, **kwargs)