Skip to content

Commit

Permalink
Merge pull request #96 from xiaoruiDong/v1.0/cg_refactor
Browse files Browse the repository at this point in the history
Update Notebooks Part 1
  • Loading branch information
xiaoruiDong authored Mar 29, 2024
2 parents d240de4 + 1632332 commit a1db373
Show file tree
Hide file tree
Showing 14 changed files with 4,189 additions and 1,018 deletions.
95 changes: 79 additions & 16 deletions ipython/Conformer Generation Workflow.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"Leverage ETKDG and GeoMol as 3D geometry embedders for stochastic conformer generation\n",
"\n",
"The idea is to have modular methods for each step, which are currently hardcoded. This includes:\n",
"- initial conformer embedding (ETKDG, GeoMol)\n",
"- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB)\n",
"- initial conformer embedding (ETKDG, GeoMol, Torsional-Diffusion)\n",
"- optimization/energy (MMFF, UFF, GFN-FF, GFN2-xTB, other semi-empirical in QM software)\n",
"- pruning (torsion fingerprints, CREGEN)\n",
"- convergence metrics (conformational entropy/partition function)"
]
Expand All @@ -22,14 +22,16 @@
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"from rdmc.conformer_generation.embedders import *\n",
"from rdmc.conformer_generation.optimizers import *\n",
"from rdmc.conformer_generation.pruners import *\n",
"from rdmc.conformer_generation.metrics import *\n",
"from rdmc.conformer_generation.generators import StochasticConformerGenerator\n",
"from rdmc.conformer_generation.utils import dict_to_mol\n",
"\n",
"from rdmc import RDKitMol\n",
"from rdmc import Mol\n",
"from rdtools.view import mol_viewer, interactive_conformer_viewer, conformer_viewer\n",
"\n",
"T = 298 # K\n",
Expand Down Expand Up @@ -66,7 +68,7 @@
"smi = \"[C:1]([C@@:2]([O:3][H:12])([C:4]([N:5]([C:6](=[O:7])[H:16])[H:15])([H:13])[H:14])[H:11])([H:8])([H:9])[H:10]\" # example 1\n",
"smi = \"CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\" # example 2\n",
"\n",
"mol_viewer(RDKitMol.FromSmiles(smi))"
"mol_viewer(Mol.FromSmiles(smi))"
]
},
{
Expand All @@ -87,8 +89,7 @@
"n_confs = 10 # Number of conformers to create\n",
"\n",
"embedder = ETKDGEmbedder() # Initialize conformer embedder\n",
"unique_mol_data = embedder(smi, n_confs) # Embed molecule 3D geometries with ETKDG\n",
"mol = dict_to_mol(unique_mol_data) # Convert raw data to a molecule object"
"mol = embedder(smi, n_confs) # Embed molecule 3D geometries with ETKDG"
]
},
{
Expand Down Expand Up @@ -140,9 +141,8 @@
"dataset = \"drugs\"\n",
"device = \"cuda\"\n",
"\n",
"embedder = GeoMolEmbedder(dataset=dataset, track_stats=True, temp_schedule=\"none\", device=device) # Initialize conformer embedder\n",
"unique_mol_data = embedder(smi, n_confs) # Embed molecule 3D geometries with ETKDG\n",
"mol = dict_to_mol(unique_mol_data) # Convert raw data to a molecule object"
"embedder = GeoMolEmbedder(dataset=dataset, track_stats=True, temp_schedule=\"none\", device=device)\n",
"mol = embedder(smi, n_confs)"
]
},
{
Expand All @@ -157,6 +157,57 @@
"mol_viewer(mol, conf_id=visualize_conf_id) # visualize the molecule"
]
},
{
"cell_type": "markdown",
"id": "cf852fbd",
"metadata": {},
"source": [
"### 1.3 Torsional-Diffusion Embedder\n"
]
},
{
"cell_type": "markdown",
"id": "a00f0f28",
"metadata": {},
"source": [
"You can skip this block if you don't have Torsional-Diffusion installed. To install it,\n",
"\n",
"Clone Xiaorui's fork\n",
"```\n",
"git clone https://github.com/xiaoruiDong/torsional-diffusion\n",
"```\n",
"and then follow the instructions in that repository for installing the environment and downloading the trained models."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38d85e46",
"metadata": {},
"outputs": [],
"source": [
"n_confs = 10 # Number of conformers to create\n",
"\n",
"embedder = TorsionalDiffusionEmbedder(\n",
" repo_dir= Path.home() / \"Apps/torsional-diffusion\", # path to cloned repo\n",
" model_dir= Path.home() / \"Apps/torsional-diffusion/workdir/drugs_default\", # path to downloaded model\n",
")\n",
"\n",
"mol = embedder(smi, n_confs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd37bd33",
"metadata": {},
"outputs": [],
"source": [
"visualize_conf_id = 2\n",
"\n",
"mol_viewer(mol, conf_id=visualize_conf_id) # visualize the molecule"
]
},
{
"cell_type": "markdown",
"id": "46be5c44",
Expand Down Expand Up @@ -195,6 +246,9 @@
"optimizer = MMFFOptimizer()\n",
"# if you installed XTB, you can uncomment the following line\n",
"# optimizer = XTBOptimizer()\n",
"# if you have Gaussian, Orca, or QChem, you can try the following optimizers\n",
"# but they can be much slower when using more expensive methods\n",
"# optimizer = GaussianOptimizer(nprocs=16, memory=32, method=\"PM7\")\n",
"\n",
"# Pruner\n",
"pruner = TorsionPruner(max_chk_threshold=30)\n",
Expand All @@ -220,7 +274,7 @@
"source": [
"smi = \"CN1C2=C(C=C(C=C2)Cl)C(=NCC1=O)C3=CC=CC=C3\"\n",
"\n",
"mol_viewer(RDKitMol.FromSmiles(smi))"
"mol_viewer(Mol.FromSmiles(smi))"
]
},
{
Expand All @@ -232,9 +286,9 @@
},
"outputs": [],
"source": [
"n_conformers_per_iter = 100\n",
"min_iters = 2\n",
"max_iters = 5\n",
"n_conformers_per_iter = 10\n",
"min_iters = 1\n",
"max_iters = 3\n",
"\n",
"scg = StochasticConformerGenerator(\n",
" smiles=smi,\n",
Expand All @@ -250,16 +304,25 @@
"print(\n",
" f\"Number of conformers: {len(unique_mol_data)}\\n\"\n",
" f\"Metric: {scg.metric.metric_history[-1]:.3e}\"\n",
")"
")\n",
"mol = dict_to_mol(unique_mol_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5aee01ec",
"id": "d9e4925b",
"metadata": {},
"outputs": [],
"source": []
"source": [
"from rdkit import Chem\n",
"\n",
"Chem.rdMolAlign.AlignMolConformers(\n",
" mol, atomIds=[atom.GetIdx() for atom in mol.GetHeavyAtoms()]\n",
") # Align heavy atoms\n",
"\n",
"interactive_conformer_viewer(mol)"
]
}
],
"metadata": {
Expand Down
Loading

0 comments on commit a1db373

Please sign in to comment.