From 26d3a76d8a340f95d4935b1e6bd15ba7fc5db586 Mon Sep 17 00:00:00 2001 From: Nazanin Donyapour Date: Fri, 15 Sep 2023 15:09:11 -0400 Subject: [PATCH] vacuum minimization --- cwl_adapters/combine_structure.cwl | 16 ++++++- examples/config_ci.json | 6 ++- examples/docking/docking.yml | 1 + examples/docking/docking_stability.yml | 24 +++++++++- examples/docking/dsb.yml | 46 +++++++++++-------- examples/docking/gen_topol_params.yml | 1 + examples/docking/vs_demo_2.yml | 13 ++++-- examples/docking/vs_demo_3.yml | 13 ++++-- examples/docking/vs_demo_4.yml | 13 ++++-- examples/gromacs/basic.yml | 6 +++ examples/gromacs/cg.yml | 7 +++ examples/gromacs/min.yml | 10 ++++ examples/gromacs/setup.yml | 7 --- examples/gromacs/setup_vac_min.yml | 33 ++++++++++++- examples/gromacs/solv_ion.yml | 21 ++++++--- examples/gromacs/stability.yml | 3 +- examples/gromacs/steep.yml | 7 +++ examples/gromacs/topology.yml | 9 +++- examples/gromacs/tutorial.yml | 2 +- examples/scripts/Dockerfile_combine_structure | 2 +- examples/scripts/combine_structure.py | 41 ++++++++++++++--- 21 files changed, 221 insertions(+), 60 deletions(-) diff --git a/cwl_adapters/combine_structure.cwl b/cwl_adapters/combine_structure.cwl index 4a8f6d50..98fd7779 100644 --- a/cwl_adapters/combine_structure.cwl +++ b/cwl_adapters/combine_structure.cwl @@ -12,7 +12,7 @@ baseCommand: ['python', '/combine_structure.py'] hints: DockerRequirement: - dockerPull: ndonyapour/combine_structure + dockerPull: ndonyapour/combine_structure_zip inputs: input_structure1: @@ -40,6 +40,20 @@ inputs: - edam:format_3877 inputBinding: prefix: --input_structure2 + + input_top_zip_path: + label: Input zip file + doc: |- + Input zip file + Type: string + File type: input + Accepted formats: zip + Example file: https://github.com/bioexcel/biobb_md/blob/master/biobb_md/test/data/gromacs/genion.zip + type: File + format: + - edam:format_3987 + inputBinding: + prefix: --input_top_zip_path output_structure_path: label: Output combined PDB file path diff --git a/examples/config_ci.json b/examples/config_ci.json index 0a30d5e6..871c02d4 100644 --- a/examples/config_ci.json +++ b/examples/config_ci.json @@ -10,7 +10,11 @@ "vs_demo_2_weekly", "vs_demo_3_weekly", "vs_demo_4_weekly", - "docking_rescoring_weekly" + "docking_rescoring_weekly", + "stability", + "docking_stability", + "nmr", + "tutorial" ], // NOTE: Most of the workflows in this list have free variables because they are subworkflows // i.e. if you try to run them, you will get "Missing required input parameter" diff --git a/examples/docking/docking.yml b/examples/docking/docking.yml index a59bbd0f..380fe96d 100644 --- a/examples/docking/docking.yml +++ b/examples/docking/docking.yml @@ -90,6 +90,7 @@ steps: in: input_structure1: '*receptor.xyz' # '*receptor_hydrogens.pdb' input_structure2: '*pose_ligand.xyz' + input_top_zip_path: '*complex_vac.zip' output_structure_path: '&complex_vac.pdb' wic: diff --git a/examples/docking/docking_stability.yml b/examples/docking/docking_stability.yml index 17d89ec5..2f9eb70e 100644 --- a/examples/docking/docking_stability.yml +++ b/examples/docking/docking_stability.yml @@ -29,14 +29,31 @@ wic: inlineable: True (2, stability.yml): wic: - inlineable: True + inlineable: False steps: + (1, setup_vac_min.yml): + wic: + steps: + (3, solv_ion.yml): + wic: + steps: + (4, genion): + in: + output_top_zip_path: '&genion_complex.zip' (2, basic.yml): + in: + top_zip_path: '*genion_complex.zip' wic: steps: (3, prod.yml): wic: steps: + (2, grompp): + in: + input_top_zip_path: '*genion_complex.zip' + (3, mdrun): + in: + output_crd_path: '&prod_complex.gro' (4, cwl_watcher_analysis.yml): wic: backend: complex @@ -45,4 +62,7 @@ wic: steps: (1, analysis_realtime.yml): wic: - backend: complex \ No newline at end of file + backend: complex + (2, analysis_final.yml): + wic: + backend: complex \ No newline at end of file diff --git a/examples/docking/dsb.yml b/examples/docking/dsb.yml index cb59535f..d98d45e8 100644 --- a/examples/docking/dsb.yml +++ b/examples/docking/dsb.yml @@ -9,23 +9,17 @@ steps: sdf_path: ~sdf_path - stability.yml: in: - # NOTE: crd_path and top_zip_path are currently only necessary - # due to a limitation of the inlineing implementation. - # Moreover, crd_path and top_zip_path are overridden below crd_path: '*complex_vac.pdb' top_zip_path: '*complex_vac.zip' - nsteps: 100000 + nsteps: 10000 dt: 0.002 temperature: 298.0 pressure: 1.0 - stability.yml: in: - # NOTE: crd_path and top_zip_path are currently only necessary - # due to a limitation of the inlineing implementation. - # Moreover, crd_path and top_zip_path are overridden below - crd_path: '*complex_vac.pdb' - top_zip_path: '*complex_vac.zip' - nsteps: 100000 + crd_path: '*ligand_GMX.gro' + top_zip_path: '*ligand_GMX.zip' + nsteps: 10000 dt: 0.002 temperature: 298.0 pressure: 1.0 @@ -41,13 +35,18 @@ wic: environment: action: checkpoint steps: - (1, setup.yml): + (1, setup_vac_min.yml): wic: steps: - (4, genion): - in: - output_top_zip_path: '&genion_complex.zip' + (3, solv_ion.yml): + wic: + steps: + (4, genion): + in: + output_top_zip_path: '&genion_complex.zip' (2, basic.yml): + in: + top_zip_path: '*genion_complex.zip' wic: steps: (3, prod.yml): @@ -82,13 +81,24 @@ wic: action: restore save_defs: ['genion_complex.zip', 'prod_complex.gro'] steps: - (1, setup.yml): + (1, setup_vac_min.yml): wic: steps: - (4, genion): - in: - output_top_zip_path: '&genion_ligand.zip' + (1, topology.yml): + wic: + steps: + (1, editconf): + in: + output_crd_path: '&ligand_box.g96' + (3, solv_ion.yml): + wic: + steps: + (4, genion): + in: + output_top_zip_path: '&genion_ligand.zip' (2, basic.yml): + in: + top_zip_path: '*genion_ligand.zip' wic: steps: (3, prod.yml): diff --git a/examples/docking/gen_topol_params.yml b/examples/docking/gen_topol_params.yml index 57c1341b..15859d37 100644 --- a/examples/docking/gen_topol_params.yml +++ b/examples/docking/gen_topol_params.yml @@ -70,6 +70,7 @@ steps: in: input_structure1: ~input_receptor_xyz_path input_structure2: '*pose_ligand.xyz' + input_top_zip_path: '*complex_vac.zip' output_structure_path: '&complex_vac.pdb' wic: diff --git a/examples/docking/vs_demo_2.yml b/examples/docking/vs_demo_2.yml index 611148a3..7c462650 100644 --- a/examples/docking/vs_demo_2.yml +++ b/examples/docking/vs_demo_2.yml @@ -146,13 +146,18 @@ wic: wic: inlineable: False steps: - (1, setup.yml): + (1, setup_vac_min.yml): wic: steps: - (4, genion): - in: - output_top_zip_path: '&genion_complex.zip' + (3, solv_ion.yml): + wic: + steps: + (4, genion): + in: + output_top_zip_path: '&genion_complex.zip' (2, basic.yml): + in: + top_zip_path: '*genion_complex.zip' wic: steps: (3, prod.yml): diff --git a/examples/docking/vs_demo_3.yml b/examples/docking/vs_demo_3.yml index 3e65c515..b9e5deb1 100644 --- a/examples/docking/vs_demo_3.yml +++ b/examples/docking/vs_demo_3.yml @@ -124,13 +124,18 @@ wic: wic: inlineable: False steps: - (1, setup.yml): + (1, setup_vac_min.yml): wic: steps: - (4, genion): - in: - output_top_zip_path: '&genion_complex.zip' + (3, solv_ion.yml): + wic: + steps: + (4, genion): + in: + output_top_zip_path: '&genion_complex.zip' (2, basic.yml): + in: + top_zip_path: '*genion_complex.zip' wic: steps: (3, prod.yml): diff --git a/examples/docking/vs_demo_4.yml b/examples/docking/vs_demo_4.yml index 7bc8716a..28576bfe 100644 --- a/examples/docking/vs_demo_4.yml +++ b/examples/docking/vs_demo_4.yml @@ -191,13 +191,18 @@ wic: wic: inlineable: False steps: - (1, setup.yml): + (1, setup_vac_min.yml): wic: steps: - (4, genion): - in: - output_top_zip_path: '&genion_complex.zip' + (3, solv_ion.yml): + wic: + steps: + (4, genion): + in: + output_top_zip_path: '&genion_complex.zip' (2, basic.yml): + in: + top_zip_path: '*genion_complex.zip' wic: steps: (3, prod.yml): diff --git a/examples/gromacs/basic.yml b/examples/gromacs/basic.yml index 3cec70df..46294795 100644 --- a/examples/gromacs/basic.yml +++ b/examples/gromacs/basic.yml @@ -1,4 +1,8 @@ inputs: + top_zip_path: + type: File + format: + - edam:format_3987 nsteps: type: int dt: @@ -16,6 +20,8 @@ steps: # to this level and use the following syntax to pass it to only one call site. # (Moreover, the former will create multiple definitions of &min.tpr, which is not allowed.) - min.yml: + in: + top_zip_path: ~top_zip_path - equil.yml: - prod.yml: in: diff --git a/examples/gromacs/cg.yml b/examples/gromacs/cg.yml index 5e82877d..8077fb68 100644 --- a/examples/gromacs/cg.yml +++ b/examples/gromacs/cg.yml @@ -1,6 +1,13 @@ +inputs: + top_zip_path: + type: File + format: + - edam:format_3987 + steps: - grompp: in: + input_top_zip_path: ~top_zip_path config: mdp: integrator: cg diff --git a/examples/gromacs/min.yml b/examples/gromacs/min.yml index 713e15f5..d917c899 100644 --- a/examples/gromacs/min.yml +++ b/examples/gromacs/min.yml @@ -1,9 +1,19 @@ +inputs: + top_zip_path: + type: File + format: + - edam:format_3987 + steps: - steep.yml: + in: + top_zip_path: ~top_zip_path # Fatal error: # The coordinates could not be constrained. Minimizer 'cg' can not handle # constraint failures, use minimizer 'steep' before using 'cg'. - cg.yml: + in: + top_zip_path: ~top_zip_path # Fatal error: # The combination of constraints and L-BFGS minimization is not implemented. # Either do not use constraints, or use another minimizer (e.g. steepest diff --git a/examples/gromacs/setup.yml b/examples/gromacs/setup.yml index 3738feb0..ff5c59a8 100644 --- a/examples/gromacs/setup.yml +++ b/examples/gromacs/setup.yml @@ -10,13 +10,6 @@ inputs: - edam:format_3987 steps: -# - editconf: -# in: -# input_crd_path: ~crd_path -# #input_crd_path: '*complex_vac.pdb' -# config: -# box_type: cubic -# distance_to_molecule: 1.2 - gmx_editconf: in: input_crd_path: ~crd_path diff --git a/examples/gromacs/setup_vac_min.yml b/examples/gromacs/setup_vac_min.yml index 1be9c40f..6226f91e 100644 --- a/examples/gromacs/setup_vac_min.yml +++ b/examples/gromacs/setup_vac_min.yml @@ -1,8 +1,37 @@ +inputs: + crd_path: + type: File + format: + - edam:format_1476 + - edam:format_2033 + top_zip_path: + type: File + format: + - edam:format_3987 + steps: - topology.yml: + in: + crd_path: ~crd_path - min.yml: # Minimize in vacuum first, then in solvent. + in: + top_zip_path: ~top_zip_path - solv_ion.yml: - + in: + top_zip_path: ~top_zip_path wic: graphviz: - label: System Setup \ No newline at end of file + label: System Setup + steps: + (1, topology.yml): + wic: + graphviz: + label: 'Initialize\nPeriodic Box' + (2, min.yml): + wic: + graphviz: + label: 'Vacuum Minimization' + (3, solv_ion.yml): + wic: + graphviz: + label: 'Add Water\nSolvent' diff --git a/examples/gromacs/solv_ion.yml b/examples/gromacs/solv_ion.yml index f987b25a..364193a9 100644 --- a/examples/gromacs/solv_ion.yml +++ b/examples/gromacs/solv_ion.yml @@ -1,12 +1,19 @@ +inputs: + top_zip_path: + type: File + format: + - edam:format_3987 + steps: - - editconf: + - gmx_editconf: in: - config: - box_type: cubic - distance_to_molecule: 1.0 + #input_crd_path: ~crd_path + align_principal_axes: 0 + box_type: cubic + distance_to_molecule: 1.2 - solvate: in: - input_top_zip_path: '*complex_vac.zip' + input_top_zip_path: ~top_zip_path output_crd_path: '&solvate.gro' output_top_zip_path: '&solvate.zip' - grompp: @@ -28,10 +35,10 @@ wic: graphviz: label: Solvation & Ionization ranksame: - - (1, editconf) + - (1, gmx_editconf) - (4, genion) steps: - (1, editconf): + (1, gmx_editconf): wic: graphviz: label: 'Initialize\nPeriodic Box' diff --git a/examples/gromacs/stability.yml b/examples/gromacs/stability.yml index b0583585..3f4b3a8c 100644 --- a/examples/gromacs/stability.yml +++ b/examples/gromacs/stability.yml @@ -18,12 +18,13 @@ inputs: - edam:format_3987 steps: - - setup.yml: + - setup_vac_min.yml: in: crd_path: ~crd_path top_zip_path: ~top_zip_path - basic.yml: in: + top_zip_path: '*genion.zip' nsteps: ~nsteps dt: ~dt ref-t: ~temperature diff --git a/examples/gromacs/steep.yml b/examples/gromacs/steep.yml index 11e9cb84..1537e1ee 100644 --- a/examples/gromacs/steep.yml +++ b/examples/gromacs/steep.yml @@ -1,3 +1,9 @@ +inputs: + top_zip_path: + type: File + format: + - edam:format_3987 + # On my machine, 10 iterations of steepest descent is sufficient to prevent # conjugate gradient from crashing. (i.e. pytest works for me!) # But using only 10 iterations is causing the tests on github actions to fail! @@ -5,6 +11,7 @@ steps: - grompp: in: + input_top_zip_path: ~top_zip_path config: mdp: integrator: steep diff --git a/examples/gromacs/topology.yml b/examples/gromacs/topology.yml index 4b4c3843..fe686c7a 100644 --- a/examples/gromacs/topology.yml +++ b/examples/gromacs/topology.yml @@ -1,10 +1,17 @@ +inputs: + crd_path: + type: File + format: + - edam:format_1476 + - edam:format_2033 + steps: # We want to perform vacuum minimization first, before solvation and ionization. # NOTE: Gromacs no longer supports non-periodic boundary conditions (!!!), # so we must emulate a vacuum by using a very large (1000 Angstrom) box. - editconf: in: - input_crd_path: '*complex_vac.pdb' + input_crd_path: ~crd_path output_crd_path: '&complex_box.g96' config: box_type: cubic diff --git a/examples/gromacs/tutorial.yml b/examples/gromacs/tutorial.yml index c3556ad9..23ad4fd4 100644 --- a/examples/gromacs/tutorial.yml +++ b/examples/gromacs/tutorial.yml @@ -6,7 +6,7 @@ steps: in: crd_path: '*complex_vac.pdb' top_zip_path: '*complex_vac.zip' - nsteps: 100000 + nsteps: 10000 dt: 0.002 temperature: 298.0 pressure: 1.0 diff --git a/examples/scripts/Dockerfile_combine_structure b/examples/scripts/Dockerfile_combine_structure index b8c746de..82258215 100644 --- a/examples/scripts/Dockerfile_combine_structure +++ b/examples/scripts/Dockerfile_combine_structure @@ -1,7 +1,7 @@ FROM condaforge/mambaforge # NOT mambaforge-pypy3 (rdkit is incompatible with pypy) -RUN mamba install -c conda-forge rdkit +RUN mamba install -c conda-forge rdkit mdanalysis gromacs ADD combine_structure.py . ADD Dockerfile_combine_structure . \ No newline at end of file diff --git a/examples/scripts/combine_structure.py b/examples/scripts/combine_structure.py index f16ed703..516f0f18 100644 --- a/examples/scripts/combine_structure.py +++ b/examples/scripts/combine_structure.py @@ -1,9 +1,15 @@ import sys import os +import zipfile import argparse from typing import Optional from rdkit import Chem +import MDAnalysis as mda + + +GROMACS_INCLUDE_PATH = os.path.realpath('/opt/conda/share/gromacs/top') +TEMP_PATH = os.path.realpath('./') def parse_arguments() -> argparse.Namespace: @@ -15,6 +21,7 @@ def parse_arguments() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument('--input_structure1', required=True) parser.add_argument('--input_structure2', required=True) + parser.add_argument('--input_top_zip_path', required=True) parser.add_argument('--output_structure_path', required=True) args = parser.parse_args() return args @@ -38,22 +45,43 @@ def read_xyz_rdkit(input_structure_path: str) -> Optional[Chem.rdchem.Mol]: # p return xyz -def combine_structure_rdkit(input_structure1_path: str, input_structure2_path: str, output_structure_path: str) -> None: - """ Combine two structures into a single PDB file using RDKit +def fix_atom_names(input_top_zip_path: str, output_structure_path: str) -> None: + """ Modifies the atom names based on the topology file Args: - input_structure1_path (str): The path to the xyz structure 1 - input_structure2_path (str): The path to the xyz structure 2 + input_top_zip_path (str): The path to the input zip file output_structure_path (str): The path to the output combined structure """ + with zipfile.ZipFile(input_top_zip_path, "r") as zip_read: + zip_read.extractall(path=TEMP_PATH) + + file_list = [str(os.path.join(TEMP_PATH, f)) for f in zip_read.namelist()] + top_file = next(name for name in file_list if name.endswith(".top")) + + u1 = mda.Universe(top_file, topology_format='ITP', include_dir=GROMACS_INCLUDE_PATH) + u2 = mda.Universe(os.path.join(TEMP_PATH, 'temp_out.pdb')) + + u2.atoms.names = u1.atoms.names + all_atoms = u2.select_atoms("all") + all_atoms.write(output_structure_path) + + +def combine_structure_rdkit(input_structure1_path: str, input_structure2_path: str) -> None: + """ Combine two structures into a single PDB file using RDKit and saves it in the temporary PDB file + + Args: + input_structure1_path (str): The path to the xyz structure 1 + input_structure2_path (str): The path to the xyz structure 2 + """ + structure1 = read_xyz_rdkit(input_structure1_path) structure2 = read_xyz_rdkit(input_structure2_path) if structure1 and structure2: combo = Chem.CombineMols(structure1, structure2) # pylint: disable=no-member - with Chem.PDBWriter(output_structure_path) as writer: # pylint: disable=no-member + with Chem.PDBWriter(os.path.join(TEMP_PATH, 'temp_out.pdb')) as writer: writer.write(combo) @@ -70,7 +98,8 @@ def main() -> None: print(f'Error: Can not find file {args.input_structure2}') sys.exit(1) - combine_structure_rdkit(args.input_structure1, args.input_structure2, args.output_structure_path) + combine_structure_rdkit(args.input_structure1, args.input_structure2) + fix_atom_names(args.input_top_zip_path, args.output_structure_path) if __name__ == '__main__':