[Enhancement][1:1 docking] Add CUDA stream based acceleration for 1:1…

… dock - Adds argument paired_batch_size for specifying fast 1:1 pairing - Adds custom handlers for paired_batch_size - Defines JSON schema for specifying 1:1 combinations and config - Adds use_fast_math support in cmake build (enabled by default) - Fixes memory leaks in AutoDock Vina scoring_function reported by Valgrind - Fixes missing CUDA memory initialisation that causes crashes. The original code flow is retained when paired_batch_size is not specified. Please refer to the README for how to use the paired_batch_size argument.
dptech-corp · Jan 9, 2024 · 3bbf2e0 · 3bbf2e0
1 parent 42da4fa
commit 3bbf2e0
Show file tree

Hide file tree

Showing 13 changed files with 1,616 additions and 43 deletions.
diff --git a/unidock/CMakeLists.txt b/unidock/CMakeLists.txt
@@ -26,6 +26,13 @@ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
 	)
 endif()
 
+# Switch for CUDA fast math (ON by default). --use_fast_math is appended only to
+# the Release flag set, so other build types are left untouched.
+option(BUILD_FAST_MATH "Build in fast math mode" ON)
+if(BUILD_FAST_MATH)
+	string(APPEND CMAKE_CUDA_FLAGS_RELEASE " --use_fast_math")
+	# STATUS keeps this informational line out of stderr.
+	message(STATUS "CMAKE_CUDA_FLAGS_RELEASE updated for fast_math: ${CMAKE_CUDA_FLAGS_RELEASE}")
+endif()
+
 set(VINA_BIN_NAME unidock)
 add_compile_definitions(ENABLE_CUDA)
 add_compile_definitions(VERSION="v${PROJECT_VERSION}")
@@ -61,4 +68,4 @@ add_custom_target(
   COMMAND ${CLANG_FORMAT} -i ${sources}
   COMMENT "Running clang-format"
   VERBATIM
-)
+)
diff --git a/unidock/README.md b/unidock/README.md
@@ -235,3 +235,28 @@ DOI 10.1002/jcc.21334
 3. Uni-Dock computes slowly for few (<10) ligands.
 
      The optimal application of Uni-Dock occurs in scenarios where one binding pocket interacts with numerous (in an order of 1000) ligands. As the number of ligands within a single computational batch increases, the average processing speed improves. In instances where only a few ligands are present for one binding pocket, the overhead proportion becomes considerably large, leading to slower computational performance.
+     
+### Addendum to "FAQ 3 - Uni-Dock computes slowly for few (<10) ligands.":
+
+The `paired_batch` mode uses CUDA streams to accelerate simultaneous 1:1 docking in batches with Vina scoring. To run docking in this mode, invoke unidock with `--paired_batch_size` set to a value greater than 0, and pass the protein:ligand configurations as a JSON file via `--ligand_index`. The JSON file must follow the schema defined in `paired_batching.schema.json`.
+
+A sample input file, `data.json`, that complies with the schema is shown below:
+```
+{
+    "7LCU": {
+        "protein": "molecular_docking/PoseBuster/7LCU/7LCU_receptor.pdbqt",
+        "ligand": "molecular_docking/PoseBuster/7LCU/7LCU_ligand_prep.sdf",
+        "ligand_config": "molecular_docking/PoseBuster/7LCU/docking_grid.json"
+    },
+    "7KZ9": {
+        "protein": "molecular_docking/PoseBuster/7KZ9/7KZ9_receptor.pdbqt",
+        "ligand": "molecular_docking/PoseBuster/7KZ9/7KZ9_ligand_prep.sdf",
+        "ligand_config": "molecular_docking/PoseBuster/7KZ9/docking_grid.json"
+    }
+}
+```
+
+A typical invocation in `paired_batch` mode, with a batch size of 10, is shown below.
+
+`build/unidock --paired_batch_size 10 --ligand_index data_pb1.json --size_x 25 --size_y 25 --size_z 25 --dir test/prof_25_1024_80 --exhaustiveness 1024 --max_step 80 --seed 5`
+
diff --git a/unidock/src/cuda/monte_carlo.cu b/unidock/src/cuda/monte_carlo.cu
diff --git a/unidock/src/lib/monte_carlo.h b/unidock/src/lib/monte_carlo.h
@@ -63,6 +63,12 @@ struct monte_carlo {
                     const igrid& ig, const vec& corner1, const vec& corner2, rng& generator,
                     int verbosity, unsigned long long seed,
                     std::vector<std::vector<bias_element> >& bias_batch_list) const;
+    void mc_stream(std::vector<model>& m, std::vector<output_container>& out,
+                    std::vector<precalculate_byatom>& p, triangular_matrix_cuda_t* m_data_list_gpu,
+                    const igrid& ig, const vec& corner1, const vec& corner2, rng& generator,
+                    int verbosity, unsigned long long seed,
+                    std::vector<std::vector<bias_element> >& bias_batch_list) const;
+
     std::vector<output_type> cuda_to_vina(output_type_cuda_t* results_p, int thread) const;
 };
 

diff --git a/unidock/src/lib/scoring_function.h b/unidock/src/lib/scoring_function.h
@@ -37,7 +37,11 @@ enum scoring_function_choice { SF_VINA, SF_AD42, SF_VINARDO };
 
 class ScoringFunction {
 public:
-    ScoringFunction() {}
+    // Default construction yields an empty scoring function: both term counts
+    // and m_sf_choice start at 0.
+    ScoringFunction() : m_num_potentials(0), m_num_conf_independents(0), m_sf_choice(0) {}
     ScoringFunction(const scoring_function_choice sf_choice, const flv& weights) {
         switch (sf_choice) {
             case SF_VINA: {
@@ -81,7 +85,6 @@ class ScoringFunction {
                 break;
             }
             default: {
-                std::cout << "INSIDE everything::everything()   sfchoice = " << sf_choice << "\n";
                 VINA_CHECK(false);
                 break;
             }
@@ -90,7 +93,24 @@ class ScoringFunction {
         m_num_conf_independents = m_conf_independents.size();
         m_weights = weights;
     };
-    ~ScoringFunction() {}
+    // Deletes every term held in m_potentials and m_conf_independents, empties
+    // both containers and resets their counts to 0. Because the containers are
+    // cleared, a repeated call finds nothing left to delete.
+    // NOTE(review): no copy operations are visible in this hunk; if the class is
+    // still copyable, two copies would delete the same terms - confirm nothing
+    // copies a ScoringFunction by value.
+    void Destroy() {
+        for (auto term : m_potentials) delete term;
+        m_potentials.clear();
+        m_num_potentials = 0;
+
+        for (auto term : m_conf_independents) delete term;
+        m_conf_independents.clear();
+        m_num_conf_independents = 0;
+    }
+    // Releases the terms this object holds by delegating to Destroy().
+    ~ScoringFunction() { Destroy(); }
     fl eval(atom& a, atom& b, fl r) const {  // intentionally not checking for cutoff
         fl acc = 0;
         VINA_FOR(i, m_num_potentials) { acc += m_weights[i] * m_potentials[i]->eval(a, b, r); }

diff --git a/unidock/src/lib/vina.cpp b/unidock/src/lib/vina.cpp
@@ -85,10 +85,10 @@ void Vina::set_receptor(const std::string& rigid_name, const std::string& flex_n
     // CONDITIONS 4, 5, 6, 7 (rigid_name and flex_name are empty strings per default)
     if (rigid_name.find("pdbqt") || flex_name.find("pdbqt")) {
         m_receptor
-            = parse_receptor_pdbqt(rigid_name, flex_name, m_scoring_function.get_atom_typing());
+            = parse_receptor_pdbqt(rigid_name, flex_name, m_scoring_function->get_atom_typing());
     } else if (rigid_name.find("pdb") && (!rigid_name.find("pdbqt"))) {
         m_receptor
-            = parse_receptor_pdb(rigid_name, flex_name, m_scoring_function.get_atom_typing());
+            = parse_receptor_pdb(rigid_name, flex_name, m_scoring_function->get_atom_typing());
     }
 
     m_model = m_receptor;
@@ -106,7 +106,7 @@ void Vina::set_ligand_from_string(const std::string& ligand_string) {
         exit(EXIT_FAILURE);
     }
 
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     if (!m_receptor_initialized) {
         // This situation will happen if we don't need a receptor and we are using affinity maps
@@ -122,7 +122,7 @@ void Vina::set_ligand_from_string(const std::string& ligand_string) {
     m_model.append(parse_ligand_pdbqt_from_string(ligand_string, atom_typing));
 
     // Because we precalculate ligand atoms interactions
-    precalculate_byatom precalculated_byatom(m_scoring_function, m_model);
+    precalculate_byatom precalculated_byatom(*m_scoring_function, m_model);
 
     // Check that all atom types are in the grid (if initialized)
     if (m_map_initialized) {
@@ -149,7 +149,7 @@ void Vina::set_ligand_from_string(const std::vector<std::string>& ligand_string)
         exit(EXIT_FAILURE);
     }
 
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     if (!m_receptor_initialized) {
         // This situation will happen if we don't need a receptor and we are using affinity maps
@@ -165,7 +165,7 @@ void Vina::set_ligand_from_string(const std::vector<std::string>& ligand_string)
     m_model.append(parse_ligand_pdbqt_from_string(ligand_string[i], atom_typing));
 
     // Because we precalculate ligand atoms interactions
-    precalculate_byatom precalculated_byatom(m_scoring_function, m_model);
+    precalculate_byatom precalculated_byatom(*m_scoring_function, m_model);
 
     // Check that all atom types are in the grid (if initialized)
     if (m_map_initialized) {
@@ -193,7 +193,7 @@ void Vina::set_ligand_from_string_gpu(const std::vector<std::string>& ligand_str
         exit(EXIT_FAILURE);
     }
 
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     if (!m_receptor_initialized) {
         // This situation will happen if we don't need a receptor and we are using affinity maps
@@ -210,7 +210,7 @@ void Vina::set_ligand_from_string_gpu(const std::vector<std::string>& ligand_str
     for (int i = 0; i < ligand_string.size(); ++i) {
         m_model_gpu[i].append(
             parse_ligand_pdbqt_from_string_no_failure(ligand_string[i], atom_typing));
-        m_precalculated_byatom_gpu[i].init_without_calculation(m_scoring_function, m_model_gpu[i]);
+        m_precalculated_byatom_gpu[i].init_without_calculation(*m_scoring_function, m_model_gpu[i]);
     }
 
     // calculate common rs data
@@ -219,7 +219,7 @@ void Vina::set_ligand_from_string_gpu(const std::vector<std::string>& ligand_str
     // Because we precalculate ligand atoms interactions, which should be done in parallel
     int precalculate_thread_num = ligand_string.size();
 
-    precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, m_scoring_function,
+    precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, *m_scoring_function,
                           m_model_gpu, common_rs, precalculate_thread_num);
 
     VINA_RANGE(i, 0, ligand_string.size()) {
@@ -250,7 +250,7 @@ void Vina::set_ligand_from_object_gpu(const std::vector<model>& ligands) {
         exit(EXIT_FAILURE);
     }
 
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     if (!m_receptor_initialized) {
         // This situation will happen if we don't need a receptor and we are using affinity maps
@@ -269,7 +269,7 @@ void Vina::set_ligand_from_object_gpu(const std::vector<model>& ligands) {
         if (multi_bias) {
             m_model_gpu[i].bias_list = bias_batch_list[i];
         }
-        m_precalculated_byatom_gpu[i].init_without_calculation(m_scoring_function, m_model_gpu[i]);
+        m_precalculated_byatom_gpu[i].init_without_calculation(*m_scoring_function, m_model_gpu[i]);
     }
 
     // calculate common rs data
@@ -278,7 +278,7 @@ void Vina::set_ligand_from_object_gpu(const std::vector<model>& ligands) {
     // Because we precalculate ligand atoms interactions, which should be done in parallel
     int precalculate_thread_num = ligands.size();
 
-    precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, m_scoring_function,
+    precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, *m_scoring_function,
                           m_model_gpu, common_rs, precalculate_thread_num);
 
     VINA_RANGE(i, 0, ligands.size()) {
@@ -309,7 +309,7 @@ void Vina::set_ligand_from_object(const std::vector<model>& ligands) {
         exit(EXIT_FAILURE);
     }
 
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     if (!m_receptor_initialized) {
         // This situation will happen if we don't need a receptor and we are using affinity maps
@@ -325,7 +325,7 @@ void Vina::set_ligand_from_object(const std::vector<model>& ligands) {
     m_model.append(ligands[i]);
 
     // Because we precalculate ligand atoms interactions
-    precalculate_byatom precalculated_byatom(m_scoring_function, m_model);
+    precalculate_byatom precalculated_byatom(*m_scoring_function, m_model);
 
     // Check that all atom types are in the grid (if initialized)
     if (m_map_initialized) {
@@ -453,9 +453,8 @@ void Vina::set_ad4_weights(double weight_ad4_vdw, double weight_ad4_hb, double w
 }
 
 void Vina::set_forcefield() {
-    ScoringFunction scoring_function(m_sf_choice, m_weights);
     // Store in Vina object
-    m_scoring_function = scoring_function;
+    m_scoring_function = std::make_shared<ScoringFunction>(m_sf_choice, m_weights);
 }
 
 std::vector<double> Vina::grid_dimensions_from_ligand(double buffer_size) {
@@ -514,7 +513,7 @@ void Vina::compute_vina_maps(double center_x, double center_y, double center_z,
     vec center(center_x, center_y, center_z);
     const fl slope = 1e6;  // FIXME: too large? used to be 100
     szv atom_types;
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     /* Atom types initialization
     If a ligand was defined before, we only use those present in the ligand
@@ -523,7 +522,7 @@ void Vina::compute_vina_maps(double center_x, double center_y, double center_z,
     if (m_ligand_initialized)
         atom_types = m_model.get_movable_atom_types(atom_typing);
     else
-        atom_types = m_scoring_function.get_atom_types();
+        atom_types = m_scoring_function->get_atom_types();
 
     // Grid dimensions
     VINA_FOR_IN(i, gd) {
@@ -540,7 +539,7 @@ void Vina::compute_vina_maps(double center_x, double center_y, double center_z,
     }
 
     // Initialize the scoring function
-    precalculate precalculated_sf(m_scoring_function);
+    precalculate precalculated_sf(*m_scoring_function);
     // Store it now in Vina object because of non_cache
     m_precalculated_sf = precalculated_sf;
 
@@ -593,7 +592,7 @@ void Vina::load_maps(std::string maps) {
     // Check that all the affinity map are present for ligands/flex residues (if initialized
     // already)
     if (m_ligand_initialized) {
-        atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+        atom_type::t atom_typing = m_scoring_function->get_atom_typing();
         szv atom_types = m_model.get_movable_atom_types(atom_typing);
 
         if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) {
@@ -615,12 +614,12 @@ void Vina::write_maps(const std::string& map_prefix, const std::string& gpf_file
     }
 
     szv atom_types;
-    atom_type::t atom_typing = m_scoring_function.get_atom_typing();
+    atom_type::t atom_typing = m_scoring_function->get_atom_typing();
 
     if (m_ligand_initialized)
         atom_types = m_model.get_movable_atom_types(atom_typing);
     else
-        atom_types = m_scoring_function.get_atom_types();
+        atom_types = m_scoring_function->get_atom_types();
 
     if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) {
         doing("Writing Vina maps", m_verbosity, 0);
@@ -1201,7 +1200,7 @@ std::vector<double> Vina::score(double intramolecular_energy) {
         lig_intra = m_model.evali(m_precalculated_byatom, authentic_v);  // [2] ligand_i -- ligand_i
         intra = flex_grids + intra_pairs + lig_intra;
         // Total
-        total = m_scoring_function.conf_independent(
+        total = m_scoring_function->conf_independent(
             m_model,
             inter + intra
                 - intramolecular_energy);  // we pass intermolecular energy from the best pose
@@ -1220,7 +1219,7 @@ std::vector<double> Vina::score(double intramolecular_energy) {
         lig_intra = m_model.evali(m_precalculated_byatom, authentic_v);  // [2] ligand_i -- ligand_i
         intra = flex_grids + intra_pairs + lig_intra;
         // Torsion
-        conf_independent = m_scoring_function.conf_independent(
+        conf_independent = m_scoring_function->conf_independent(
             m_model, 0);  // [3] we can pass e=0 because we do not modify the energy like in vina
         // Total
         total = inter + conf_independent;  // (+ intra - intra)
@@ -1282,7 +1281,7 @@ std::vector<double> Vina::score_gpu(int i, double intramolecular_energy) {
                                          authentic_v);  // [2] ligand_i -- ligand_i
         intra = flex_grids + intra_pairs + lig_intra;
         // Total
-        total = m_scoring_function.conf_independent(
+        total = m_scoring_function->conf_independent(
             m_model_gpu[i],
             inter + intra
                 - intramolecular_energy);  // we pass intermolecular energy from the best pose
@@ -1303,7 +1302,7 @@ std::vector<double> Vina::score_gpu(int i, double intramolecular_energy) {
                                          authentic_v);  // [2] ligand_i -- ligand_i
         intra = flex_grids + intra_pairs + lig_intra;
         // Torsion
-        conf_independent = m_scoring_function.conf_independent(
+        conf_independent = m_scoring_function->conf_independent(
             m_model_gpu[i],
             0);  // [3] we can pass e=0 because we do not modify the energy like in vina
         // Total
@@ -1656,7 +1655,7 @@ void Vina::global_search(const int exhaustiveness, const int n_poses, const doub
 void Vina::global_search_gpu(const int exhaustiveness, const int n_poses, const double min_rmsd,
                              const int max_evals, const int max_step, int num_of_ligands,
                              unsigned long long seed, const int refine_step,
-                             const bool local_only) {
+                             const bool local_only, const bool create_new_stream) {
     // Vina search (Monte-carlo and local optimization)
     // Check if ff, box and ligand were initialized
     if (!m_ligand_initialized) {
@@ -1717,8 +1716,16 @@ void Vina::global_search_gpu(const int exhaustiveness, const int n_poses, const
     doing(sstm.str(), m_verbosity, 0);
     auto start = std::chrono::system_clock::now();
     if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) {
-        mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid,
-           m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list);
+        if (create_new_stream)
+        {
+            mc.mc_stream(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid,
+                m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list);            
+        }
+        else
+        {
+            mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid,
+                m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list);
+        }
     } else {
         mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_ad4grid,
            m_ad4grid.corner1(), m_ad4grid.corner2(), generator, m_verbosity, seed, bias_batch_list);
@@ -1864,7 +1871,6 @@ Vina::~Vina() {
     // scoring function
     scoring_function_choice m_sf_choice;
     flv m_weights;
-    ScoringFunction m_scoring_function;
     precalculate_byatom m_precalculated_byatom;
     precalculate m_precalculated_sf;
     // maps

diff --git a/unidock/src/lib/vina.h b/unidock/src/lib/vina.h
@@ -55,6 +55,7 @@
 #include "scoring_function.h"
 #include "precalculate.h"
 #include "bias.h"
+#include <memory>
 
 #ifdef DEBUG
 #    define DEBUG_PRINTF printf
@@ -75,7 +76,6 @@ class Vina {
         m_no_refine = no_refine;
         m_progress_callback = progress_callback;
         gpu = false;
-
         // Look for the number of cpu
         if (cpu <= 0) {
             unsigned num_cpus = boost::thread::hardware_concurrency();
@@ -149,7 +149,8 @@ class Vina {
                            const double min_rmsd = 1.0, const int max_evals = 0,
                            const int max_step = 0, int num_of_ligands = 1,
                            unsigned long long seed = 181129, const int refine_step = 5,
-                           const bool local_only = false);
+                           const bool local_only = false,
+                           const bool create_new_stream = false);
     std::string get_poses(int how_many = 9, double energy_range = 3.0);
     std::string get_sdf_poses(int how_many = 9, double energy_range = 3.0);
     std::string get_poses_gpu(int ligand_id, int how_many = 9, double energy_range = 3.0);
@@ -189,7 +190,7 @@ class Vina {
     // scoring function
     scoring_function_choice m_sf_choice;
     flv m_weights;
-    ScoringFunction m_scoring_function;
+    std::shared_ptr<ScoringFunction> m_scoring_function;
     precalculate_byatom m_precalculated_byatom;
     precalculate m_precalculated_sf;
     // gpu scoring function precalculated