From 3bbf2e05cf7669c69f5b305dcc7ebfcf858c6910 Mon Sep 17 00:00:00 2001
From: Prabindh Sundareson
Date: Tue, 9 Jan 2024 18:11:00 +0530
Subject: [PATCH] [Enhancement][1:1 docking] Add CUDA stream-based acceleration
 for 1:1 docking

- Add argument paired_batch_size for specifying fast 1:1 pairing
- Add custom handlers for paired_batch_size
- Define JSON schema for specifying 1:1 combinations and config
- Add use_fast_math support in cmake build (enabled by default)
- Fix memory leaks in AutoDock Vina scoring_function reported by Valgrind
- Fix missing CUDA memory initialisation that caused crashes

The original code flow is retained when paired_batch_size is not specified.
Please refer to the README for details on using the paired_batch_size argument.
---
 unidock/CMakeLists.txt                       |   9 +-
 unidock/README.md                            |  25 +
 unidock/src/cuda/monte_carlo.cu              | 618 +++++++++++++++++++
 unidock/src/lib/monte_carlo.h                |   6 +
 unidock/src/lib/scoring_function.h           |  26 +-
 unidock/src/lib/vina.cpp                     |  66 +-
 unidock/src/lib/vina.h                       |   7 +-
 unidock/src/main/complex_property.h          | 111 ++++
 unidock/src/main/main.cpp                    |  53 +-
 unidock/src/main/paired_batching.schema.json |  28 +
 unidock/src/main/simulation_container.h      | 517 ++++++++++++++++
 unidock/src/main/vina_cuda_worker.h          | 191 ++++++
 unidock/src/rocm/main.cu.hip                 |   2 +-
 13 files changed, 1616 insertions(+), 43 deletions(-)
 create mode 100644 unidock/src/main/complex_property.h
 create mode 100644 unidock/src/main/paired_batching.schema.json
 create mode 100644 unidock/src/main/simulation_container.h
 create mode 100644 unidock/src/main/vina_cuda_worker.h

diff --git a/unidock/CMakeLists.txt b/unidock/CMakeLists.txt
index f920aca..ff2b616 100644
--- a/unidock/CMakeLists.txt
+++ b/unidock/CMakeLists.txt
@@ -26,6 +26,13 @@ if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
     )
 endif()
 
+# Add fast math
+option(BUILD_FAST_MATH "Build in fast math mode" ON)
+if(BUILD_FAST_MATH)
+    set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} --use_fast_math")
+    message("CMAKE_CUDA_FLAGS_RELEASE updated for fast_math: ${CMAKE_CUDA_FLAGS_RELEASE}")
+endif()
+
 set(VINA_BIN_NAME unidock)
 add_compile_definitions(ENABLE_CUDA)
 add_compile_definitions(VERSION="v${PROJECT_VERSION}")
@@ -61,4 +68,4 @@ add_custom_target(
     COMMAND ${CLANG_FORMAT} -i ${sources}
     COMMENT "Running clang-format"
     VERBATIM
-)
\ No newline at end of file
+)
diff --git a/unidock/README.md b/unidock/README.md
index 3fb097f..efc925a 100644
--- a/unidock/README.md
+++ b/unidock/README.md
@@ -235,3 +235,28 @@ DOI 10.1002/jcc.21334
 3. Uni-Dock computes slowly for few (<10) ligands.
    The optimal application of Uni-Dock occurs in scenarios where one binding pocket interacts with numerous (on the order of 1000) ligands. As the number of ligands within a single computational batch increases, the average processing speed improves. In instances where only a few ligands are present for one binding pocket, the overhead proportion becomes considerably large, leading to slower computational performance.
+
+### Addendum to FAQ 3 ("Uni-Dock computes slowly for few (<10) ligands")
+
+The `paired_batch` mode provides a mechanism to accelerate simultaneous 1:1 docking in batches with Vina scoring, using CUDA streams. To run docking in this mode, invoke unidock with `--paired_batch_size` set to a value >0, and pass the protein:ligand configurations in JSON form via `--ligand_index`. The JSON file should follow the schema defined in paired_batching.schema.json.
+
+A sample input data.json, conforming to the schema, is shown below:
+```
+{
+    "7LCU": {
+        "protein": "molecular_docking/PoseBuster/7LCU/7LCU_receptor.pdbqt",
+        "ligand": "molecular_docking/PoseBuster/7LCU/7LCU_ligand_prep.sdf",
+        "ligand_config": "molecular_docking/PoseBuster/7LCU/docking_grid.json"
+    },
+    "7KZ9": {
+        "protein": "molecular_docking/PoseBuster/7KZ9/7KZ9_receptor.pdbqt",
+        "ligand": "molecular_docking/PoseBuster/7KZ9/7KZ9_ligand_prep.sdf",
+        "ligand_config": "molecular_docking/PoseBuster/7KZ9/docking_grid.json"
+    }
+}
+```
+
+Typical usage of paired_batch mode, with a batch size of 10, is shown below:
+
+`build/unidock --paired_batch_size 10 --ligand_index data_pb1.json --size_x 25 --size_y 25 --size_z 25 --dir test/prof_25_1024_80 --exhaustiveness 1024 --max_step 80 --seed 5`
+
diff --git a/unidock/src/cuda/monte_carlo.cu b/unidock/src/cuda/monte_carlo.cu
index bb2c408..b0ad2e0 100644
--- a/unidock/src/cuda/monte_carlo.cu
+++ b/unidock/src/cuda/monte_carlo.cu
@@ -212,6 +212,604 @@ __global__ __launch_bounds__(MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP) void kern
 
 /* Above based on kernel.cl */
 
+
+__host__ void monte_carlo::mc_stream(
+    std::vector<model> &m_gpu, std::vector<output_container> &out_gpu,
+    std::vector<precalculate_byatom> &p_gpu, triangular_matrix_cuda_t *m_data_list_gpu,
+    const igrid &ig, const vec &corner1, const vec &corner2, rng &generator, int verbosity,
+    unsigned long long seed, std::vector<std::vector<bias_element>> &bias_batch_list) const {
+    /* Definitions from vina1.2 */
+    DEBUG_PRINTF("entering CUDA monte_carlo search\n");  // debug
+
+    cudaStream_t curr_stream = 0;
+    checkCUDA(cudaStreamCreate(&curr_stream));
+    DEBUG_PRINTF("Stream created [0x%p]\n", curr_stream);
+
+    vec authentic_v(1000, 1000,
+                    1000);  // FIXME? this is here to avoid max_fl/max_fl
+
+    quasi_newton quasi_newton_par;
+    const int quasi_newton_par_max_steps = local_steps;  // no need to decrease step
+
+    /* Allocate CPU memory and define new data structure */
+    DEBUG_PRINTF("Allocating CPU memory\n");  // debug
+    m_cuda_t *m_cuda;
+    checkCUDA(cudaMallocHost(&m_cuda, sizeof(m_cuda_t)));
+    memset(m_cuda, 0, sizeof(m_cuda_t));
+
+    output_type_cuda_t *rand_molec_struc_tmp;
+    checkCUDA(cudaMallocHost(&rand_molec_struc_tmp, sizeof(output_type_cuda_t)));
+    memset(rand_molec_struc_tmp, 0, sizeof(output_type_cuda_t));
+
+    ig_cuda_t *ig_cuda_ptr;
+    checkCUDA(cudaMallocHost(&ig_cuda_ptr, sizeof(ig_cuda_t)));
+    memset(ig_cuda_ptr, 0, sizeof(ig_cuda_t));
+
+    p_cuda_t_cpu *p_cuda;
+    checkCUDA(cudaMallocHost(&p_cuda, sizeof(p_cuda_t_cpu)));
+    memset(p_cuda, 0, sizeof(p_cuda_t_cpu));
+
+    /* End CPU allocation */
+
+    /* Allocate GPU memory */
+    DEBUG_PRINTF("Allocating GPU memory\n");
+    size_t m_cuda_size = sizeof(m_cuda_t);
+    DEBUG_PRINTF("m_cuda_size=%lu\n", m_cuda_size);
+    size_t ig_cuda_size = sizeof(ig_cuda_t);
+    DEBUG_PRINTF("ig_cuda_size=%lu\n", ig_cuda_size);
+    DEBUG_PRINTF("p_cuda_size_cpu=%lu\n", sizeof(p_cuda_t_cpu));
+
+    size_t p_cuda_size_gpu = sizeof(p_cuda_t);
+    DEBUG_PRINTF("p_cuda_size_gpu=%lu\n", p_cuda_size_gpu);
+
+    // rand_molec_struc_gpu
+    float *rand_molec_struc_gpu;
+    checkCUDA(cudaMalloc(&rand_molec_struc_gpu, thread * SIZE_OF_MOLEC_STRUC));
+    checkCUDA(cudaMemsetAsync(rand_molec_struc_gpu, 0, thread * SIZE_OF_MOLEC_STRUC, curr_stream));
+
+    float epsilon_fl_float = static_cast<float>(epsilon_fl);
+
+    // use cuRand to generate random values on GPU
+    curandStatePhilox4_32_10_t *states;
+    DEBUG_PRINTF("random states size=%lu\n", sizeof(curandStatePhilox4_32_10_t) * thread);
+    checkCUDA(cudaMalloc(&states, sizeof(curandStatePhilox4_32_10_t) * thread));
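+    // Defensive zero-initialisation: per the commit notes, missing CUDA
+    // memory initialisation caused crashes, so every cudaMalloc in this
+    // function is paired with a cudaMemsetAsync on the worker stream.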
checkCUDA(cudaMemsetAsync(states, 0, sizeof(curandStatePhilox4_32_10_t) * thread, curr_stream)); + + // hunt_cap_gpu + float *hunt_cap_gpu; + float hunt_cap_float[3] = {static_cast(hunt_cap[0]), static_cast(hunt_cap[1]), + static_cast(hunt_cap[2])}; + + checkCUDA(cudaMalloc(&hunt_cap_gpu, 3 * sizeof(float))); + checkCUDA(cudaMemsetAsync(hunt_cap_gpu, 0, 3 * sizeof(float), curr_stream)); + // Preparing m related data + m_cuda_t *m_cuda_gpu; + DEBUG_PRINTF("m_cuda_size=%lu", m_cuda_size); + checkCUDA(cudaMalloc(&m_cuda_gpu, num_of_ligands * m_cuda_size)); + checkCUDA(cudaMemsetAsync(m_cuda_gpu, 0, num_of_ligands * m_cuda_size, curr_stream)); + // Preparing p related data + + p_cuda_t *p_cuda_gpu; + checkCUDA(cudaMalloc(&p_cuda_gpu, num_of_ligands * p_cuda_size_gpu)); + checkCUDA(cudaMemsetAsync(p_cuda_gpu, 0, num_of_ligands * p_cuda_size_gpu, curr_stream)); + DEBUG_PRINTF("p_cuda_gpu=%p\n", p_cuda_gpu); + // Preparing ig related data (cache related data) + ig_cuda_t *ig_cuda_gpu; + + float *authentic_v_gpu; + float authentic_v_float[3] + = {static_cast(authentic_v[0]), static_cast(authentic_v[1]), + static_cast(authentic_v[2])}; + + checkCUDA(cudaMalloc(&authentic_v_gpu, sizeof(authentic_v_float))); + checkCUDA(cudaMemsetAsync(authentic_v_gpu, 0, sizeof(authentic_v_float), curr_stream)); + // Preparing result data + output_type_cuda_t *results_gpu; + checkCUDA(cudaMalloc(&results_gpu, thread * sizeof(output_type_cuda_t))); + checkCUDA(cudaMemsetAsync(results_gpu, 0, thread * sizeof(output_type_cuda_t), curr_stream)); + + m_cuda_t *m_cuda_global; + checkCUDA(cudaMalloc(&m_cuda_global, thread * sizeof(m_cuda_t))); + checkCUDA(cudaMemsetAsync(m_cuda_global, 0, thread * sizeof(m_cuda_t), curr_stream)); + + matrix_d *h_cuda_global; + checkCUDA(cudaMalloc(&h_cuda_global, thread * sizeof(matrix_d))); + checkCUDA(cudaMemsetAsync(h_cuda_global, 0, thread * sizeof(matrix_d), curr_stream)); + + /* End Allocating GPU Memory */ + + assert(num_of_ligands <= MAX_LIGAND_NUM); + assert(thread <= MAX_THREAD); + + struct tmp_struct { + int start_index = 0; + int parent_index = 0; + void store_node(tree &child_ptr, rigid_cuda_t &rigid) { + start_index++; // start with index 1, index 0 is root node + rigid.parent[start_index] = parent_index; + rigid.atom_range[start_index][0] = child_ptr.node.begin; + rigid.atom_range[start_index][1] = child_ptr.node.end; + for (int i = 0; i < 9; i++) + rigid.orientation_m[start_index][i] = child_ptr.node.get_orientation_m().data[i]; + rigid.orientation_q[start_index][0] = child_ptr.node.orientation().R_component_1(); + rigid.orientation_q[start_index][1] = child_ptr.node.orientation().R_component_2(); + rigid.orientation_q[start_index][2] = child_ptr.node.orientation().R_component_3(); + rigid.orientation_q[start_index][3] = child_ptr.node.orientation().R_component_4(); + for (int i = 0; i < 3; i++) { + rigid.origin[start_index][i] = child_ptr.node.get_origin()[i]; + rigid.axis[start_index][i] = child_ptr.node.get_axis()[i]; + rigid.relative_axis[start_index][i] = child_ptr.node.relative_axis[i]; + rigid.relative_origin[start_index][i] = child_ptr.node.relative_origin[i]; + } + if (child_ptr.children.size() == 0) + return; + else { + assert(start_index < MAX_NUM_OF_RIGID); + int parent_index_tmp = start_index; + for (int i = 0; i < child_ptr.children.size(); i++) { + this->parent_index = parent_index_tmp; // Update parent index + this->store_node(child_ptr.children[i], rigid); + } + } + } + }; + + for (int l = 0; l < num_of_ligands; ++l) { + model &m = m_gpu[l]; + const 
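+        // Stage this ligand on the host: the model is flattened into the
+        // pinned m_cuda struct and its precalculated pair potentials into
+        // p_cuda, then both are uploaded with cudaMemcpyAsync on curr_stream.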
precalculate_byatom &p = p_gpu[l]; + + /* Prepare m related data */ + conf_size s = m.get_size(); + change g(s); + output_type tmp(s, 0); + tmp.c = m.get_initial_conf(); + + assert(m.atoms.size() < MAX_NUM_OF_ATOMS); + + // Preparing ligand data + DEBUG_PRINTF("prepare ligand data\n"); + assert(m.num_other_pairs() == 0); // m.other_pairs is not supported! + assert(m.ligands.size() <= 1); // Only one ligand supported! + + if (m.ligands.size() == 0) { // ligand parsing error + m_cuda->m_num_movable_atoms = -1; + DEBUG_PRINTF("copy m_cuda to gpu, size=%lu\n", sizeof(m_cuda_t)); + checkCUDA(cudaMemcpyAsync(m_cuda_gpu + l, m_cuda, sizeof(m_cuda_t), cudaMemcpyHostToDevice, curr_stream)); + } else { + for (int i = 0; i < m.atoms.size(); i++) { + m_cuda->atoms[i].types[0] + = m.atoms[i].el; // To store 4 atoms types (el, ad, xs, sy) + m_cuda->atoms[i].types[1] = m.atoms[i].ad; + m_cuda->atoms[i].types[2] = m.atoms[i].xs; + m_cuda->atoms[i].types[3] = m.atoms[i].sy; + for (int j = 0; j < 3; j++) { + m_cuda->atoms[i].coords[j] = m.atoms[i].coords[j]; // To store atom coords + } + } + + // To store atoms coords + for (int i = 0; i < m.coords.size(); i++) { + for (int j = 0; j < 3; j++) { + m_cuda->m_coords.coords[i][j] = m.coords[i].data[j]; + } + } + + // To store minus forces + for (int i = 0; i < m.coords.size(); i++) { + for (int j = 0; j < 3; j++) { + m_cuda->minus_forces.coords[i][j] = m.minus_forces[i].data[j]; + } + } + + m_cuda->ligand.pairs.num_pairs = m.ligands[0].pairs.size(); + for (int i = 0; i < m_cuda->ligand.pairs.num_pairs; i++) { + m_cuda->ligand.pairs.type_pair_index[i] = m.ligands[0].pairs[i].type_pair_index; + m_cuda->ligand.pairs.a[i] = m.ligands[0].pairs[i].a; + m_cuda->ligand.pairs.b[i] = m.ligands[0].pairs[i].b; + } + m_cuda->ligand.begin = m.ligands[0].begin; // 0 + m_cuda->ligand.end = m.ligands[0].end; // 29 + ligand &m_ligand = m.ligands[0]; // Only support one ligand + DEBUG_PRINTF("m_ligand.end=%lu, MAX_NUM_OF_ATOMS=%d\n", m_ligand.end, MAX_NUM_OF_ATOMS); + assert(m_ligand.end < MAX_NUM_OF_ATOMS); + + // Store root node + m_cuda->ligand.rigid.atom_range[0][0] = m_ligand.node.begin; + m_cuda->ligand.rigid.atom_range[0][1] = m_ligand.node.end; + for (int i = 0; i < 3; i++) + m_cuda->ligand.rigid.origin[0][i] = m_ligand.node.get_origin()[i]; + for (int i = 0; i < 9; i++) + m_cuda->ligand.rigid.orientation_m[0][i] + = m_ligand.node.get_orientation_m().data[i]; + m_cuda->ligand.rigid.orientation_q[0][0] = m_ligand.node.orientation().R_component_1(); + m_cuda->ligand.rigid.orientation_q[0][1] = m_ligand.node.orientation().R_component_2(); + m_cuda->ligand.rigid.orientation_q[0][2] = m_ligand.node.orientation().R_component_3(); + m_cuda->ligand.rigid.orientation_q[0][3] = m_ligand.node.orientation().R_component_4(); + for (int i = 0; i < 3; i++) { + m_cuda->ligand.rigid.axis[0][i] = 0; + m_cuda->ligand.rigid.relative_axis[0][i] = 0; + m_cuda->ligand.rigid.relative_origin[0][i] = 0; + } + + // Store children nodes (in depth-first order) + DEBUG_PRINTF("store children nodes\n"); + + tmp_struct ts; + for (int i = 0; i < m_ligand.children.size(); i++) { + ts.parent_index = 0; // Start a new branch, whose parent is 0 + ts.store_node(m_ligand.children[i], m_cuda->ligand.rigid); + } + m_cuda->ligand.rigid.num_children = ts.start_index; + + // set children map + DEBUG_PRINTF("set children map\n"); + for (int i = 0; i < MAX_NUM_OF_RIGID; i++) + for (int j = 0; j < MAX_NUM_OF_RIGID; j++) { + m_cuda->ligand.rigid.children_map[i][j] = false; + m_cuda->ligand.rigid.descendant_map[i][j] = 
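+                    // children_map[i][j] marks j as a direct child of i;
+                    // descendant_map is then closed transitively in the loop
+                    // that follows, by propagating each node's descendants up
+                    // through parent[].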
false; + } + + for (int i = MAX_NUM_OF_RIGID - 1; i >= 0; i--) { + if (i > 0) { + m_cuda->ligand.rigid.children_map[m_cuda->ligand.rigid.parent[i]][i] = true; + m_cuda->ligand.rigid.descendant_map[m_cuda->ligand.rigid.parent[i]][i] = true; + } + for (int j = i + 1; j < MAX_NUM_OF_RIGID; j++) { + if (m_cuda->ligand.rigid.descendant_map[i][j]) + m_cuda->ligand.rigid.descendant_map[m_cuda->ligand.rigid.parent[i]][j] + = true; + } + } + m_cuda->m_num_movable_atoms = m.num_movable_atoms(); + + DEBUG_PRINTF("copy m_cuda to gpu, size=%lu\n", sizeof(m_cuda_t)); + checkCUDA(cudaMemcpyAsync(m_cuda_gpu + l, m_cuda, sizeof(m_cuda_t), cudaMemcpyHostToDevice, curr_stream)); + + /* Prepare rand_molec_struc data */ + int lig_torsion_size = tmp.c.ligands[0].torsions.size(); + DEBUG_PRINTF("lig_torsion_size=%d\n", lig_torsion_size); + int flex_torsion_size; + if (tmp.c.flex.size() != 0) + flex_torsion_size = tmp.c.flex[0].torsions.size(); + else + flex_torsion_size = 0; + // std::vector uniform_data; + // uniform_data.resize(thread); + + for (int i = 0; i < threads_per_ligand; ++i) { + if (!local_only) { + tmp.c.randomize(corner1, corner2, + generator); // generate a random structure, + // can move to GPU if necessary + } + for (int j = 0; j < 3; j++) + rand_molec_struc_tmp->position[j] = tmp.c.ligands[0].rigid.position[j]; + assert(lig_torsion_size <= MAX_NUM_OF_LIG_TORSION); + for (int j = 0; j < lig_torsion_size; j++) + rand_molec_struc_tmp->lig_torsion[j] + = tmp.c.ligands[0].torsions[j]; // Only support one ligand + assert(flex_torsion_size <= MAX_NUM_OF_FLEX_TORSION); + for (int j = 0; j < flex_torsion_size; j++) + rand_molec_struc_tmp->flex_torsion[j] + = tmp.c.flex[0].torsions[j]; // Only support one flex + + rand_molec_struc_tmp->orientation[0] + = (float)tmp.c.ligands[0].rigid.orientation.R_component_1(); + rand_molec_struc_tmp->orientation[1] + = (float)tmp.c.ligands[0].rigid.orientation.R_component_2(); + rand_molec_struc_tmp->orientation[2] + = (float)tmp.c.ligands[0].rigid.orientation.R_component_3(); + rand_molec_struc_tmp->orientation[3] + = (float)tmp.c.ligands[0].rigid.orientation.R_component_4(); + + rand_molec_struc_tmp->lig_torsion_size = lig_torsion_size; + + float *rand_molec_struc_gpu_tmp + = rand_molec_struc_gpu + + (l * threads_per_ligand + i) * SIZE_OF_MOLEC_STRUC / sizeof(float); + checkCUDA(cudaMemcpyAsync(rand_molec_struc_gpu_tmp, rand_molec_struc_tmp, + SIZE_OF_MOLEC_STRUC, cudaMemcpyHostToDevice, curr_stream)); + } + + /* Preparing p related data */ + DEBUG_PRINTF("Preaparing p related data\n"); // debug + + // copy pointer instead of data + p_cuda->m_cutoff_sqr = p.m_cutoff_sqr; + p_cuda->factor = p.m_factor; + p_cuda->n = p.m_n; + p_cuda->m_data_size = p.m_data.m_data.size(); + checkCUDA(cudaMemcpyAsync(p_cuda_gpu + l, p_cuda, sizeof(p_cuda_t), cudaMemcpyHostToDevice, curr_stream)); + checkCUDA(cudaMemcpyAsync(&(p_cuda_gpu[l].m_data), &(m_data_list_gpu[l].p_data), + sizeof(p_m_data_cuda_t *), + cudaMemcpyHostToDevice, curr_stream)); // check if fl == float + } + } + + /* Prepare data only concerns rigid receptor */ + + // Preparing igrid related data + DEBUG_PRINTF("Preparing ig related data\n"); // debug + + bool multi_bias = (bias_batch_list.size() == num_of_ligands); + if (multi_bias) { + // multi bias mode + std::cout << "with multi bias "; + + checkCUDA(cudaMalloc(&ig_cuda_gpu, ig_cuda_size * num_of_ligands)); + checkCUDA(cudaMemsetAsync(ig_cuda_gpu, 0, ig_cuda_size * num_of_ligands, curr_stream)); + for (int l = 0; l < num_of_ligands; ++l) { + if (ig.get_atu() == 
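+                // XS atom typing selects the Vina/Vinardo grid cache (the
+                // per-ligand bias is applied through compute_bias); any other
+                // typing is handled as AD4 maps through ad4cache::set_bias.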
atom_type::XS) { + cache ig_tmp(ig.get_gd(), ig.get_slope()); + ig_tmp.m_grids = ig.get_grids(); + // // debug + // if (l == 1){ + // std::cout << "writing original grid map\n"; + // ig_tmp.write(std::string("./ori"), szv(1,0)); + // } + ig_tmp.compute_bias(m_gpu[l], bias_batch_list[l]); + // // debug + // std::cout << "writing bias\n"; + // ig_tmp.write(std::string("./")+std::to_string(l), szv(1,0)); + ig_cuda_ptr->atu = ig.get_atu(); // atu + DEBUG_PRINTF("ig_cuda_ptr->atu=%d\n", ig_cuda_ptr->atu); + ig_cuda_ptr->slope = ig.get_slope(); // slope + std::vector tmp_grids = ig.get_grids(); + int grid_size = tmp_grids.size(); + DEBUG_PRINTF("ig.size()=%d, GRIDS_SIZE=%d, should be 33\n", grid_size, GRIDS_SIZE); + + for (int i = 0; i < grid_size; i++) { + // DEBUG_PRINTF("i=%d\n",i); //debug + for (int j = 0; j < 3; j++) { + ig_cuda_ptr->grids[i].m_init[j] = tmp_grids[i].m_init[j]; + ig_cuda_ptr->grids[i].m_factor[j] = tmp_grids[i].m_factor[j]; + ig_cuda_ptr->grids[i].m_dim_fl_minus_1[j] + = tmp_grids[i].m_dim_fl_minus_1[j]; + ig_cuda_ptr->grids[i].m_factor_inv[j] = tmp_grids[i].m_factor_inv[j]; + } + if (tmp_grids[i].m_data.dim0() != 0) { + ig_cuda_ptr->grids[i].m_i = tmp_grids[i].m_data.dim0(); + assert(MAX_NUM_OF_GRID_MI >= ig_cuda_ptr->grids[i].m_i); + ig_cuda_ptr->grids[i].m_j = tmp_grids[i].m_data.dim1(); + assert(MAX_NUM_OF_GRID_MJ >= ig_cuda_ptr->grids[i].m_j); + ig_cuda_ptr->grids[i].m_k = tmp_grids[i].m_data.dim2(); + assert(MAX_NUM_OF_GRID_MK >= ig_cuda_ptr->grids[i].m_k); + + assert(tmp_grids[i].m_data.m_data.size() + == ig_cuda_ptr->grids[i].m_i * ig_cuda_ptr->grids[i].m_j + * ig_cuda_ptr->grids[i].m_k); + assert(tmp_grids[i].m_data.m_data.size() <= MAX_NUM_OF_GRID_POINT); + memcpy(ig_cuda_ptr->grids[i].m_data, tmp_grids[i].m_data.m_data.data(), + tmp_grids[i].m_data.m_data.size() * sizeof(fl)); + } else { + ig_cuda_ptr->grids[i].m_i = 0; + ig_cuda_ptr->grids[i].m_j = 0; + ig_cuda_ptr->grids[i].m_k = 0; + } + } + } else { + ad4cache ig_tmp(ig.get_slope()); + ig_tmp.m_grids = ig.get_grids(); + // // debug + // if (l == 1){ + // std::cout << "writing original grid map\n"; + // ig_tmp.write(std::string("./ori"), szv(1,0)); + // } + ig_tmp.set_bias(bias_batch_list[l]); + // // debug + // std::cout << "writing bias\n"; + // ig_tmp.write(std::string("./")+std::to_string(l), szv(1,0)); + ig_cuda_ptr->atu = ig.get_atu(); // atu + DEBUG_PRINTF("ig_cuda_ptr->atu=%d\n", ig_cuda_ptr->atu); + ig_cuda_ptr->slope = ig.get_slope(); // slope + std::vector tmp_grids = ig.get_grids(); + int grid_size = tmp_grids.size(); + DEBUG_PRINTF("ig.size()=%d, GRIDS_SIZE=%d, should be 33\n", grid_size, GRIDS_SIZE); + + for (int i = 0; i < grid_size; i++) { + // DEBUG_PRINTF("i=%d\n",i); //debug + for (int j = 0; j < 3; j++) { + ig_cuda_ptr->grids[i].m_init[j] = tmp_grids[i].m_init[j]; + ig_cuda_ptr->grids[i].m_factor[j] = tmp_grids[i].m_factor[j]; + ig_cuda_ptr->grids[i].m_dim_fl_minus_1[j] + = tmp_grids[i].m_dim_fl_minus_1[j]; + ig_cuda_ptr->grids[i].m_factor_inv[j] = tmp_grids[i].m_factor_inv[j]; + } + if (tmp_grids[i].m_data.dim0() != 0) { + ig_cuda_ptr->grids[i].m_i = tmp_grids[i].m_data.dim0(); + assert(MAX_NUM_OF_GRID_MI >= ig_cuda_ptr->grids[i].m_i); + ig_cuda_ptr->grids[i].m_j = tmp_grids[i].m_data.dim1(); + assert(MAX_NUM_OF_GRID_MJ >= ig_cuda_ptr->grids[i].m_j); + ig_cuda_ptr->grids[i].m_k = tmp_grids[i].m_data.dim2(); + assert(MAX_NUM_OF_GRID_MK >= ig_cuda_ptr->grids[i].m_k); + + assert(tmp_grids[i].m_data.m_data.size() + == ig_cuda_ptr->grids[i].m_i * ig_cuda_ptr->grids[i].m_j + * 
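+                        /* the flattened grid data must contain exactly
+                           m_i * m_j * m_k points to be memcpy'd below */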
ig_cuda_ptr->grids[i].m_k); + memcpy(ig_cuda_ptr->grids[i].m_data, tmp_grids[i].m_data.m_data.data(), + tmp_grids[i].m_data.m_data.size() * sizeof(fl)); + } else { + ig_cuda_ptr->grids[i].m_i = 0; + ig_cuda_ptr->grids[i].m_j = 0; + ig_cuda_ptr->grids[i].m_k = 0; + } + } + } + + checkCUDA( + cudaMemcpyAsync(ig_cuda_gpu + l, ig_cuda_ptr, ig_cuda_size, cudaMemcpyHostToDevice, curr_stream)); + } + std::cout << "set\n"; + } else { + ig_cuda_ptr->atu = ig.get_atu(); // atu + DEBUG_PRINTF("ig_cuda_ptr->atu=%d\n", ig_cuda_ptr->atu); + ig_cuda_ptr->slope = ig.get_slope(); // slope + std::vector tmp_grids = ig.get_grids(); + int grid_size = tmp_grids.size(); + DEBUG_PRINTF("ig.size()=%d, GRIDS_SIZE=%d, should be 33\n", grid_size, GRIDS_SIZE); + + for (int i = 0; i < grid_size; i++) { + // DEBUG_PRINTF("i=%d\n",i); //debug + for (int j = 0; j < 3; j++) { + ig_cuda_ptr->grids[i].m_init[j] = tmp_grids[i].m_init[j]; + ig_cuda_ptr->grids[i].m_factor[j] = tmp_grids[i].m_factor[j]; + ig_cuda_ptr->grids[i].m_dim_fl_minus_1[j] = tmp_grids[i].m_dim_fl_minus_1[j]; + ig_cuda_ptr->grids[i].m_factor_inv[j] = tmp_grids[i].m_factor_inv[j]; + } + if (tmp_grids[i].m_data.dim0() != 0) { + ig_cuda_ptr->grids[i].m_i = tmp_grids[i].m_data.dim0(); + assert(MAX_NUM_OF_GRID_MI >= ig_cuda_ptr->grids[i].m_i); + ig_cuda_ptr->grids[i].m_j = tmp_grids[i].m_data.dim1(); + assert(MAX_NUM_OF_GRID_MJ >= ig_cuda_ptr->grids[i].m_j); + ig_cuda_ptr->grids[i].m_k = tmp_grids[i].m_data.dim2(); + assert(MAX_NUM_OF_GRID_MK >= ig_cuda_ptr->grids[i].m_k); + + assert(tmp_grids[i].m_data.m_data.size() + == ig_cuda_ptr->grids[i].m_i * ig_cuda_ptr->grids[i].m_j + * ig_cuda_ptr->grids[i].m_k); + memcpy(ig_cuda_ptr->grids[i].m_data, tmp_grids[i].m_data.m_data.data(), + tmp_grids[i].m_data.m_data.size() * sizeof(fl)); + } else { + ig_cuda_ptr->grids[i].m_i = 0; + ig_cuda_ptr->grids[i].m_j = 0; + ig_cuda_ptr->grids[i].m_k = 0; + } + } + DEBUG_PRINTF("memcpy ig_cuda, ig_cuda_size=%lu\n", ig_cuda_size); + checkCUDA(cudaMalloc(&ig_cuda_gpu, ig_cuda_size)); + checkCUDA(cudaMemcpyAsync(ig_cuda_gpu, ig_cuda_ptr, ig_cuda_size, cudaMemcpyHostToDevice, curr_stream)); + } + + float mutation_amplitude_float = static_cast(mutation_amplitude); + + checkCUDA(cudaMemcpyAsync(hunt_cap_gpu, hunt_cap_float, 3 * sizeof(float), cudaMemcpyHostToDevice, curr_stream)); + + checkCUDA(cudaMemcpyAsync(authentic_v_gpu, authentic_v_float, sizeof(authentic_v_float), + cudaMemcpyHostToDevice, curr_stream)); + + /* Add timing */ + cudaEvent_t start, stop; + checkCUDA(cudaEventCreate(&start)); + checkCUDA(cudaEventCreate(&stop)); + checkCUDA(cudaEventRecord(start, curr_stream)); + + /* Launch kernel */ + DEBUG_PRINTF("launch kernel, global_steps=%d, thread=%d, num_of_ligands=%d\n", global_steps, + thread, num_of_ligands); + + output_type_cuda_t *results_aux; + checkCUDA(cudaMalloc(&results_aux, 5 * thread * sizeof(output_type_cuda_t))); + checkCUDA(cudaMemsetAsync(results_aux, 0, 5 * thread * sizeof(output_type_cuda_t), curr_stream)); + change_cuda_t *change_aux; + checkCUDA(cudaMalloc(&change_aux, 6 * thread * sizeof(change_cuda_t))); + checkCUDA(cudaMemsetAsync(change_aux, 0, 6 * thread * sizeof(change_cuda_t), curr_stream)); + pot_cuda_t *pot_aux; + checkCUDA(cudaMalloc(&pot_aux, thread * sizeof(pot_cuda_t))); + checkCUDA(cudaMemsetAsync(pot_aux, 0, thread * sizeof(pot_cuda_t), curr_stream)); + + kernel<32><<>>(m_cuda_gpu, ig_cuda_gpu, p_cuda_gpu, rand_molec_struc_gpu, + quasi_newton_par_max_steps, mutation_amplitude_float, states, seed, + epsilon_fl_float, hunt_cap_gpu, 
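+        /* this launch is issued into curr_stream, so the kernel is ordered
+           after every cudaMemcpyAsync/cudaMemsetAsync queued above, and the
+           cudaStreamSynchronize below fences its completion */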
authentic_v_gpu, results_gpu, + results_aux, change_aux, pot_aux, h_cuda_global, m_cuda_global, + global_steps, num_of_ligands, threads_per_ligand, multi_bias); + + + // Wait for stream operations to complete + checkCUDA(cudaStreamSynchronize(curr_stream)); + + // Device to Host memcpy of precalculated_byatom, copy back data to p_gpu + p_m_data_cuda_t *p_data; + checkCUDA(cudaMallocHost(&p_data, sizeof(p_m_data_cuda_t) * MAX_P_DATA_M_DATA_SIZE)); + memset(p_data, 0, sizeof(p_m_data_cuda_t) * MAX_P_DATA_M_DATA_SIZE); + output_type_cuda_t *results; + checkCUDA(cudaMallocHost(&results, thread * sizeof(output_type_cuda_t))); + memset(results, 0, thread * sizeof(output_type_cuda_t)); + + for (int l = 0; l < num_of_ligands; ++l) { + // copy data to m_data on CPU, then to p_gpu[l] + int pnum = p_gpu[l].m_data.m_data.size(); + checkCUDA(cudaMemcpy(p_data, m_data_list_gpu[l].p_data, sizeof(p_m_data_cuda_t) * pnum, + cudaMemcpyDeviceToHost)); + checkCUDA(cudaFree(m_data_list_gpu[l].p_data)); // free m_cuda pointers in p_cuda + for (int i = 0; i < pnum; ++i) { + memcpy(&p_gpu[l].m_data.m_data[i].fast[0], p_data[i].fast, sizeof(p_data[i].fast)); + memcpy(&p_gpu[l].m_data.m_data[i].smooth[0], p_data[i].smooth, + sizeof(p_data[i].smooth)); + } + } + // DEBUG_PRINTF("energies about the first ligand on GPU:\n"); + // for (int i = 0;i < 20; ++i){ + // DEBUG_PRINTF("precalculated_byatom.m_data.m_data[%d]: (smooth.first, + // smooth.second, fast) ", i); for (int j = 0;j < FAST_SIZE; ++j){ + // DEBUG_PRINTF("(%f, %f, %f) ", + // p_gpu[0].m_data.m_data[i].smooth[j].first, + // p_gpu[0].m_data.m_data[i].smooth[j].second, + // p_gpu[0].m_data.m_data[i].fast[j]); + // } + // DEBUG_PRINTF("\n"); + // } + + /* Timing output */ + + checkCUDA(cudaEventRecord(stop, curr_stream)); + cudaEventSynchronize(stop); + float msecTotal = 0.0f; + cudaEventElapsedTime(&msecTotal, start, stop); + DEBUG_PRINTF("Time spend on GPU is %f ms\n", msecTotal); + + /* Convert result data. 
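+       (the 'results' buffer is pinned via cudaMallocHost, so the blocking
+       cudaMemcpy below is DMA-friendly, but still an extra copy)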
Can be improved by mapping memory + */ + DEBUG_PRINTF("cuda to vina\n"); + + checkCUDA(cudaMemcpy(results, results_gpu, thread * sizeof(output_type_cuda_t), + cudaMemcpyDeviceToHost)); + + std::vector result_vina = cuda_to_vina(results, thread); + + DEBUG_PRINTF("result size=%lu\n", result_vina.size()); + + for (int i = 0; i < thread; ++i) { + add_to_output_container(out_gpu[i / threads_per_ligand], result_vina[i], min_rmsd, + num_saved_mins); + } + for (int i = 0; i < num_of_ligands; ++i) { + DEBUG_PRINTF("output poses size = %lu\n", out_gpu[i].size()); + if (out_gpu[i].size() == 0) continue; + DEBUG_PRINTF("output poses energy from gpu ="); + for (int j = 0; j < out_gpu[i].size(); ++j) DEBUG_PRINTF("%f ", out_gpu[i][j].e); + DEBUG_PRINTF("\n"); + } + + /* Free memory */ + checkCUDA(cudaFree(m_cuda_gpu)); + checkCUDA(cudaFree(ig_cuda_gpu)); + checkCUDA(cudaFree(p_cuda_gpu)); + checkCUDA(cudaFree(rand_molec_struc_gpu)); + checkCUDA(cudaFree(hunt_cap_gpu)); + checkCUDA(cudaFree(authentic_v_gpu)); + checkCUDA(cudaFree(results_gpu)); + checkCUDA(cudaFree(change_aux)); + checkCUDA(cudaFree(results_aux)); + checkCUDA(cudaFree(pot_aux)); + checkCUDA(cudaFree(states)); + checkCUDA(cudaFree(h_cuda_global)); + checkCUDA(cudaFree(m_cuda_global)); + checkCUDA(cudaFreeHost(m_cuda)); + checkCUDA(cudaFreeHost(rand_molec_struc_tmp)); + checkCUDA(cudaFreeHost(ig_cuda_ptr)); + checkCUDA(cudaFreeHost(p_cuda)); + checkCUDA(cudaFreeHost(p_data)); + checkCUDA(cudaFreeHost(results)); + + checkCUDA(cudaEventDestroy(start)); + checkCUDA(cudaEventDestroy(stop)); + checkCUDA(cudaStreamDestroy(curr_stream)); + curr_stream = 0; + + DEBUG_PRINTF("exit monte_carlo\n"); + +} + /* Below based on monte-carlo.cpp */ // #ifdef ENABLE_CUDA @@ -262,15 +860,19 @@ __host__ void monte_carlo::operator()( DEBUG_PRINTF("Allocating CPU memory\n"); // debug m_cuda_t *m_cuda; checkCUDA(cudaMallocHost(&m_cuda, sizeof(m_cuda_t))); + memset(m_cuda, 0, sizeof(m_cuda_t)); output_type_cuda_t *rand_molec_struc_tmp; checkCUDA(cudaMallocHost(&rand_molec_struc_tmp, sizeof(output_type_cuda_t))); + memset(rand_molec_struc_tmp, 0, sizeof(output_type_cuda_t)); ig_cuda_t *ig_cuda_ptr; checkCUDA(cudaMallocHost(&ig_cuda_ptr, sizeof(ig_cuda_t))); + memset(ig_cuda_ptr, 0, sizeof(ig_cuda_t)); p_cuda_t_cpu *p_cuda; checkCUDA(cudaMallocHost(&p_cuda, sizeof(p_cuda_t_cpu))); + memset(p_cuda, 0, sizeof(p_cuda_t_cpu)); /* End CPU allocation */ @@ -288,12 +890,15 @@ __host__ void monte_carlo::operator()( // rand_molec_struc_gpu float *rand_molec_struc_gpu; checkCUDA(cudaMalloc(&rand_molec_struc_gpu, thread * SIZE_OF_MOLEC_STRUC)); + checkCUDA(cudaMemset(rand_molec_struc_gpu, 0, thread * SIZE_OF_MOLEC_STRUC)); + float epsilon_fl_float = static_cast(epsilon_fl); // use cuRand to generate random values on GPU curandStatePhilox4_32_10_t *states; DEBUG_PRINTF("random states size=%lu\n", sizeof(curandStatePhilox4_32_10_t) * thread); checkCUDA(cudaMalloc(&states, sizeof(curandStatePhilox4_32_10_t) * thread)); + checkCUDA(cudaMemset(states, 0, sizeof(curandStatePhilox4_32_10_t) * thread)); // hunt_cap_gpu float *hunt_cap_gpu; @@ -301,14 +906,17 @@ __host__ void monte_carlo::operator()( static_cast(hunt_cap[2])}; checkCUDA(cudaMalloc(&hunt_cap_gpu, 3 * sizeof(float))); + checkCUDA(cudaMemset(hunt_cap_gpu, 0, 3 * sizeof(float))); // Preparing m related data m_cuda_t *m_cuda_gpu; DEBUG_PRINTF("m_cuda_size=%lu", m_cuda_size); checkCUDA(cudaMalloc(&m_cuda_gpu, num_of_ligands * m_cuda_size)); + checkCUDA(cudaMemset(m_cuda_gpu, 0, num_of_ligands * m_cuda_size)); // Preparing 
p related data
 p_cuda_t *p_cuda_gpu;
 checkCUDA(cudaMalloc(&p_cuda_gpu, num_of_ligands * p_cuda_size_gpu));
+    checkCUDA(cudaMemset(p_cuda_gpu, 0, num_of_ligands * p_cuda_size_gpu));
 DEBUG_PRINTF("p_cuda_gpu=%p\n", p_cuda_gpu);
 // Preparing ig related data (cache related data)
 ig_cuda_t *ig_cuda_gpu;
@@ -319,15 +927,19 @@ __host__ void monte_carlo::operator()(
 static_cast<float>(authentic_v[2])};
 
 checkCUDA(cudaMalloc(&authentic_v_gpu, sizeof(authentic_v_float)));
+    checkCUDA(cudaMemset(authentic_v_gpu, 0, sizeof(authentic_v_float)));
 // Preparing result data
 output_type_cuda_t *results_gpu;
 checkCUDA(cudaMalloc(&results_gpu, thread * sizeof(output_type_cuda_t)));
+    checkCUDA(cudaMemset(results_gpu, 0, thread * sizeof(output_type_cuda_t)));
 
 m_cuda_t *m_cuda_global;
 checkCUDA(cudaMalloc(&m_cuda_global, thread * sizeof(m_cuda_t)));
+    checkCUDA(cudaMemset(m_cuda_global, 0, thread * sizeof(m_cuda_t)));
 
 matrix_d *h_cuda_global;
 checkCUDA(cudaMalloc(&h_cuda_global, thread * sizeof(matrix_d)));
+    checkCUDA(cudaMemset(h_cuda_global, 0, thread * sizeof(matrix_d)));
 
 /* End Allocating GPU Memory */
 
@@ -550,6 +1162,7 @@ __host__ void monte_carlo::operator()(
 std::cout << "with multi bias ";
 
 checkCUDA(cudaMalloc(&ig_cuda_gpu, ig_cuda_size * num_of_ligands));
+    checkCUDA(cudaMemset(ig_cuda_gpu, 0, ig_cuda_size * num_of_ligands));
 for (int l = 0; l < num_of_ligands; ++l) {
 if (ig.get_atu() == atom_type::XS) {
 cache ig_tmp(ig.get_gd(), ig.get_slope());
@@ -713,10 +1326,13 @@ __host__ void monte_carlo::operator()(
 
 output_type_cuda_t *results_aux;
 checkCUDA(cudaMalloc(&results_aux, 5 * thread * sizeof(output_type_cuda_t)));
+    checkCUDA(cudaMemset(results_aux, 0, 5 * thread * sizeof(output_type_cuda_t)));
 change_cuda_t *change_aux;
 checkCUDA(cudaMalloc(&change_aux, 6 * thread * sizeof(change_cuda_t)));
+    checkCUDA(cudaMemset(change_aux, 0, 6 * thread * sizeof(change_cuda_t)));
 pot_cuda_t *pot_aux;
 checkCUDA(cudaMalloc(&pot_aux, thread * sizeof(pot_cuda_t)));
+    checkCUDA(cudaMemset(pot_aux, 0, thread * sizeof(pot_cuda_t)));
 
 kernel<32><<>>(m_cuda_gpu, ig_cuda_gpu, p_cuda_gpu, rand_molec_struc_gpu,
 quasi_newton_par_max_steps, mutation_amplitude_float, states, seed,
@@ -727,8 +1343,10 @@ __host__ void monte_carlo::operator()(
 // Device to Host memcpy of precalculated_byatom, copy back data to p_gpu
 p_m_data_cuda_t *p_data;
 checkCUDA(cudaMallocHost(&p_data, sizeof(p_m_data_cuda_t) * MAX_P_DATA_M_DATA_SIZE));
+    memset(p_data, 0, sizeof(p_m_data_cuda_t) * MAX_P_DATA_M_DATA_SIZE);
 output_type_cuda_t *results;
 checkCUDA(cudaMallocHost(&results, thread * sizeof(output_type_cuda_t)));
+    memset(results, 0, thread * sizeof(output_type_cuda_t));
 
 for (int l = 0; l < num_of_ligands; ++l) {
 // copy data to m_data on CPU, then to p_gpu[l]
diff --git a/unidock/src/lib/monte_carlo.h b/unidock/src/lib/monte_carlo.h
index feca0b4..8be77a2 100644
--- a/unidock/src/lib/monte_carlo.h
+++ b/unidock/src/lib/monte_carlo.h
@@ -63,6 +63,12 @@ struct monte_carlo {
 const igrid& ig, const vec& corner1, const vec& corner2, rng& generator,
 int verbosity, unsigned long long seed,
 std::vector<std::vector<bias_element> >& bias_batch_list) const;
 
+    void mc_stream(std::vector<model>& m, std::vector<output_container>& out,
+                   std::vector<precalculate_byatom>& p, triangular_matrix_cuda_t* m_data_list_gpu,
+                   const igrid& ig, const vec& corner1, const vec& corner2, rng& generator,
+                   int verbosity, unsigned long long seed,
+                   std::vector<std::vector<bias_element> >& bias_batch_list) const;
+
 std::vector<output_type> cuda_to_vina(output_type_cuda_t* results_p, int thread) const;
 };
 
diff --git a/unidock/src/lib/scoring_function.h b/unidock/src/lib/scoring_function.h
index 
71b9a58..77c119e 100644 --- a/unidock/src/lib/scoring_function.h +++ b/unidock/src/lib/scoring_function.h @@ -37,7 +37,11 @@ enum scoring_function_choice { SF_VINA, SF_AD42, SF_VINARDO }; class ScoringFunction { public: - ScoringFunction() {} + ScoringFunction() { + m_num_potentials = 0; + m_num_conf_independents = 0; + m_sf_choice = 0; + } ScoringFunction(const scoring_function_choice sf_choice, const flv& weights) { switch (sf_choice) { case SF_VINA: { @@ -81,7 +85,6 @@ class ScoringFunction { break; } default: { - std::cout << "INSIDE everything::everything() sfchoice = " << sf_choice << "\n"; VINA_CHECK(false); break; } @@ -90,7 +93,24 @@ class ScoringFunction { m_num_conf_independents = m_conf_independents.size(); m_weights = weights; }; - ~ScoringFunction() {} + void Destroy() + { + for (auto p : m_potentials) + { + delete p; + } + m_potentials.clear(); + m_num_potentials = 0; + for (auto p : m_conf_independents) + { + delete p; + } + m_conf_independents.clear(); + m_num_conf_independents = 0; + } + ~ScoringFunction() { + Destroy(); + } fl eval(atom& a, atom& b, fl r) const { // intentionally not checking for cutoff fl acc = 0; VINA_FOR(i, m_num_potentials) { acc += m_weights[i] * m_potentials[i]->eval(a, b, r); } diff --git a/unidock/src/lib/vina.cpp b/unidock/src/lib/vina.cpp index 878b323..173acb5 100644 --- a/unidock/src/lib/vina.cpp +++ b/unidock/src/lib/vina.cpp @@ -85,10 +85,10 @@ void Vina::set_receptor(const std::string& rigid_name, const std::string& flex_n // CONDITIONS 4, 5, 6, 7 (rigid_name and flex_name are empty strings per default) if (rigid_name.find("pdbqt") || flex_name.find("pdbqt")) { m_receptor - = parse_receptor_pdbqt(rigid_name, flex_name, m_scoring_function.get_atom_typing()); + = parse_receptor_pdbqt(rigid_name, flex_name, m_scoring_function->get_atom_typing()); } else if (rigid_name.find("pdb") && (!rigid_name.find("pdbqt"))) { m_receptor - = parse_receptor_pdb(rigid_name, flex_name, m_scoring_function.get_atom_typing()); + = parse_receptor_pdb(rigid_name, flex_name, m_scoring_function->get_atom_typing()); } m_model = m_receptor; @@ -106,7 +106,7 @@ void Vina::set_ligand_from_string(const std::string& ligand_string) { exit(EXIT_FAILURE); } - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); if (!m_receptor_initialized) { // This situation will happen if we don't need a receptor and we are using affinity maps @@ -122,7 +122,7 @@ void Vina::set_ligand_from_string(const std::string& ligand_string) { m_model.append(parse_ligand_pdbqt_from_string(ligand_string, atom_typing)); // Because we precalculate ligand atoms interactions - precalculate_byatom precalculated_byatom(m_scoring_function, m_model); + precalculate_byatom precalculated_byatom(*m_scoring_function, m_model); // Check that all atom types are in the grid (if initialized) if (m_map_initialized) { @@ -149,7 +149,7 @@ void Vina::set_ligand_from_string(const std::vector& ligand_string) exit(EXIT_FAILURE); } - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); if (!m_receptor_initialized) { // This situation will happen if we don't need a receptor and we are using affinity maps @@ -165,7 +165,7 @@ void Vina::set_ligand_from_string(const std::vector& ligand_string) m_model.append(parse_ligand_pdbqt_from_string(ligand_string[i], atom_typing)); // Because we precalculate ligand atoms interactions - precalculate_byatom 
precalculated_byatom(m_scoring_function, m_model); + precalculate_byatom precalculated_byatom(*m_scoring_function, m_model); // Check that all atom types are in the grid (if initialized) if (m_map_initialized) { @@ -193,7 +193,7 @@ void Vina::set_ligand_from_string_gpu(const std::vector& ligand_str exit(EXIT_FAILURE); } - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); if (!m_receptor_initialized) { // This situation will happen if we don't need a receptor and we are using affinity maps @@ -210,7 +210,7 @@ void Vina::set_ligand_from_string_gpu(const std::vector& ligand_str for (int i = 0; i < ligand_string.size(); ++i) { m_model_gpu[i].append( parse_ligand_pdbqt_from_string_no_failure(ligand_string[i], atom_typing)); - m_precalculated_byatom_gpu[i].init_without_calculation(m_scoring_function, m_model_gpu[i]); + m_precalculated_byatom_gpu[i].init_without_calculation(*m_scoring_function, m_model_gpu[i]); } // calculate common rs data @@ -219,7 +219,7 @@ void Vina::set_ligand_from_string_gpu(const std::vector& ligand_str // Because we precalculate ligand atoms interactions, which should be done in parallel int precalculate_thread_num = ligand_string.size(); - precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, m_scoring_function, + precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, *m_scoring_function, m_model_gpu, common_rs, precalculate_thread_num); VINA_RANGE(i, 0, ligand_string.size()) { @@ -250,7 +250,7 @@ void Vina::set_ligand_from_object_gpu(const std::vector& ligands) { exit(EXIT_FAILURE); } - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); if (!m_receptor_initialized) { // This situation will happen if we don't need a receptor and we are using affinity maps @@ -269,7 +269,7 @@ void Vina::set_ligand_from_object_gpu(const std::vector& ligands) { if (multi_bias) { m_model_gpu[i].bias_list = bias_batch_list[i]; } - m_precalculated_byatom_gpu[i].init_without_calculation(m_scoring_function, m_model_gpu[i]); + m_precalculated_byatom_gpu[i].init_without_calculation(*m_scoring_function, m_model_gpu[i]); } // calculate common rs data @@ -278,7 +278,7 @@ void Vina::set_ligand_from_object_gpu(const std::vector& ligands) { // Because we precalculate ligand atoms interactions, which should be done in parallel int precalculate_thread_num = ligands.size(); - precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, m_scoring_function, + precalculate_parallel(m_data_list_gpu, m_precalculated_byatom_gpu, *m_scoring_function, m_model_gpu, common_rs, precalculate_thread_num); VINA_RANGE(i, 0, ligands.size()) { @@ -309,7 +309,7 @@ void Vina::set_ligand_from_object(const std::vector& ligands) { exit(EXIT_FAILURE); } - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); if (!m_receptor_initialized) { // This situation will happen if we don't need a receptor and we are using affinity maps @@ -325,7 +325,7 @@ void Vina::set_ligand_from_object(const std::vector& ligands) { m_model.append(ligands[i]); // Because we precalculate ligand atoms interactions - precalculate_byatom precalculated_byatom(m_scoring_function, m_model); + precalculate_byatom precalculated_byatom(*m_scoring_function, m_model); // Check that all atom types are in the grid (if initialized) if (m_map_initialized) { @@ -453,9 +453,8 @@ void 
Vina::set_ad4_weights(double weight_ad4_vdw, double weight_ad4_hb, double w } void Vina::set_forcefield() { - ScoringFunction scoring_function(m_sf_choice, m_weights); // Store in Vina object - m_scoring_function = scoring_function; + m_scoring_function = std::make_shared(m_sf_choice, m_weights); } std::vector Vina::grid_dimensions_from_ligand(double buffer_size) { @@ -514,7 +513,7 @@ void Vina::compute_vina_maps(double center_x, double center_y, double center_z, vec center(center_x, center_y, center_z); const fl slope = 1e6; // FIXME: too large? used to be 100 szv atom_types; - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); /* Atom types initialization If a ligand was defined before, we only use those present in the ligand @@ -523,7 +522,7 @@ void Vina::compute_vina_maps(double center_x, double center_y, double center_z, if (m_ligand_initialized) atom_types = m_model.get_movable_atom_types(atom_typing); else - atom_types = m_scoring_function.get_atom_types(); + atom_types = m_scoring_function->get_atom_types(); // Grid dimensions VINA_FOR_IN(i, gd) { @@ -540,7 +539,7 @@ void Vina::compute_vina_maps(double center_x, double center_y, double center_z, } // Initialize the scoring function - precalculate precalculated_sf(m_scoring_function); + precalculate precalculated_sf(*m_scoring_function); // Store it now in Vina object because of non_cache m_precalculated_sf = precalculated_sf; @@ -593,7 +592,7 @@ void Vina::load_maps(std::string maps) { // Check that all the affinity map are present for ligands/flex residues (if initialized // already) if (m_ligand_initialized) { - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); szv atom_types = m_model.get_movable_atom_types(atom_typing); if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) { @@ -615,12 +614,12 @@ void Vina::write_maps(const std::string& map_prefix, const std::string& gpf_file } szv atom_types; - atom_type::t atom_typing = m_scoring_function.get_atom_typing(); + atom_type::t atom_typing = m_scoring_function->get_atom_typing(); if (m_ligand_initialized) atom_types = m_model.get_movable_atom_types(atom_typing); else - atom_types = m_scoring_function.get_atom_types(); + atom_types = m_scoring_function->get_atom_types(); if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) { doing("Writing Vina maps", m_verbosity, 0); @@ -1201,7 +1200,7 @@ std::vector Vina::score(double intramolecular_energy) { lig_intra = m_model.evali(m_precalculated_byatom, authentic_v); // [2] ligand_i -- ligand_i intra = flex_grids + intra_pairs + lig_intra; // Total - total = m_scoring_function.conf_independent( + total = m_scoring_function->conf_independent( m_model, inter + intra - intramolecular_energy); // we pass intermolecular energy from the best pose @@ -1220,7 +1219,7 @@ std::vector Vina::score(double intramolecular_energy) { lig_intra = m_model.evali(m_precalculated_byatom, authentic_v); // [2] ligand_i -- ligand_i intra = flex_grids + intra_pairs + lig_intra; // Torsion - conf_independent = m_scoring_function.conf_independent( + conf_independent = m_scoring_function->conf_independent( m_model, 0); // [3] we can pass e=0 because we do not modify the energy like in vina // Total total = inter + conf_independent; // (+ intra - intra) @@ -1282,7 +1281,7 @@ std::vector Vina::score_gpu(int i, double intramolecular_energy) { authentic_v); // [2] ligand_i -- ligand_i intra = flex_grids + 
intra_pairs + lig_intra; // Total - total = m_scoring_function.conf_independent( + total = m_scoring_function->conf_independent( m_model_gpu[i], inter + intra - intramolecular_energy); // we pass intermolecular energy from the best pose @@ -1303,7 +1302,7 @@ std::vector Vina::score_gpu(int i, double intramolecular_energy) { authentic_v); // [2] ligand_i -- ligand_i intra = flex_grids + intra_pairs + lig_intra; // Torsion - conf_independent = m_scoring_function.conf_independent( + conf_independent = m_scoring_function->conf_independent( m_model_gpu[i], 0); // [3] we can pass e=0 because we do not modify the energy like in vina // Total @@ -1656,7 +1655,7 @@ void Vina::global_search(const int exhaustiveness, const int n_poses, const doub void Vina::global_search_gpu(const int exhaustiveness, const int n_poses, const double min_rmsd, const int max_evals, const int max_step, int num_of_ligands, unsigned long long seed, const int refine_step, - const bool local_only) { + const bool local_only, const bool create_new_stream) { // Vina search (Monte-carlo and local optimization) // Check if ff, box and ligand were initialized if (!m_ligand_initialized) { @@ -1717,8 +1716,16 @@ void Vina::global_search_gpu(const int exhaustiveness, const int n_poses, const doing(sstm.str(), m_verbosity, 0); auto start = std::chrono::system_clock::now(); if (m_sf_choice == SF_VINA || m_sf_choice == SF_VINARDO) { - mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid, - m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list); + if (create_new_stream) + { + mc.mc_stream(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid, + m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list); + } + else + { + mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_grid, + m_grid.corner1(), m_grid.corner2(), generator, m_verbosity, seed, bias_batch_list); + } } else { mc(m_model_gpu, poses_gpu, m_precalculated_byatom_gpu, m_data_list_gpu, m_ad4grid, m_ad4grid.corner1(), m_ad4grid.corner2(), generator, m_verbosity, seed, bias_batch_list); @@ -1864,7 +1871,6 @@ Vina::~Vina() { // scoring function scoring_function_choice m_sf_choice; flv m_weights; - ScoringFunction m_scoring_function; precalculate_byatom m_precalculated_byatom; precalculate m_precalculated_sf; // maps diff --git a/unidock/src/lib/vina.h b/unidock/src/lib/vina.h index cab13d3..2f24f8e 100644 --- a/unidock/src/lib/vina.h +++ b/unidock/src/lib/vina.h @@ -55,6 +55,7 @@ #include "scoring_function.h" #include "precalculate.h" #include "bias.h" +#include #ifdef DEBUG # define DEBUG_PRINTF printf @@ -75,7 +76,6 @@ class Vina { m_no_refine = no_refine; m_progress_callback = progress_callback; gpu = false; - // Look for the number of cpu if (cpu <= 0) { unsigned num_cpus = boost::thread::hardware_concurrency(); @@ -149,7 +149,8 @@ class Vina { const double min_rmsd = 1.0, const int max_evals = 0, const int max_step = 0, int num_of_ligands = 1, unsigned long long seed = 181129, const int refine_step = 5, - const bool local_only = false); + const bool local_only = false, + const bool create_new_stream = false); std::string get_poses(int how_many = 9, double energy_range = 3.0); std::string get_sdf_poses(int how_many = 9, double energy_range = 3.0); std::string get_poses_gpu(int ligand_id, int how_many = 9, double energy_range = 3.0); @@ -189,7 +190,7 @@ class Vina { // scoring function scoring_function_choice m_sf_choice; flv m_weights; - ScoringFunction 
m_scoring_function; + std::shared_ptr m_scoring_function; precalculate_byatom m_precalculated_byatom; precalculate m_precalculated_sf; // gpu scoring function precalculated diff --git a/unidock/src/main/complex_property.h b/unidock/src/main/complex_property.h new file mode 100644 index 0000000..2eab6c2 --- /dev/null +++ b/unidock/src/main/complex_property.h @@ -0,0 +1,111 @@ +/* + + Copyright (c) 2006-2010, The Scripps Research Institute + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Author: Dr. Oleg Trott , + The Olson Lab, + The Scripps Research Institute + +*/ +#pragma once + +#include +#include +#include // ligand paths +#include +#include +#include "vina.h" +#include "utils.h" +#include "scoring_function.h" + +#include +#include +#include +#include + +// Holds properties of each ligand complex + +struct complex_property +{ + double center_x = 0; + double center_y = 0; + double center_z = 0; + double box_x = 0; + double box_y = 0; + double box_z = 0; + std::string protein_name; + std::string ligand_name; + complex_property(double x, double y, double z, + double box_x, double box_y, double box_z, + std::string protein_name, std::string ligand_name): + center_x(x), + center_y(y), + center_z(z), + box_x(box_x), + box_y(box_y), + box_z(box_z), + protein_name(protein_name), + ligand_name(ligand_name){}; + complex_property(){}; +}; + +// Holds properties of all ligand complexs + +struct complex_property_holder +{ + int max_count; + complex_property* m_properties; + complex_property_holder(int N): + max_count(N), + m_properties(nullptr) + { + m_properties = new complex_property[N]; + } + ~complex_property_holder() + { + delete [] m_properties; + m_properties = nullptr; + } + complex_property* get_end() + { + return &m_properties[max_count]; + } + + struct complex_property_iterator + { + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = complex_property; + using pointer = complex_property*; + using reference = complex_property&; + + complex_property_iterator(pointer ptr) : m_ptr(ptr) {} + reference operator*() const { return *m_ptr; } + pointer operator->() { return m_ptr; } + + // Prefix increment + complex_property_iterator& operator++() { m_ptr++; return *this; } + + // Postfix increment + complex_property_iterator operator++(int) { complex_property_iterator tmp = *this; ++(*this); return tmp; } + + friend bool operator== (const complex_property_iterator& a, const complex_property_iterator& b) { return a.m_ptr == b.m_ptr; }; + friend bool operator!= (const complex_property_iterator& a, const complex_property_iterator& b) { return a.m_ptr != b.m_ptr; }; + private: + pointer m_ptr; + }; + complex_property_iterator begin() { return complex_property_iterator(&m_properties[0]); } + complex_property_iterator end() { return complex_property_iterator(get_end()); } +}; \ No newline at end of file diff --git a/unidock/src/main/main.cpp b/unidock/src/main/main.cpp index df45d48..f19a2ab 100644 --- a/unidock/src/main/main.cpp +++ 
b/unidock/src/main/main.cpp @@ -30,6 +30,7 @@ #include #include +#include "simulation_container.h" struct usage_error : public std::runtime_error { usage_error(const std::string& message) : std::runtime_error(message) {} @@ -163,6 +164,7 @@ bug reporting, license agreements, and more information. \n"; std::string out_maps; std::vector ligand_names; std::string ligand_index; // path to a text file, containing paths to ligands files + int paired_batch_size = 0; std::vector batch_ligand_names; std::vector gpu_batch_ligand_names; // std::vector gpu_batch_ligand_names_sdf; @@ -242,8 +244,10 @@ bug reporting, license agreements, and more information. \n"; "flex", value(&flex_name), "flexible side chains, if any (PDBQT or PDB)")( "ligand", value >(&ligand_names)->multitoken(), "ligand (PDBQT)")("ligand_index", value(&ligand_index), - "file containing paths to ligands (PDBQT or SDF")( - "batch", value >(&batch_ligand_names)->multitoken(), + "file containing paths to ligands (PDBQT or SDF") + ("paired_batch_size",value(&paired_batch_size), + "If > 0, uses batching for one-ligand-one-protein docking, with json config in ligand_index following paired_batching.schema.json") + ("batch", value >(&batch_ligand_names)->multitoken(), "batch ligand (PDBQT)")( "gpu_batch", value >(&gpu_batch_ligand_names)->multitoken(), "gpu batch ligand (PDBQT or SDF)") @@ -474,6 +478,45 @@ bug reporting, license agreements, and more information. \n"; max_step = 40; } + // Use multiple workers for 1:1 docking, and exit + if (paired_batch_size > 0) + { + if (0 == vm.count("ligand_index")){ + std::cout << "ERROR: Paired batch size set, but no config json specified via --ligand_index\n"; + return -1; + } + if (0 == vm.count("size_x") || 0 == vm.count("size_y") || 0 == vm.count("size_z")){ + std::cout << "WARN: Paired batch size set, but size_x/size_y/size_z not specified, using 25\n"; + size_x = size_y = size_z = 25; + } + if (0 == vm.count("dir")) { + std::cout << "ERROR: Need to specify an output directory for batch mode.\n"; + return -1; + } + + std::cout << "Entering paired batch mode\n"; + + std::vector box_size = {size_x, size_y, size_z}; + simulation_container sc(seed, num_modes, refine_step, out_dir, + ligand_index, paired_batch_size, box_size, local_only, max_step, verbosity, exhaustiveness); + + int res = sc.prime(); + if (res <= 0) + { + std::cout << "Error priming [" << res << "]\n"; + return res; + } + + auto start = std::chrono::steady_clock::now(); + + int err = sc.launch(); + + auto end = std::chrono::steady_clock::now(); + auto milliseconds = std::chrono::duration_cast(end - start).count(); + std::cout << "Completed Batched Operations in " << milliseconds << " mS with err = " << err << "\n"; + return err; + } + if (sf_name.compare("vina") == 0 || sf_name.compare("vinardo") == 0) { if (!vm.count("receptor") && !vm.count("maps")) { std::cerr << desc_simple @@ -631,7 +674,7 @@ bug reporting, license agreements, and more information. \n"; std::vector ligands; VINA_FOR_IN(i, ligand_names) { ligands.emplace_back(parse_ligand_from_file_no_failure( - ligand_names[i], v.m_scoring_function.get_atom_typing(), keep_H)); + ligand_names[i], v.m_scoring_function->get_atom_typing(), keep_H)); } v.set_ligand_from_object(ligands); @@ -700,7 +743,7 @@ bug reporting, license agreements, and more information. 
\n"; VINA_FOR_IN(i, ligand_names) { std::vector ligands; ligands.emplace_back(parse_ligand_from_file_no_failure( - ligand_names[i], v.m_scoring_function.get_atom_typing(), keep_H)); + ligand_names[i], v.m_scoring_function->get_atom_typing(), keep_H)); Vina v1(v); v1.set_ligand_from_object(ligands); std::vector energies; @@ -747,7 +790,7 @@ bug reporting, license agreements, and more information. \n"; ++ligand_count) { auto& ligand = ligand_names[ligand_count]; auto l = parse_ligand_from_file_no_failure( - ligand, v.m_scoring_function.get_atom_typing(), keep_H); + ligand, v.m_scoring_function->get_atom_typing(), keep_H); #pragma omp critical all_ligands.emplace_back(std::make_pair(ligand, l)); } diff --git a/unidock/src/main/paired_batching.schema.json b/unidock/src/main/paired_batching.schema.json new file mode 100644 index 0000000..e66054c --- /dev/null +++ b/unidock/src/main/paired_batching.schema.json @@ -0,0 +1,28 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "batch": { + "type": "object", + "properties": { + "protein": { + "type": "string" + }, + "ligand": { + "type": "string" + }, + "ligand_config": { + "type": "string" + } + }, + "required": [ + "protein", + "ligand", + "ligand_config" + ] + } + }, + "required": [ + "batch" + ] + } \ No newline at end of file diff --git a/unidock/src/main/simulation_container.h b/unidock/src/main/simulation_container.h new file mode 100644 index 0000000..00bad90 --- /dev/null +++ b/unidock/src/main/simulation_container.h @@ -0,0 +1,517 @@ +/* + + Copyright (c) 2006-2010, The Scripps Research Institute + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Author: Dr. 
diff --git a/unidock/src/main/simulation_container.h b/unidock/src/main/simulation_container.h
new file mode 100644
index 0000000..00bad90
--- /dev/null
+++ b/unidock/src/main/simulation_container.h
@@ -0,0 +1,517 @@
+/*
+
+   Copyright (c) 2006-2010, The Scripps Research Institute
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   Author: Dr. Oleg Trott <ot14@columbia.edu>,
+           The Olson Lab,
+           The Scripps Research Institute
+
+*/
+#pragma once
+
+#include <string>
+#include <vector>
+#include <boost/filesystem/path.hpp>  // ligand paths
+#include <fstream>
+#include <iostream>
+
+#include "vina.h"
+#include "utils.h"
+#include "scoring_function.h"
+
+#include <random>
+#include <thread>
+#include <atomic>
+#include <boost/filesystem.hpp>
+#include <boost/property_tree/ptree.hpp>
+
+#include "vina_cuda_worker.h"
+
+#include <boost/property_tree/json_parser.hpp>
+#include <chrono>
+
+// Information about the current simulation
+struct simulation_container {
+    std::string m_work_dir;
+    std::string m_input_path;
+    std::string m_out_phrase;
+    int m_batch_size;
+    std::vector<double> m_box_size;
+    bool m_local_only;
+    int m_max_limits = 5000;
+    int m_max_global_steps;
+    int m_verbosity;
+    int m_exhaustiveness = 512;
+    bool m_isGPU;
+    int m_seed = 5;
+    int m_num_modes = 9;
+    int m_refine_steps = 3;
+
+    std::vector<std::string> m_complex_names;
+    std::string m_config_json_path;
+    std::vector<boost::filesystem::directory_entry> m_ligand_paths;
+    std::vector<boost::filesystem::directory_entry> m_ligand_config_paths;
+    std::vector<boost::filesystem::directory_entry> m_protein_paths;
+    complex_property_holder* m_ptr_complex_property_holder = nullptr;
+    int m_successful_property_count;
+
+    simulation_container(int seed, int num_modes, int refine_steps, std::string out_dir,
+                         std::string config_json_path, int paired_batch_size,
+                         std::vector<double> box_size_xyz, bool local_only, int max_step,
+                         int verbosity, int exh)
+        : m_work_dir(out_dir),
+          m_config_json_path(config_json_path),
+          m_batch_size(paired_batch_size),
+          m_box_size(box_size_xyz),
+          m_local_only(local_only),
+          m_max_global_steps(max_step),
+          m_verbosity(verbosity),
+          m_exhaustiveness(exh),
+          m_isGPU(true),
+          m_seed(seed),
+          m_num_modes(num_modes),
+          m_refine_steps(refine_steps),
+          m_successful_property_count(0) {
+        // m_out_phrase = util_random_string(5);
+    }
+
+    ~simulation_container() {
+        delete m_ptr_complex_property_holder;
+        m_ptr_complex_property_holder = nullptr;
+    }
+
+    std::string util_random_string(std::size_t length) {
+        const std::string CHARACTERS = "iamafunnydogthatlaughsindeterministically";
+
+        std::random_device random_device;
+        std::mt19937 generator(random_device());
+        std::uniform_int_distribution<std::size_t> distribution(0, CHARACTERS.size() - 1);
+
+        std::string rstring;
+        for (std::size_t i = 0; i < length; ++i) {
+            rstring += CHARACTERS[distribution(generator)];
+        }
+        return rstring;
+    }
+
+    void add_rank_combinations_from_json(std::string filename) {
+        int curr_entry_size = 0;
+        boost::property_tree::ptree tree_root;
+        boost::property_tree::read_json(filename, tree_root);
+
+        for (const auto& complex_entry : tree_root) {
+            m_complex_names.emplace_back(complex_entry.first);
+
+            for (const auto& field : complex_entry.second) {
+                if (field.first == "ligand") {
+                    m_ligand_paths.emplace_back(field.second.get_value<std::string>());
+                }
+                if (field.first == "protein") {
+                    m_protein_paths.emplace_back(field.second.get_value<std::string>());
+                }
+                if (field.first == "ligand_config") {
+                    m_ligand_config_paths.emplace_back(field.second.get_value<std::string>());
+                }
+            }
+            curr_entry_size++;
+            if (curr_entry_size >= m_max_limits) {
+                std::cout << "Limiting number of ranked samples to max limit " << m_max_limits
+                          << "\n";
+                break;
+            }
+        }
+    }
+
+    std::string trim_eol(std::string line) {
+        std::string newString;
+        for (char ch : line) {
+            if (ch == '\n' || ch == '\r') continue;
+            newString += ch;
+        }
+        return newString;
+    }
+
+    int fill_config_from_json(complex_property& cp, std::string path, std::string protein_name,
+                              std::string ligand_name) {
+        boost::property_tree::ptree tree_root;
+        boost::property_tree::read_json(path, tree_root);
+
+        // Default to the provided box; update from the config file if present
+        cp.box_x = m_box_size[0];
+        cp.box_y = m_box_size[1];
+        cp.box_z = m_box_size[2];
+
+        try {
+            cp.center_x = tree_root.get<double>("center_x");
+            cp.center_y = tree_root.get<double>("center_y");
+            cp.center_z = tree_root.get<double>("center_z");
+            cp.box_x = tree_root.get<double>("size_x", cp.box_x);
+            cp.box_y = tree_root.get<double>("size_y", cp.box_y);
+            cp.box_z = tree_root.get<double>("size_z", cp.box_z);
+        } catch (...) {
+            std::cout << "Error parsing config json " << path << "\n";
+            return -1;
+        }
+
+        cp.protein_name = protein_name;
+        cp.ligand_name = ligand_name;
+        return 0;
+    }
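+    // For reference, the grid JSON read by fill_config_from_json above is shaped
+    // like the following (hypothetical example; values are placeholders, and the
+    // size_* keys may be omitted to keep the box passed on the command line):
+    //
+    //   {
+    //       "center_x": 12.5,
+    //       "center_y": -3.1,
+    //       "center_z": 40.0,
+    //       "size_x": 25.0,
+    //       "size_y": 25.0,
+    //       "size_z": 25.0
+    //   }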
+    int fill_config(complex_property& cp, std::string path, std::string protein_name,
+                    std::string ligand_name) {
+        // Default to the provided box; update from the config file if present
+        cp.box_x = m_box_size[0];
+        cp.box_y = m_box_size[1];
+        cp.box_z = m_box_size[2];
+
+        if (path.empty()) {
+            return -1;
+        }
+
+        std::ifstream ifs(path);
+        std::string line;
+        double vals[6] = {0};  // center_x/y/z and, optionally, size_x/y/z
+        int id = 0;
+        while (std::getline(ifs, line) && id < 6) {
+            std::string trimmed(trim_eol(line));
+            std::size_t pos = trimmed.find('=');
+            if (pos == std::string::npos) continue;  // skip lines without key=value
+            vals[id] = std::stod(trimmed.substr(pos + 1));
+            id++;
+        }
+        ifs.close();
+
+        if (id < 3) {
+            std::cout << "Error parsing config file " << path << "\n";
+            return -1;
+        }
+
+        cp.center_x = vals[0];
+        cp.center_y = vals[1];
+        cp.center_z = vals[2];
+
+        if (id >= 6) {
+            cp.box_x = vals[3];
+            cp.box_y = vals[4];
+            cp.box_z = vals[5];
+        }
+
+        cp.protein_name = protein_name;
+        cp.ligand_name = ligand_name;
+        return 0;
+    }
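+    // The plain-text variant read by fill_config above is one key=value per line,
+    // centers first, optionally followed by the box sizes (hypothetical example):
+    //
+    //   center_x=12.5
+    //   center_y=-3.1
+    //   center_z=40.0
+    //   size_x=25.0
+    //   size_y=25.0
+    //   size_z=25.0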
+    void add_rank_combinations(std::string effective_path) {
+        int curr_entry_size = 0;
+        // search for <complex>_rank*.pdbqt for ranked ligands
+        for (boost::filesystem::directory_entry& entry :
+             boost::filesystem::recursive_directory_iterator(effective_path)) {
+            std::size_t pos_rank = entry.path().string().find("_rank");
+            std::size_t pos_config = entry.path().stem().string().find("_config");
+            std::size_t pos_pdbqt = entry.path().extension().string().find(".pdbqt");
+
+            if (pos_rank != std::string::npos && pos_pdbqt != std::string::npos
+                && pos_config == std::string::npos) {
+                std::size_t pos_complex = entry.path().stem().string().find("_rank");
+                std::string complex = entry.path().stem().string().substr(0, pos_complex);
+                m_complex_names.emplace_back(complex);
+                m_ligand_paths.emplace_back(entry.path());
+                m_protein_paths.emplace_back(
+                    entry.path().parent_path()
+                    / boost::filesystem::path(complex + "_protein.pdbqt"));
+                m_ligand_config_paths.emplace_back(
+                    entry.path().parent_path()
+                    / boost::filesystem::path(entry.path().stem().string() + "_config.txt"));
+
+                curr_entry_size++;
+                if (curr_entry_size >= m_max_limits) {
+                    std::cout << "Limiting number of ranked samples to max limit "
+                              << m_max_limits << "\n";
+                    break;
+                }
+            }
+        }
+    }
+
+    void add_combinations(std::string effective_path) {
+        int curr_entry_size = 0;
+        for (boost::filesystem::directory_entry& entry :
+             boost::filesystem::recursive_directory_iterator(effective_path)) {
+            std::size_t pos = entry.path().string().find("_protein.pdbqt");
+
+            if (pos != std::string::npos) {
+                std::size_t pos_complex = entry.path().stem().string().find("_protein");
+                std::string complex = entry.path().stem().string().substr(0, pos_complex);
+
+                m_complex_names.emplace_back(complex);
+                m_protein_paths.emplace_back(entry.path());
+                m_ligand_paths.emplace_back(
+                    entry.path().parent_path()
+                    / boost::filesystem::path(complex + "_ligand.pdbqt"));
+                m_ligand_config_paths.emplace_back(
+                    entry.path().parent_path()
+                    / boost::filesystem::path(complex + "_ligand_config.txt"));
+
+                curr_entry_size++;
+                if (curr_entry_size >= m_max_limits) {
+                    std::cout << "Limiting number of samples to max limit " << m_max_limits
+                              << "\n";
+                    break;
+                }
+            }
+        }
+    }
+
+    void add_combinations(const std::vector<std::string>& ligand_names) {
+        int curr_entry_size = 0;
+        for (const std::string& path : ligand_names) {
+            std::size_t pos = path.find("_ligand.pdbqt");
+
+            if (pos != std::string::npos) {
+                std::size_t pos_complex = path.find("_ligand");
+                std::string complex = path.substr(0, pos_complex);
+
+                m_complex_names.emplace_back(complex);
+                m_protein_paths.emplace_back(complex + "_protein.pdbqt");
+                m_ligand_paths.emplace_back(path);
+                m_ligand_config_paths.emplace_back(complex + "_ligand_config.txt");
+
+                curr_entry_size++;
+                if (curr_entry_size >= m_max_limits) {
+                    std::cout << "Limiting number of samples to max limit " << m_max_limits
+                              << "\n";
+                    break;
+                }
+            }
+        }
+    }
+
+    int prime() {
+        if (m_config_json_path.empty()) {
+            std::cout << "Found nothing to prime.\n";
+            return -1;
+        }
+        try {
+            add_rank_combinations_from_json(m_config_json_path);
+        } catch (const std::exception& e) {
+            std::cerr << "Error reading config json: " << e.what() << '\n';
+            return -1;
+        }
+
+        std::cout << "Found " << m_complex_names.size() << " complexes to be primed.\n";
+
+        m_ptr_complex_property_holder = new complex_property_holder(m_complex_names.size());
+
+        for (std::size_t id = 0; id < m_complex_names.size(); id++) {
+            int success_filled = -1;
+            complex_property& cp
+                = m_ptr_complex_property_holder->m_properties[m_successful_property_count];
+
+            if (m_ligand_config_paths[id].path().extension() == ".json") {
+                try {
+                    success_filled
+                        = fill_config_from_json(cp, m_ligand_config_paths[id].path().string(),
+                                                m_protein_paths[id].path().string(),
+                                                m_ligand_paths[id].path().string());
+                } catch (const std::exception& e) {
+                    std::cout << "Error reading config json " << e.what() << "\n";
+                    success_filled = -1;
+                }
+            } else {
+                success_filled = fill_config(cp, m_ligand_config_paths[id].path().string(),
+                                             m_protein_paths[id].path().string(),
+                                             m_ligand_paths[id].path().string());
+            }
+
+            if (0 == success_filled) {
+                m_successful_property_count++;
+            }
+        }
+        std::cout << "Filled " << m_successful_property_count << " properties successfully.\n";
+        return m_successful_property_count;
+    }
+    // Launch simulations
+    int launch() {
+        if (0 == m_successful_property_count) {
+            std::cout << "No properties primed, nothing to launch.\n";
+            return -1;
+        }
+        int batches = m_successful_property_count / m_batch_size;
+        std::cout << "Parameters: exh = " << m_exhaustiveness
+                  << ", box[0] = " << m_box_size[0]
+                  << ", max_eval_steps global = " << m_max_global_steps
+                  << ", num_modes = " << m_num_modes
+                  << ", refine_steps = " << m_refine_steps << "\n";
+
+        std::cout << "To do [" << batches << "] full batches\n";
+        std::cout << "Batched output to " << m_work_dir << "\n";
+
+        if (!boost::filesystem::exists(m_work_dir)) {
+            std::cout << "Creating work dir " << m_work_dir << "\n";
+            boost::filesystem::create_directory(m_work_dir);
+        }
+
+        std::vector<complex_property> cp;
+        int total_err_count = 0;
+        for (int i = 0; i < batches; i++) {
+            for (int curr = 0; curr < m_batch_size; curr++) {
+                int index = i * m_batch_size + curr;
+                cp.emplace_back(m_ptr_complex_property_holder->m_properties[index]);
+                std::cout << "Processing "
+                          << m_ptr_complex_property_holder->m_properties[index].ligand_name
+                          << "\n";
+            }
+            // run
+            int err_count
+                = batch_dock_with_worker(cp, m_local_only, m_work_dir, m_input_path, m_out_phrase);
+            std::cout << "Batch [" << i + 1 << "/" << batches << "] completed with " << err_count
+                      << " errors.\n";
+            total_err_count += err_count;
+            cp.clear();
+        }
+        // Remaining, if any
+        int remaining = m_successful_property_count - batches * m_batch_size;
+        if (remaining > 0) {
+            for (int i = 0; i < remaining; i++) {
+                int index = i + batches * m_batch_size;
+                cp.emplace_back(m_ptr_complex_property_holder->m_properties[index]);
+            }
+            int err_count
+                = batch_dock_with_worker(cp, m_local_only, m_work_dir, m_input_path, m_out_phrase);
+            total_err_count += err_count;
+            cp.clear();
+        }
+        std::cout << "Remaining [" << remaining << "/" << m_successful_property_count
+                  << "] completed.\n" << total_err_count << " total errors\n";
+
+        return total_err_count;
+    }
+
+    struct err_counter {
+        std::atomic<int> err_count{0};
+        void update() { err_count++; }
+        int get() { return err_count; }
+        void clear() { err_count = 0; }
+    };
+    err_counter counter;
+
+    // Launches a batch of vina_cuda_worker instances in separate threads to
+    // perform 1:1 docking. Each worker drives its own CUDA stream, so the
+    // pairs within a batch run concurrently on the GPU.
+    int batch_dock_with_worker(std::vector<complex_property> props, bool local_only,
+                               std::string workdir, std::string input_dir,
+                               std::string out_phrase) {
+        std::vector<std::thread> worker_threads;
+        counter.clear();
+
+        for (std::size_t i = 0; i < props.size(); i++) {
+            worker_threads.emplace_back([=]() {
+                vina_cuda_worker vcw(
+                    m_seed, m_num_modes, m_refine_steps, props[i].center_x, props[i].center_y,
+                    props[i].center_z, props[i].protein_name, props[i].ligand_name, local_only,
+                    std::vector<double>{props[i].box_x, props[i].box_y, props[i].box_z},
+                    m_max_global_steps, m_verbosity, m_exhaustiveness, workdir, input_dir,
+                    out_phrase);
+                try {
+                    int ret = vcw.launch();
+                    if (ret) {
+                        counter.update();
+                    }
+                } catch (const std::exception& e) {
+                    std::cerr << "Exception processing " << props[i].ligand_name << ", "
+                              << e.what() << "\n";
+                    counter.update();
+                }
+            });
+        }
+        for (auto& t : worker_threads) {
+            t.join();
+        }
+        return counter.get();
+    }
+};  // simulation_container
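Aside: `batch_dock_with_worker` relies on one host thread per protein:ligand pair, each driving its own CUDA stream (see `mc_stream`). A self-contained sketch of that concurrency pattern, independent of the Uni-Dock types; the kernel and sizes are illustrative only:

```cpp
#include <cuda_runtime.h>
#include <cstdio>
#include <thread>
#include <vector>

__global__ void scale(float* data, float factor) { data[threadIdx.x] *= factor; }

int main() {
    const int n_workers = 4;
    std::vector<std::thread> workers;
    for (int i = 0; i < n_workers; ++i) {
        workers.emplace_back([i]() {
            cudaStream_t stream;
            cudaStreamCreate(&stream);  // private stream, as each vina_cuda_worker has
            float* d_data = nullptr;
            cudaMalloc(&d_data, 32 * sizeof(float));
            cudaMemsetAsync(d_data, 0, 32 * sizeof(float), stream);
            scale<<<1, 32, 0, stream>>>(d_data, 2.0f);  // ordered within this stream only
            cudaStreamSynchronize(stream);  // waits for this worker, not the whole device
            cudaFree(d_data);
            cudaStreamDestroy(stream);
            std::printf("worker %d done\n", i);
        });
    }
    for (auto& t : workers) t.join();
    return cudaDeviceSynchronize() == cudaSuccess ? 0 : 1;
}
```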
diff --git a/unidock/src/main/vina_cuda_worker.h b/unidock/src/main/vina_cuda_worker.h
new file mode 100644
index 0000000..d2151d7
--- /dev/null
+++ b/unidock/src/main/vina_cuda_worker.h
@@ -0,0 +1,191 @@
+/*
+
+   Copyright (c) 2006-2010, The Scripps Research Institute
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   Author: Dr. Oleg Trott <ot14@columbia.edu>,
+           The Olson Lab,
+           The Scripps Research Institute
+
+*/
+#pragma once
+
+#include <string>
+#include <vector>
+#include <boost/filesystem/path.hpp>  // ligand paths
+#include <boost/filesystem.hpp>
+#include <iostream>
+
+//#define DEBUG
+
+#include "vina.h"
+#include "utils.h"
+#include "scoring_function.h"
+
+#include "complex_property.h"
+
+// Use the vina scoring function, and accelerate operations with CUDA streams
+class vina_cuda_worker : public Vina {
+    int exhaustiveness = 512;
+    int num_modes = 1;
+    int min_rmsd = 0;
+    int max_evals = 0;
+    int max_step = 60;
+    int seed = 5;
+    int refine_step = 3;
+    bool local_only = false;
+    double energy_range = 3.0;
+    bool keep_H = true;
+    std::string sf_name = "vina";
+    int cpu = 0;
+    bool no_refine = false;
+    double size_x = 25;
+    double size_y = 25;
+    double size_z = 25;
+    double grid_spacing = 0.375;
+    bool force_even_voxels = false;
+    // vina weights
+    double weight_gauss1 = -0.035579;
+    double weight_gauss2 = -0.005156;
+    double weight_repulsion = 0.840245;
+    double weight_hydrophobic = -0.035069;
+    double weight_hydrogen = -0.587439;
+    double weight_rot = 0.05846;
+    // macrocycle closure
+    double weight_glue = 50.000000;  // linear attraction
+
+    std::vector<std::string> gpu_out_name;
+    std::string workdir;
+    std::string input_dir;
+    std::string out_dir;
+    std::vector<model> batch_ligands;
+    double center_x;
+    double center_y;
+    double center_z;
+    std::string protein_name;
+    std::string ligand_name;
+
+    void init(std::string out_phrase) {
+        out_dir = workdir + "/" + out_phrase;
+        if (!boost::filesystem::exists(out_dir)) {
+            boost::filesystem::create_directory(out_dir);
+        }
+        m_seed = seed;
+    }
+
+public:
+    vina_cuda_worker(int seed, int num_modes, int refine_steps, double center_x, double center_y,
+                     double center_z, std::string protein_name, std::string ligand_name,
+                     bool local_only, std::vector<double> box_size_xyz, int max_step,
+                     int verbosity, int exh, std::string workdir, std::string input_dir,
+                     std::string out_phrase)
+        : Vina{"vina", 0, seed, verbosity, false, NULL},
+          exhaustiveness(exh),
+          num_modes(num_modes),
+          max_step(max_step),
+          seed(seed),
+          refine_step(refine_steps),
+          local_only(local_only),
+          size_x(box_size_xyz[0]),
+          size_y(box_size_xyz[1]),
+          size_z(box_size_xyz[2]),
+          workdir(workdir),
+          input_dir(input_dir),
+          center_x(center_x),
+          center_y(center_y),
+          center_z(center_z),
+          protein_name(protein_name),
+          ligand_name(ligand_name) {
+        init(out_phrase);
+    }
+
+    ~vina_cuda_worker() {}
+
+    // Non-copyable: each worker owns its own output directory and GPU state
+    vina_cuda_worker(const vina_cuda_worker&) = delete;
+    vina_cuda_worker& operator=(const vina_cuda_worker&) = delete;
+
+    // Performs CUDA stream based docking of 1 ligand and 1 protein
+    int launch() {
+        multi_bias = false;
+        bias_batch_list.clear();
+
+        set_vina_weights(weight_gauss1, weight_gauss2, weight_repulsion, weight_hydrophobic,
+                         weight_hydrogen, weight_glue, weight_rot);
+        std::string flex;
+        std::string rigid(protein_name);
+
+        if (!boost::filesystem::exists(ligand_name)) {
+            std::cout << "Input ligand file does not exist (" << ligand_name << ")\n";
+            return -1;
+        }
+        if (!boost::filesystem::exists(rigid)) {
+            std::cout << "Input (rigid) protein file does not exist (" << rigid << ")\n";
+            return -1;
+        }
+
+        set_receptor(rigid, flex);
+
+        enable_gpu();
+        compute_vina_maps(center_x, center_y, center_z, size_x, size_y, size_z, grid_spacing,
+                          force_even_voxels);
+
+        auto parsed_ligand = parse_ligand_from_file_no_failure(
+            ligand_name, m_scoring_function->get_atom_typing(), keep_H);
+        batch_ligands.emplace_back(parsed_ligand);
+
+        set_ligand_from_object_gpu(batch_ligands);
+
+        bool create_new_stream = true;
+        global_search_gpu(exhaustiveness, num_modes, min_rmsd, max_evals, max_step, 1,
+                          (unsigned long long)seed, refine_step, local_only, create_new_stream);
+
+        gpu_out_name.push_back(default_output(get_filename(ligand_name), out_dir));
+        write_poses_gpu(gpu_out_name, num_modes, energy_range);
+
+        return 0;
+    }
+};
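Aside: taken on its own, a single worker can be driven as in the sketch below. The file names and numeric values are placeholders, not taken from the patch; in the patch itself this construction happens inside the lambda of `batch_dock_with_worker`:

```cpp
#include "vina_cuda_worker.h"

int dock_one_pair() {
    vina_cuda_worker vcw(/*seed=*/5, /*num_modes=*/9, /*refine_steps=*/3,
                         /*center_x=*/12.5, /*center_y=*/-3.1, /*center_z=*/40.0,
                         "7LCU_receptor.pdbqt", "7LCU_ligand_prep.sdf",
                         /*local_only=*/false, /*box_size_xyz=*/{25.0, 25.0, 25.0},
                         /*max_step=*/80, /*verbosity=*/1, /*exhaustiveness=*/1024,
                         "workdir", "input_dir", "out_phrase");
    return vcw.launch();  // 0 on success; -1 if an input file is missing
}
```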
diff --git a/unidock/src/rocm/main.cu.hip b/unidock/src/rocm/main.cu.hip
index dbf6464..60482fa 100644
--- a/unidock/src/rocm/main.cu.hip
+++ b/unidock/src/rocm/main.cu.hip
@@ -606,7 +606,7 @@ Thank you!\n";
         {
             auto& ligand=ligand_names[ligand_count];
             auto l = parse_ligand_pdbqt_from_file_no_failure(
-                ligand, v.m_scoring_function.get_atom_typing());
+                ligand, v.m_scoring_function->get_atom_typing());
 #pragma omp critical
             all_ligands.emplace_back(std::make_pair(ligand,l));
         }