Skip to content

Commit

Permalink
refactor: refactored opencl algorithms in order to support non-gpu ab…
Browse files Browse the repository at this point in the history
…le systems
  • Loading branch information
paolotron committed Mar 1, 2022
1 parent 041ae5d commit 8583e95
Show file tree
Hide file tree
Showing 19 changed files with 406 additions and 292 deletions.
21 changes: 18 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,30 @@ set(CMAKE_MODULE_PATH cmake_module)
option(BUILD_EXAMPLES "Build the examples showing how to use baylib" OFF)

# change this option to compile tests
option(BUILD_TESTS "Build tests for baylib" ON)
option(BUILD_TESTS "Build tests for baylib" OFF)

#CUDA
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
add_compile_definitions(CUDA_CMP_FOUND)
add_compile_definitions(BAYLIB_CUDA)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CUDA_ARCHITECTURES OFF)
set(CMAKE_CUDA_ARCHITECTURES OFF)
endif()

#openCL
# OpenCL is optional. BAYLIB_OPENCL ends up as 1 when the OpenCL::OpenCL
# target is available (found here or already provided by an enclosing
# project) and 0 otherwise; the BAYLIB_OPENCL compile definition is added
# only in the former case.
if(NOT TARGET OpenCL::OpenCL)
    # OPENCL_ROOT may point at a non-standard install. Append it instead of
    # overwriting CMAKE_PREFIX_PATH so prefixes supplied by the user for the
    # other dependencies are preserved.
    if(OPENCL_ROOT)
        list(APPEND CMAKE_PREFIX_PATH "${OPENCL_ROOT}")
    endif()
    find_package(OpenCL)
    if(OpenCL_FOUND)
        # Pin the OpenCL API version for everything linking the target we just created.
        set_property(TARGET OpenCL::OpenCL APPEND PROPERTY
                INTERFACE_COMPILE_DEFINITIONS CL_TARGET_OPENCL_VERSION=220)
    endif()
endif()

if(TARGET OpenCL::OpenCL)
    set(BAYLIB_OPENCL 1)
    add_compile_definitions(BAYLIB_OPENCL)
else()
    set(BAYLIB_OPENCL 0)
endif()


Expand Down
34 changes: 17 additions & 17 deletions baylib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,6 @@ if(NOT TARGET Boost::boost)
endif()

set(threading=multi)

#openCL
if(NOT TARGET OpenCL::OpenCL)
set(CMAKE_PREFIX_PATH "${OPENCL_ROOT}")
find_package(OpenCL REQUIRED)
set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_COMPILE_DEFINITIONS CL_TARGET_OPENCL_VERSION=220)
endif()
find_package(Threads REQUIRED)
find_package(TBB REQUIRED)

Expand All @@ -36,9 +29,7 @@ set (src
inference/abstract_inference_algorithm.hpp
inference/gibbs_sampling.hpp
inference/likelihood_weighting.hpp
inference/logic_sampling.hpp
inference/rejection_sampling.hpp
inference/adaptive_importance_sampling.hpp
network/bayesian_net.hpp
network/bayesian_utils.hpp
network/random_variable.hpp
Expand All @@ -55,19 +46,27 @@ set (src
baylib_assert.h
baylib_concepts.hpp
)
set(src_opencl
inference/opencl/logic_sampling_opencl.hpp
inference/opencl/adaptive_importance_sampling_opencl.hpp
inference/opencl/vectorized_inference_opencl.hpp
)
set (src_cuda
inference/cuda/samplers_cuda.cuh
inference/cuda/samplers_cuda.cu
inference/logic_sampling_cuda.hpp
inference/likelihood_weighting_cuda.hpp
tools/gpu/cuda_utils.cuh
tools/gpu/cuda_utils.cu
tools/gpu/cuda_graph_adapter.cuh
inference/cuda/samplers_cuda.cuh
inference/cuda/samplers_cuda.cu
inference/cuda/logic_sampling_cuda.hpp
inference/cuda/likelihood_weighting_cuda.hpp
tools/gpu/cuda_utils.cuh
tools/gpu/cuda_utils.cu
tools/gpu/cuda_graph_adapter.cuh
)

if(CMAKE_CUDA_COMPILER)
list(APPEND src ${src_cuda})
endif()
if(BAYLIB_OPENCL)
list(APPEND src ${src_opencl})
endif()

set (BAYLIB_INCLUDE_LIBS ../baylib)

Expand All @@ -82,7 +81,8 @@ if(CMAKE_CUDA_COMPILER)
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON
LINKER_LANGUAGE CXX)
LINKER_LANGUAGE CXX
)
endif()

target_include_directories(baylib
Expand Down
173 changes: 2 additions & 171 deletions baylib/inference/abstract_inference_algorithm.hpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
#ifndef BAYLIB_ABSTRACT_INFERENCE_ALGORITHM_HPP
#define BAYLIB_ABSTRACT_INFERENCE_ALGORITHM_HPP

#define CL_TARGET_OPENCL_VERSION 220


#include <baylib/network/bayesian_utils.hpp>
#include <baylib/probability/marginal_distribution.hpp>
#include <baylib/tools/random/random_generator.hpp>
#include <baylib/tools/gpu/gpu_utils.hpp>
#include <boost/compute/core.hpp>
#include <boost/compute.hpp>
#include <boost/compute/device.hpp>
#include <future>
#include <baylib/baylib_concepts.hpp>

Expand Down Expand Up @@ -65,9 +61,9 @@ namespace baylib {

void set_seed(unsigned int _seed) { seed = _seed; }

protected:
const network_type & bn;
unsigned long nsamples;
protected:
unsigned int seed;
};

Expand Down Expand Up @@ -154,171 +150,6 @@ namespace baylib {
unsigned int nthreads;
};


namespace compute = boost::compute;
using boost::compute::lambda::_1;
using boost::compute::lambda::_2;
/**
 * Approximate inference algorithm vectorized with a GPGPU approach
 * (Boost.Compute).
 *
 * The object holds the compute device, context, command queue and random
 * engine used by the operations it enqueues. The protected helpers let a
 * derived algorithm split the requested samples into batches
 * (calculate_iterations) and sample one node given the results of the
 * previous simulations of its parent nodes (simulate_node).
 *
 * @tparam Network_ : the type of bayesian network
 */
template < BNetDerived Network_ >
class vectorized_inference_algorithm : public inference_algorithm<Network_>
{
public:
    using typename inference_algorithm<Network_>::network_type;
    using typename inference_algorithm<Network_>::probability_type;
    using inference_algorithm<Network_>::bn;

    /**
     * @param bn        network the inference runs on
     * @param n_samples number of samples requested
     * @param memory    memory budget used by calculate_iterations(); it is
     *                  divided by a sizeof-based per-sample footprint, i.e. bytes
     * @param seed      seed forwarded to the base class and to the device random engine
     * @param device    compute device to use (defaults to the system default device)
     */
    vectorized_inference_algorithm(
        const network_type & bn,
        ulong n_samples,
        size_t memory,
        uint seed = 0,
        const compute::device &device = compute::system::default_device()
    )
    : inference_algorithm<Network_>(bn, n_samples, seed)
    // Listed in member declaration order, which is the order the
    // initializers actually run in: context needs device, queue needs
    // both, rand needs queue.
    , device(device)
    , context(device)
    , queue(context, device)
    , rand(queue, seed)
    , memory(memory)
    {}

    using prob_v = boost::compute::vector<probability_type>;

protected:
    compute::device device;
    compute::context context;
    compute::command_queue queue;
    compute::default_random_engine rand;
    size_t memory;

    /**
     * calculate the number of iterations needed for a complete simulation
     * without exceeding the boundary set by the user
     *
     * The per-sample footprint is one probability_type per network variable
     * plus three cl_ushort; the resulting batch size is scaled by
     * MEMORY_SLACK percent.
     *
     * @return pair<number of samples per iteration, number of iterations>.
     *         The iteration count uses integer division, so the product of
     *         the pair can be smaller than the requested number of samples.
     */
    std::pair<ulong, ulong> calculate_iterations()
    {
        const auto bytes_per_sample =
                bn.number_of_variables() * sizeof(probability_type) + 3 * sizeof(cl_ushort);
        ulong sample_p = this->memory / bytes_per_sample * MEMORY_SLACK / 100;

        // A budget smaller than a single sample would give a batch size of
        // zero and a division by zero below; fall back to one sample per
        // iteration, the smallest batch that can make progress.
        if (sample_p == 0)
            sample_p = 1;

        if (sample_p < this->nsamples)
            return {sample_p, this->nsamples / sample_p};
        return {this->nsamples, 1};
    }

    /**
     * Flatten the cpt of a node, one row per parent condition in the order
     * produced by condition_factory, and turn each row into running sums.
     *
     * The last entry of every row is left untouched: simulate_node only
     * reads the first number_of_states - 1 thresholds of a row.
     *
     * @param v_id id of the node
     * @param cpt  cpt of the node
     * @return flattened cpt with partially accumulated rows
     */
    std::vector<probability_type> accumulate_cpt(ulong v_id, baylib::cow::cpt<probability_type> cpt) {
        auto factory = baylib::condition_factory(bn, v_id, bn.parents_of(v_id));
        std::vector<probability_type> flat_cpt{};
        uint n_states = bn[v_id].table().number_of_states();
        do {
            auto temp = cpt[factory.get()];
            flat_cpt.insert(flat_cpt.end(), temp.begin(), temp.end());
        } while (factory.has_next());

        // size_t indices: the bound is flat_cpt.size(), which grows with the
        // number of parent conditions and need not fit in baylib::state_t.
        for (size_t row = 0; row < flat_cpt.size(); row += n_states)
            for (size_t j = 1; j + 1 < n_states; j++)
                flat_cpt[row + j] += flat_cpt[row + j - 1];
        return flat_cpt;
    }

    /**
     * Simulations of a specific node using opencl
     * @param v_id id of the node to simulate
     * @param cpt cpt of the node
     * @param parents_result results of previous simulate_node calls
     * @param dim number of samples of the simulation
     * @return result of the simulation; each sampled state is the number of
     *         accumulated thresholds of the selected cpt row that the random
     *         draw exceeds
     */
    bcvec simulate_node(
        ulong v_id,
        const cow::cpt<probability_type> &cpt,
        std::vector<bcvec*> &parents_result,
        int dim
    )
    {
        std::vector<probability_type> flat_cpt_accum = accumulate_cpt(v_id, cpt);
        const uint n_states = bn[v_id].table().number_of_states();

        bcvec result(dim, cpt.number_of_states(), context);
        prob_v device_cpt(flat_cpt_accum.size(), context);
        prob_v threshold_vec(dim, context);
        prob_v random_vec(dim, context);
        compute::uniform_real_distribution<probability_type> distribution(0, 1);
        compute::vector<int> index_vec(dim, context);

        // copy the flattened cpt into device memory
        compute::copy(flat_cpt_accum.begin(), flat_cpt_accum.end(), device_cpt.begin(), queue);

        // deduce, for every sample, the offset of the cpt row selected by
        // the parents' states in the previous simulations:
        // offset = sum(parent_state * coeff), where coeff starts at the
        // node's number of states and is multiplied by each parent's
        // cardinality in turn
        if (parents_result.empty())
            compute::fill(index_vec.begin(), index_vec.end(), 0, queue);
        else {
            uint coeff = n_states;
            for (size_t p = 0; p < parents_result.size(); p++) {
                if (p == 0)
                    compute::transform(parents_result[p]->state.begin(),
                                       parents_result[p]->state.end(),
                                       index_vec.begin(),
                                       _1 * coeff, queue);
                else
                    compute::transform(parents_result[p]->state.begin(),
                                       parents_result[p]->state.end(),
                                       index_vec.begin(),
                                       index_vec.begin(),
                                       _1 * coeff + _2, queue);
                coeff *= parents_result[p]->cardinality;
            }
        }

        // get the threshold corresponding to the specific row of the cpt for every single simulation
        compute::gather(index_vec.begin(),
                        index_vec.end(),
                        device_cpt.begin(),
                        threshold_vec.begin(), queue);

        // generate random vector
        distribution.generate(random_vec.begin(),
                              random_vec.end(),
                              rand, queue);

        // compare the random vector with the first threshold
        compute::transform(random_vec.begin(),
                           random_vec.end(),
                           threshold_vec.begin(),
                           result.state.begin(),
                           _1 > _2,
                           queue);

        // generalization in case of more than 2 states: move to the next
        // threshold of the row and add 1 to the state whenever the draw
        // exceeds it as well
        for (uint k = 0; k + 2 < n_states; k++) {
            compute::vector<int> temp(dim, context);
            compute::transform(index_vec.begin(),
                               index_vec.end(),
                               index_vec.begin(),
                               _1 + 1, queue);
            compute::gather(index_vec.begin(),
                            index_vec.end(),
                            device_cpt.begin(),
                            threshold_vec.begin(), queue);
            compute::transform(random_vec.begin(),
                               random_vec.end(),
                               threshold_vec.begin(),
                               temp.begin(),
                               _1 > _2, queue);
            compute::transform(temp.begin(),
                               temp.end(),
                               result.state.begin(),
                               result.state.begin(),
                               _1 + _2, queue);
        }

        return result;
    }
};
} // namespace inference
} // namespace baylib

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <baylib/inference/cuda/samplers_cuda.cuh>
#include <baylib/tools/gpu/cuda_utils.cuh>
#include <baylib/network/bayesian_utils.hpp>
#include <baylib/tools/gpu/gpu_utils.hpp>

//! \file logic_sampling_cuda.hpp
//! \brief Logic Sampling implementation with cuda optimization
Expand Down Expand Up @@ -51,13 +52,13 @@ namespace baylib {
* @return : marginal distribution
*/
baylib::marginal_distribution<probability_type> make_inference(){
// Builds the CUDA graph adapter for the network, runs logic_sampler over the
// sampling order and returns the reshaped result after normalizing it.
// NOTE(review): `graph` is declared on the next two lines (unqualified and
// baylib::-qualified call) — these look like the removed/added sides of the
// diff; only one declaration can remain for this to compile — confirm.
cuda_graph_adapter<probability_type> graph = make_cuda_graph_revised<probability_type>(this->bn);
cuda_graph_adapter<probability_type> graph = baylib::make_cuda_graph_revised<probability_type>(this->bn);
// flag forwarded to the sampler; computed once from the network
bool evidence = evidence_presence(this->bn);
// the same order is passed to the sampler and to reshape_marginal below
auto vertex_queue = baylib::sampling_order(this->bn);
// flat vector of unsigned values returned by the sampler — presumably
// per-variable/per-state tallies; TODO confirm against logic_sampler
std::vector<uint> result_line = logic_sampler(
graph, vertex_queue, this->nsamples, evidence, this->seed
);
// NOTE(review): `result` is likewise declared twice (unqualified vs
// baylib::-qualified reshape_marginal) — same diff residue, confirm.
auto result = reshape_marginal<probability_type>(this->bn, vertex_queue, result_line);
auto result = baylib::reshape_marginal<probability_type>(this->bn, vertex_queue, result_line);
result.normalize();
return result;
}
Expand Down
Loading

0 comments on commit 8583e95

Please sign in to comment.