diff --git a/CMakeLists.txt b/CMakeLists.txt
index 102c5c9..1950407 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,15 +16,30 @@ set(CMAKE_MODULE_PATH cmake_module)
 option(BUILD_EXAMPLES "Build the examples showing how to use baylib" OFF)
 
 # change this option to compile tests
-option(BUILD_TESTS "Build tests for baylib" ON)
+option(BUILD_TESTS "Build tests for baylib" OFF)
 
+# CUDA
 check_language(CUDA)
 if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
-    add_compile_definitions(CUDA_CMP_FOUND)
+    add_compile_definitions(BAYLIB_CUDA)
     set(CMAKE_CUDA_STANDARD 14)
     set(CMAKE_CUDA_STANDARD_REQUIRED ON)
-    set(CUDA_ARCHITECTURES OFF)
+    set(CMAKE_CUDA_ARCHITECTURES OFF)
+endif()
+
+# OpenCL
+if(NOT TARGET OpenCL::OpenCL)
+    set(CMAKE_PREFIX_PATH "${OPENCL_ROOT}")
+    find_package(OpenCL)
+    if(OpenCL_FOUND)
+        set(BAYLIB_OPENCL 1)
+        add_compile_definitions(BAYLIB_OPENCL)
+        set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_COMPILE_DEFINITIONS CL_TARGET_OPENCL_VERSION=220)
+    else()
+        set(BAYLIB_OPENCL 0)
+    endif()
 endif()
diff --git a/baylib/CMakeLists.txt b/baylib/CMakeLists.txt
index 513a49e..4cbd8cc 100644
--- a/baylib/CMakeLists.txt
+++ b/baylib/CMakeLists.txt
@@ -13,13 +13,6 @@ if(NOT TARGET Boost::boost)
 endif()
 
 set(threading=multi)
-
-#openCL
-if(NOT TARGET OpenCL::OpenCL)
-    set(CMAKE_PREFIX_PATH "${OPENCL_ROOT}")
-    find_package(OpenCL REQUIRED)
-    set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_COMPILE_DEFINITIONS CL_TARGET_OPENCL_VERSION=220)
-endif()
 
 find_package(Threads REQUIRED)
 find_package(TBB REQUIRED)
@@ -36,9 +29,7 @@ set (src
         inference/abstract_inference_algorithm.hpp
         inference/gibbs_sampling.hpp
         inference/likelihood_weighting.hpp
-        inference/logic_sampling.hpp
         inference/rejection_sampling.hpp
-        inference/adaptive_importance_sampling.hpp
         network/bayesian_net.hpp
         network/bayesian_utils.hpp
         network/random_variable.hpp
@@ -55,19 +46,27 @@ set (src
         baylib_assert.h
         baylib_concepts.hpp
         )
+set(src_opencl
+        inference/opencl/logic_sampling_opencl.hpp
+        inference/opencl/adaptive_importance_sampling_opencl.hpp
+        inference/opencl/vectorized_inference_opencl.hpp
+)
 set (src_cuda
-        inference/cuda/samplers_cuda.cuh
-        inference/cuda/samplers_cuda.cu
-        inference/logic_sampling_cuda.hpp
-        inference/likelihood_weighting_cuda.hpp
-        tools/gpu/cuda_utils.cuh
-        tools/gpu/cuda_utils.cu
-        tools/gpu/cuda_graph_adapter.cuh
+            inference/cuda/samplers_cuda.cuh
+            inference/cuda/samplers_cuda.cu
+            inference/cuda/logic_sampling_cuda.hpp
+            inference/cuda/likelihood_weighting_cuda.hpp
+            tools/gpu/cuda_utils.cuh
+            tools/gpu/cuda_utils.cu
+            tools/gpu/cuda_graph_adapter.cuh
 )
 if(CMAKE_CUDA_COMPILER)
     list(APPEND src ${src_cuda})
 endif()
+if(BAYLIB_OPENCL)
+    list(APPEND src ${src_opencl})
+endif()
 
 set (BAYLIB_INCLUDE_LIBS ../baylib)
@@ -82,7 +81,8 @@ if(CMAKE_CUDA_COMPILER)
             PROPERTIES
             CUDA_SEPARABLE_COMPILATION ON
             POSITION_INDEPENDENT_CODE ON
-            LINKER_LANGUAGE CXX)
+            LINKER_LANGUAGE CXX
+            )
 endif()
 
 target_include_directories(baylib
diff --git a/baylib/inference/abstract_inference_algorithm.hpp b/baylib/inference/abstract_inference_algorithm.hpp
index 0234372..7f8c87a 100644
--- a/baylib/inference/abstract_inference_algorithm.hpp
+++ b/baylib/inference/abstract_inference_algorithm.hpp
@@ -1,15 +1,11 @@
 #ifndef BAYLIB_ABSTRACT_INFERENCE_ALGORITHM_HPP
 #define BAYLIB_ABSTRACT_INFERENCE_ALGORITHM_HPP
-#define CL_TARGET_OPENCL_VERSION 220
+
 #include
 #include
 #include
-#include
-#include
-#include
-#include
 #include
 #include
 
@@ -65,9 +61,9 @@ namespace baylib {
         void set_seed(unsigned int _seed) { seed = _seed; }
 
-    protected:
         const network_type & bn;
         unsigned long nsamples;
+    protected:
         unsigned int seed;
     };
 
@@ -154,171 +150,6 @@ namespace baylib {
         unsigned int nthreads;
     };
 
-
-    namespace compute = boost::compute;
-    using boost::compute::lambda::_1;
-    using boost::compute::lambda::_2;
-    /**
-     * This class models an approximate inference algorithm
-     * vectorized with a GPGPU approach.
-     * the method simulate_node samples a node given the results of
-     * previous simulations of its parents nodes
-     * @tparam Network_ : the type of bayesian network
-     */
-    template < BNetDerived Network_ >
-    class vectorized_inference_algorithm : public inference_algorithm<Network_>
-    {
-    public:
-        using typename inference_algorithm<Network_>::network_type;
-        using typename inference_algorithm<Network_>::probability_type;
-        using inference_algorithm<Network_>::bn;
-
-        vectorized_inference_algorithm(
-                const network_type & bn,
-                ulong n_samples,
-                size_t memory,
-                uint seed = 0,
-                const compute::device &device = compute::system::default_device()
-        )
-        : inference_algorithm<Network_>(bn, n_samples, seed)
-        , memory(memory)
-        , device(device)
-        , context(device)
-        , queue(context, device)
-        , rand(queue, seed)
-        {}
-
-        using prob_v = boost::compute::vector<probability_type>;
-
-    protected:
-        compute::device device;
-        compute::context context;
-        compute::command_queue queue;
-        compute::default_random_engine rand;
-        size_t memory;
-
-        /**
-         * calculate the number of iterations needed for a complete simulation without exceeding the boundary set
-         * by the user
-         * @param bn network
-         * @return pair of (samples per iteration, number of iterations)
-         */
-        std::pair<ulong, ulong> calculate_iterations()
-        {
-            ulong sample_p = this->memory / (bn.number_of_variables() * sizeof(probability_type) + 3 * sizeof(cl_ushort)) * MEMORY_SLACK / 100;
-            if(sample_p < this->nsamples)
-                return {sample_p, this->nsamples / sample_p};
-            else
-                return {this->nsamples, 1};
-        }
-
-        std::vector<probability_type> accumulate_cpt(ulong v_id, baylib::cow::cpt<probability_type> cpt) {
-            auto factory = baylib::condition_factory(bn, v_id, bn.parents_of(v_id));
-            std::vector<probability_type> flat_cpt{};
-            uint n_states = bn[v_id].table().number_of_states();
-            do {
-                auto temp = cpt[factory.get()];
-                flat_cpt.insert(flat_cpt.end(), temp.begin(), temp.end());
-            } while (factory.has_next());
-
-            for (baylib::state_t i = 0; i < flat_cpt.size(); i += n_states)
-                for (baylib::state_t j = 1; j < n_states - 1; j++)
-                    flat_cpt[i + j] += flat_cpt[i + j - 1];
-            return flat_cpt;
-        }
-
-        /**
-         * Simulations of a specific node using opencl
-         * @param cpt cpt of the node
-         * @param parents_result results of previous simulate_node calls
-         * @param dim number of samples of the simulation
-         * @return result of the simulation
-         */
-        bcvec simulate_node(
-                ulong v_id,
-                const cow::cpt<probability_type> &cpt,
-                std::vector<bcvec*> &parents_result,
-                int dim
-        )
-        {
-            std::vector<probability_type> flat_cpt_accum = accumulate_cpt(v_id, cpt);
-            bcvec result(dim, cpt.number_of_states(), context);
-            prob_v device_cpt(flat_cpt_accum.size(), context);
-            prob_v threshold_vec(dim, context);
-            prob_v random_vec(dim, context);
-            compute::uniform_real_distribution<probability_type> distribution(0, 1);
-            compute::vector<int> index_vec(dim, context);
-
-            // Async copy of the cpt in gpu memory
-            compute::copy(flat_cpt_accum.begin(), flat_cpt_accum.end(), device_cpt.begin(), queue);
-
-            // cycle for deducing the row of the cpt given the parents state in the previous simulation
-            if(parents_result.empty())
-                compute::fill(index_vec.begin(), index_vec.end(), 0, queue);
-            else {
-                uint coeff = bn[v_id].table().number_of_states();
-                for (int i = 0; i < parents_result.size(); i++) {
-                    if (i == 0)
-                        compute::transform(parents_result[i]->state.begin(),
-                                           parents_result[i]->state.end(),
-                                           index_vec.begin(),
-                                           _1 * coeff, queue);
-                    else
-                        compute::transform(parents_result[i]->state.begin(),
-                                           parents_result[i]->state.end(),
-                                           index_vec.begin(),
-                                           index_vec.begin(),
-                                           _1 * coeff + _2, queue);
-                    coeff *= parents_result[i]->cardinality;
-                }
-            }
-
-            // get the threshold corresponding to the specific row of the cpt for every single simulation
-            compute::gather(index_vec.begin(),
-                            index_vec.end(),
-                            device_cpt.begin(),
-                            threshold_vec.begin(), queue);
-
-
-            // generate random vector
-            distribution.generate(random_vec.begin(),
-                                  random_vec.end(),
-                                  rand, queue);
-
-            // confront the random vector with the threshold
-            compute::transform(random_vec.begin(),
-                               random_vec.end(),
-                               threshold_vec.begin(),
-                               result.state.begin(),
-                               _1 > _2,
-                               queue);
-
-            // generalization in case of more than 2 states
-            for (int i = 0; i + 2 < bn[v_id].table().number_of_states(); i++) {
-                compute::vector<int> temp(dim, context);
-                compute::transform(index_vec.begin(),
-                                   index_vec.end(),
-                                   index_vec.begin(),
-                                   _1 + 1, queue);
-                compute::gather(index_vec.begin(),
-                                index_vec.end(),
-                                device_cpt.begin(),
-                                threshold_vec.begin(), queue);
-                compute::transform(random_vec.begin(),
-                                   random_vec.end(),
-                                   threshold_vec.begin(),
-                                   temp.begin(),
-                                   _1 > _2, queue);
-                compute::transform(temp.begin(),
-                                   temp.end(),
-                                   result.state.begin(),
-                                   result.state.begin(),
-                                   _1 + _2, queue);
-            }
-
-            return result;
-        }
-    };
 } // namespace inference
 } // namespace baylib
 
diff --git a/baylib/inference/likelihood_weighting_cuda.hpp b/baylib/inference/cuda/likelihood_weighting_cuda.hpp
similarity index 100%
rename from baylib/inference/likelihood_weighting_cuda.hpp
rename to baylib/inference/cuda/likelihood_weighting_cuda.hpp
diff --git a/baylib/inference/logic_sampling_cuda.hpp b/baylib/inference/cuda/logic_sampling_cuda.hpp
similarity index 89%
rename from baylib/inference/logic_sampling_cuda.hpp
rename to baylib/inference/cuda/logic_sampling_cuda.hpp
index 3286bc5..dd429c0 100644
--- a/baylib/inference/logic_sampling_cuda.hpp
+++ b/baylib/inference/cuda/logic_sampling_cuda.hpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 
 //! \file logic_sampling_cuda.hpp
 //! \brief Logic Sampling implementation with cuda optimization
@@ -51,13 +52,13 @@ namespace baylib {
          * @return : marginal distribution
          */
         baylib::marginal_distribution<probability_type> make_inference(){
-            cuda_graph_adapter<probability_type> graph = make_cuda_graph_revised(this->bn);
+            cuda_graph_adapter<probability_type> graph = baylib::make_cuda_graph_revised(this->bn);
             bool evidence = evidence_presence(this->bn);
             auto vertex_queue = baylib::sampling_order(this->bn);
             std::vector result_line = logic_sampler(
                     graph, vertex_queue, this->nsamples, evidence, this->seed
             );
-            auto result = reshape_marginal(this->bn, vertex_queue, result_line);
+            auto result = baylib::reshape_marginal(this->bn, vertex_queue, result_line);
             result.normalize();
             return result;
         }
diff --git a/baylib/inference/adaptive_importance_sampling.hpp b/baylib/inference/opencl/adaptive_importance_sampling_opencl.hpp
similarity index 95%
rename from baylib/inference/adaptive_importance_sampling.hpp
rename to baylib/inference/opencl/adaptive_importance_sampling_opencl.hpp
index f593086..13c1fd3 100644
--- a/baylib/inference/adaptive_importance_sampling.hpp
+++ b/baylib/inference/opencl/adaptive_importance_sampling_opencl.hpp
@@ -2,23 +2,23 @@
 // Created by paolo on 11/09/21.
 //
 
-#ifndef BAYLIB_ADAPTIVE_IMPORTANCE_SAMPLING_HPP
-#define BAYLIB_ADAPTIVE_IMPORTANCE_SAMPLING_HPP
+#ifndef BAYLIB_ADAPTIVE_IMPORTANCE_SAMPLING_OPENCL_HPP
+#define BAYLIB_ADAPTIVE_IMPORTANCE_SAMPLING_OPENCL_HPP
 
 #define CL_TARGET_OPENCL_VERSION 220
 
-#include
-#include
-#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
-#include
-#include
+#include
+#include
 
-//! \file adaptive_importance_sampling.hpp
+//! \file adaptive_importance_sampling_opencl.hpp
 //! \brief Adaptive sampling implementation with opencl optimization and multi-thread support
 
 namespace baylib {
@@ -48,7 +48,7 @@ namespace baylib {
             BNetDerived Network_,
             typename Generator_ = std::mt19937
     >
-    class adaptive_importance_sampling: public vectorized_inference_algorithm<Network_>
+    class adaptive_importance_sampling_opencl: public baylib::inference::vectorized_inference_algorithm<Network_>
     {
         using typename vectorized_inference_algorithm<Network_>::probability_type;
         using vectorized_inference_algorithm<Network_>::bn;
@@ -69,7 +69,7 @@ namespace baylib {
          * @param seed : seed for the random generators
          * @param device : opencl device used for the simulation
          */
-        explicit adaptive_importance_sampling(
+        explicit adaptive_importance_sampling_opencl(
                 const network_type & bn,
                 ulong nsamples,
                 size_t memory,
@@ -98,7 +98,7 @@ namespace baylib {
             BAYLIB_ASSERT(std::all_of(bn.begin(), bn.end(),
                           [this](auto &var){ return baylib::cpt_filled_out(bn, var.id()); }),
                           "conditional probability tables must be properly filled to"
-                          " run logic_sampling inference algorithm",
+                          " run logic_sampling_opencl inference algorithm",
                           std::runtime_error);
 
             icpt_vector icptvec{};
@@ -112,7 +112,7 @@ namespace baylib {
                 }
             }
             // If no evidence is present the algorithm degenerates to simple
-            // logic_sampling, and we can skip the learning phase
+            // logic_sampling_opencl, and we can skip the learning phase
             if(evidence_found){
                 ancestors = ancestors_of_evidence(bn);
                 learn_icpt(icptvec);
@@ -365,4 +365,4 @@ namespace baylib {
     }
 }
 
-#endif //BAYLIB_ADAPTIVE_IMPORTANCE_SAMPLING_HPP
+#endif //BAYLIB_ADAPTIVE_IMPORTANCE_SAMPLING_OPENCL_HPP
diff --git a/baylib/inference/logic_sampling.hpp b/baylib/inference/opencl/logic_sampling_opencl.hpp
similarity index 91%
rename from baylib/inference/logic_sampling.hpp
rename to baylib/inference/opencl/logic_sampling_opencl.hpp
index 70589fa..ff36d10 100644
--- a/baylib/inference/logic_sampling.hpp
+++ b/baylib/inference/opencl/logic_sampling_opencl.hpp
@@ -2,8 +2,8 @@
 // Created by elle on 22/07/21.
 //
 
-#ifndef BAYLIB_LOGIC_SAMPLING_HPP
-#define BAYLIB_LOGIC_SAMPLING_HPP
+#ifndef BAYLIB_LOGIC_SAMPLING_OPENCL_HPP
+#define BAYLIB_LOGIC_SAMPLING_OPENCL_HPP
 
 #define CL_TARGET_OPENCL_VERSION 220
 
@@ -12,11 +12,12 @@
 #include
 #include
 
-#include
+#include "baylib/probability/condition_factory.hpp"
 
-#include
+#include "baylib/inference/abstract_inference_algorithm.hpp"
+#include "vectorized_inference_opencl.hpp"
 
-//! \file logic_sampling.hpp
+//! \file logic_sampling_opencl.hpp
 //! \brief Logic Sampling implementation with opencl optimization
 
 namespace baylib {
@@ -46,7 +47,7 @@ namespace baylib {
             BNetDerived Network_,
             typename Generator_ = std::mt19937
     >
-    class logic_sampling : public vectorized_inference_algorithm<Network_>
+    class logic_sampling_opencl : public baylib::inference::vectorized_inference_algorithm<Network_>
     {
         using typename vectorized_inference_algorithm<Network_>::probability_type;
         using vectorized_inference_algorithm<Network_>::bn;
 
         typedef Network_ network_type;
 
     public:
-        logic_sampling(
+        logic_sampling_opencl(
                 const network_type &bn,
                 ulong samples,
                 size_t memory,
@@ -69,7 +70,7 @@ namespace baylib {
             BAYLIB_ASSERT(std::all_of(bn.begin(), bn.end(),
                           [this](auto &var){ return baylib::cpt_filled_out(bn, var.id()); }),
                           "conditional probability tables must be properly filled to"
-                          " run logic_sampling inference algorithm",
+                          " run logic_sampling_opencl inference algorithm",
                           std::runtime_error);
 
             auto [iter_samples, niter] = this->calculate_iterations();
@@ -145,4 +146,4 @@ namespace baylib {
 } // namespace baylib
 
-#endif //BAYLIB_LOGIC_SAMPLING_HPP
\ No newline at end of file
+#endif //BAYLIB_LOGIC_SAMPLING_OPENCL_HPP
\ No newline at end of file
diff --git a/baylib/inference/opencl/vectorized_inference_opencl.hpp b/baylib/inference/opencl/vectorized_inference_opencl.hpp
new file mode 100644
index 0000000..ad5639d
--- /dev/null
+++ b/baylib/inference/opencl/vectorized_inference_opencl.hpp
@@ -0,0 +1,190 @@
+//
+// Created by paolo on 01/03/22.
+//
+
+#ifndef BAYLIB_VECTORIZED_INFERENCE_OPENCL_HPP
+#define BAYLIB_VECTORIZED_INFERENCE_OPENCL_HPP
+
+#define CL_TARGET_OPENCL_VERSION 220
+#define MEMORY_SLACK .8
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace baylib::inference {
+
+    namespace compute = boost::compute;
+    using boost::compute::lambda::_1;
+    using boost::compute::lambda::_2;
+
+    /**
+     * This class models an approximate inference algorithm
+     * vectorized with a GPGPU approach.
+     * The method simulate_node samples a node given the results of
+     * previous simulations of its parent nodes.
+     * @tparam Network_ : the type of bayesian network
+     */
+    template < BNetDerived Network_ >
+    class vectorized_inference_algorithm : public inference_algorithm<Network_>
+    {
+    public:
+        typedef Network_ network_type;
+        using typename inference_algorithm<Network_>::probability_type;
+        using inference_algorithm<Network_>::bn;
+
+        vectorized_inference_algorithm(
+                const Network_ & bn,
+                unsigned long n_samples,
+                unsigned long memory,
+                unsigned int seed = 0,
+                const compute::device &device = compute::system::default_device()
+        )
+        : inference_algorithm<Network_>(bn, n_samples, seed)
+        , memory(memory)
+        , device(device)
+        , context(device)
+        , queue(context, device)
+        , rand(queue, seed)
+        {}
+
+        using prob_v = boost::compute::vector<probability_type>;
+
+    protected:
+        compute::device device;
+        compute::context context;
+        compute::command_queue queue;
+        compute::default_random_engine rand;
+        unsigned long memory;
+
+        /**
+         * calculate the number of iterations needed for a complete simulation without exceeding the boundary set
+         * by the user
+         * @param bn network
+         * @return pair of (samples per iteration, number of iterations)
+         */
+        std::pair<unsigned long, unsigned long> calculate_iterations()
+        {
+            unsigned long sample_p = this->memory / (bn.number_of_variables() * sizeof(probability_type) + 3 * sizeof(uint16_t)) * MEMORY_SLACK;
+            if(sample_p < this->nsamples)
+                return {sample_p, this->nsamples / sample_p};
+            else
+                return {this->nsamples, 1};
+        }
+
+        std::vector<probability_type> accumulate_cpt(unsigned long v_id, baylib::cow::cpt<probability_type> cpt) {
+            auto factory = baylib::condition_factory(bn, v_id, bn.parents_of(v_id));
+            std::vector<probability_type> flat_cpt{};
+            unsigned int n_states = bn[v_id].table().number_of_states();
+            do {
+                auto temp = cpt[factory.get()];
+                flat_cpt.insert(flat_cpt.end(), temp.begin(), temp.end());
+            } while (factory.has_next());
+
+            for (baylib::state_t i = 0; i < flat_cpt.size(); i += n_states)
+                for (baylib::state_t j = 1; j < n_states - 1; j++)
+                    flat_cpt[i + j] += flat_cpt[i + j - 1];
+            return flat_cpt;
+        }
+
+        /**
+         * Simulation of a specific node using opencl
+         * @param cpt cpt of the node
+         * @param parents_result results of previous simulate_node calls
+         * @param dim number of samples of the simulation
+         * @return result of the simulation
+         */
+        bcvec simulate_node(
+                unsigned long v_id,
+                const cow::cpt<probability_type> &cpt,
+                std::vector<bcvec*> &parents_result,
+                int dim
+        )
+        {
+            std::vector<probability_type> flat_cpt_accum = accumulate_cpt(v_id, cpt);
+            bcvec result(dim, cpt.number_of_states(), context);
+            prob_v device_cpt(flat_cpt_accum.size(), context);
+            prob_v threshold_vec(dim, context);
+            prob_v random_vec(dim, context);
+            compute::uniform_real_distribution<probability_type> distribution(0, 1);
+            compute::vector<int> index_vec(dim, context);
+
+            // Async copy of the cpt in gpu memory
+            compute::copy(flat_cpt_accum.begin(), flat_cpt_accum.end(), device_cpt.begin(), queue);
+
+            // cycle for deducing the row of the cpt given the parents' states in the previous simulation
+            if(parents_result.empty())
+                compute::fill(index_vec.begin(), index_vec.end(), 0, queue);
+            else {
+                unsigned int coeff = bn[v_id].table().number_of_states();
+                for (int i = 0; i < parents_result.size(); i++) {
+                    if (i == 0)
+                        compute::transform(parents_result[i]->state.begin(),
+                                           parents_result[i]->state.end(),
+                                           index_vec.begin(),
+                                           _1 * coeff, queue);
+                    else
+                        compute::transform(parents_result[i]->state.begin(),
+                                           parents_result[i]->state.end(),
+                                           index_vec.begin(),
+                                           index_vec.begin(),
+                                           _1 * coeff + _2, queue);
+                    coeff *= parents_result[i]->cardinality;
+                }
+            }
+
+            // get the threshold corresponding to the specific row of the cpt for every single simulation
+            compute::gather(index_vec.begin(),
+                            index_vec.end(),
+                            device_cpt.begin(),
+                            threshold_vec.begin(), queue);
+
+
+            // generate random vector
+            distribution.generate(random_vec.begin(),
+                                  random_vec.end(),
+                                  rand, queue);
+
+            // compare the random vector with the threshold
+            compute::transform(random_vec.begin(),
+                               random_vec.end(),
+                               threshold_vec.begin(),
+                               result.state.begin(),
+                               _1 > _2,
+                               queue);
+
+            // generalization in case of more than 2 states
+            for (int i = 0; i + 2 < bn[v_id].table().number_of_states(); i++) {
+                compute::vector<int> temp(dim, context);
+                compute::transform(index_vec.begin(),
+                                   index_vec.end(),
+                                   index_vec.begin(),
+                                   _1 + 1, queue);
+                compute::gather(index_vec.begin(),
+                                index_vec.end(),
+                                device_cpt.begin(),
+                                threshold_vec.begin(), queue);
+                compute::transform(random_vec.begin(),
+                                   random_vec.end(),
+                                   threshold_vec.begin(),
+                                   temp.begin(),
+                                   _1 > _2, queue);
+                compute::transform(temp.begin(),
+                                   temp.end(),
+                                   result.state.begin(),
+                                   result.state.begin(),
+                                   _1 + _2, queue);
+            }
+
+            return result;
+        }
+    };
+}
+
+#endif //BAYLIB_VECTORIZED_INFERENCE_OPENCL_HPP
\ No newline at end of file
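Aside: the batching arithmetic in `calculate_iterations()` above is easier to sanity-check in isolation. The following standalone sketch (illustrative numbers and names, not library code) reproduces the formula: each sample costs one probability entry per variable plus three 16-bit state words, and only a 0.8 slack fraction of the device budget is used.

```cpp
// Standalone sanity check of the batching formula in calculate_iterations()
// (illustrative numbers and names; not library code).
#include <cstdint>
#include <iostream>
#include <utility>

// memory: device budget in bytes; n_vars: variables in the network.
// Mirrors: memory / (n_vars * sizeof(prob) + 3 * sizeof(uint16_t)) * 0.8
std::pair<unsigned long, unsigned long>
iterations(unsigned long memory, unsigned long n_vars, unsigned long nsamples) {
    const unsigned long bytes_per_sample =
            n_vars * sizeof(float) + 3 * sizeof(uint16_t);
    const auto per_iter =
            static_cast<unsigned long>(memory / bytes_per_sample * 0.8);
    if (per_iter < nsamples)
        return {per_iter, nsamples / per_iter};  // same truncating division as above
    return {nsamples, 1};
}

int main() {
    // 64 MiB budget, 56 variables (Hailfinder-sized), float probabilities:
    // bytes_per_sample = 56 * 4 + 6 = 230, so about 233k samples fit per batch
    // and a one-million-sample request is split into 4 batches.
    auto [per_iter, n_iter] = iterations(64ul << 20, 56, 1'000'000);
    std::cout << per_iter << " samples x " << n_iter << " iterations\n";
}
```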
diff --git a/baylib/tools/gpu/cuda_utils.cu b/baylib/tools/gpu/cuda_utils.cu
index a93d654..5181037 100644
--- a/baylib/tools/gpu/cuda_utils.cu
+++ b/baylib/tools/gpu/cuda_utils.cu
@@ -81,7 +81,7 @@ namespace baylib {
             n_threads = max_threads_per_block;
             chucks = set_num / max_threads_per_block + 1;
         }
-        reduce_marginal_array_kernel_2<<>>(arr, var_num, set_num);
+        reduce_marginal_array_kernel_2<<>>(arr, var_num, set_num);
         std::vector<T> marginal(var_num, 0);
         cudaMemcpy(marginal.data(), arr, sizeof(T) * var_num , cudaMemcpyDeviceToHost);
         return marginal;
diff --git a/baylib/tools/gpu/gpu_utils.hpp b/baylib/tools/gpu/gpu_utils.hpp
index a60483f..dd467d4 100644
--- a/baylib/tools/gpu/gpu_utils.hpp
+++ b/baylib/tools/gpu/gpu_utils.hpp
@@ -5,21 +5,27 @@
 #ifndef BAYLIB_GPU_UTILS_HPP
 #define BAYLIB_GPU_UTILS_HPP
 
+#ifdef BAYLIB_OPENCL
+#define CL_TARGET_OPENCL_VERSION 220
 #include
 #include
-#include
-#include
+#endif
+
+#ifdef BAYLIB_CUDA
 #include
 #include
+#endif
 
+#include
+#include
+#include
 
 /**
  * @file gpu_utils.hpp
- * @brief utils for using boost::compute
+ * @brief utils for using boost::compute and cuda
 */
 
-#define MEMORY_SLACK 80
-
 namespace baylib {
+#ifdef BAYLIB_OPENCL
     /**
      * Container for gpu vectors with built in auto release of the memory after set number of uses
     */
@@ -53,6 +59,7 @@ namespace baylib {
         uint evidence_state{};
     };
+#endif //BAYLIB_OPENCL
 
     /**
      * flatten a cpt into vector preserving the condition order given by the network
@@ -98,6 +105,7 @@ namespace baylib {
         return result;
     }
 
+#ifdef BAYLIB_CUDA
     /**
      *
      * @tparam probability_type : type of cpt entry
     * @param
@@ -121,6 +129,8 @@ namespace baylib {
         graph.load_graph_to_device();
         return graph;
     };
+#endif //BAYLIB_CUDA
+
 }
 
 #endif //BAYLIB_GPU_UTILS_HPP
\ No newline at end of file
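Aside: the `BAYLIB_CUDA` and `BAYLIB_OPENCL` definitions exported by the top-level CMakeLists.txt are meant to guard backend-specific code, exactly as gpu_utils.hpp and the test diffs later in this patch do. A minimal sketch of hypothetical consumer code following the same pattern (the function name and the CUDA-then-OpenCL-then-CPU preference order are choices made for this sketch, not part of the patch):

```cpp
// Sketch of hypothetical downstream code (not part of this patch): keep every
// backend-specific header and algorithm behind the macros, so a build with
// neither GPU toolchain still compiles the CPU path.
#include "baylib/inference/likelihood_weighting.hpp"
#ifdef BAYLIB_CUDA
#include "baylib/inference/cuda/logic_sampling_cuda.hpp"
#endif
#ifdef BAYLIB_OPENCL
#include "baylib/inference/opencl/logic_sampling_opencl.hpp"
#endif

template <typename Network>
auto best_effort_inference(const Network &bn,
                           unsigned long samples,
                           [[maybe_unused]] unsigned long memory,
                           [[maybe_unused]] unsigned int threads) {
#ifdef BAYLIB_CUDA
    // prefer CUDA when the library was built with it
    return baylib::inference::logic_sampling_cuda<Network>(bn, samples).make_inference();
#elif defined(BAYLIB_OPENCL)
    // the OpenCL algorithms additionally take the device-memory budget
    return baylib::inference::logic_sampling_opencl<Network>(bn, samples, memory).make_inference();
#else
    // CPU fallback
    return baylib::inference::likelihood_weighting<Network>(bn, samples, threads).make_inference();
#endif
}
```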
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 76eaf97..4c655df 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -13,4 +13,4 @@ add_demo(example_load_network)
 add_demo(example_network)
 add_demo(example_named_network)
 add_demo(example_interact_network)
-add_demo(example_gpu_inference)
\ No newline at end of file
+add_demo(example_cuda_inference)
\ No newline at end of file
diff --git a/examples/example_cuda_inference.cpp b/examples/example_cuda_inference.cpp
new file mode 100644
index 0000000..a9eeecf
--- /dev/null
+++ b/examples/example_cuda_inference.cpp
@@ -0,0 +1,36 @@
+
+//
+// Created by paolo on 28/10/21.
+//
+
+#include
+#include
+
+#include
+
+/**
+ * Baylib implements several algorithms exploiting gpgpu parallelization.
+ */
+
+int main(int argc, char** argv){
+
+    using namespace baylib;
+    using namespace baylib::inference;
+
+    baylib::xdsl_parser parser;
+    // We use the Hailfinder network for this example
+    auto bn = parser.deserialize("../../examples/xdsl/Hailfinder2.5.xdsl");
+
+    // GPU algorithms use Monte Carlo simulations to approximate inference results; all simulations run
+    // simultaneously, so memory usage has to be taken into account.
+    // Every GPU algorithm takes the network as its first argument and the number of samples to generate
+    // as its second; the OpenCL algorithms additionally take the amount of memory available on the device.
+    likelihood_weighting_cuda ls(bn, 10000);
+
+    // GPU algorithms offer the same external interface as all other baylib algorithms
+    auto result = ls.make_inference();
+
+    // The main advantage of this kind of parallelization is that, for a high number of samples, the
+    // computation time grows very slowly compared to the classical algorithms (as long as enough memory is provided)
+    std::cout << result << '\n';
+
+}
\ No newline at end of file
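Aside: the OpenCL twin of this example is renamed further below (example_opencl_inference.cpp) but its body is not shown in this diff. A sketch of the equivalent call, assuming only the interfaces introduced in this patch (the parser header path and the `<double>` parameter are assumptions):

```cpp
// Hypothetical OpenCL counterpart of the CUDA example above — a sketch, not
// the actual contents of example_opencl_inference.cpp.
#include <iostream>

#include "baylib/parser/xdsl_parser.hpp"  // assumed header path for the parser
#include "baylib/inference/opencl/logic_sampling_opencl.hpp"

int main() {
    using namespace baylib;
    using namespace baylib::inference;

    xdsl_parser<double> parser;  // <double> is an assumption
    auto bn = parser.deserialize("../../examples/xdsl/Hailfinder2.5.xdsl");

    // Unlike the CUDA algorithms, the OpenCL ones take a third argument:
    // the device-memory budget in bytes, which calculate_iterations()
    // uses to split the requested samples into batches.
    logic_sampling_opencl<decltype(bn)> ls(bn, 10000, 64ul << 20);

    std::cout << ls.make_inference() << '\n';
}
```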
diff --git a/examples/example_inference.cpp b/examples/example_inference.cpp
index 5ec7230..bec9d77 100644
--- a/examples/example_inference.cpp
+++ b/examples/example_inference.cpp
@@ -1,6 +1,6 @@
 #include
 #include
-#include
+#include "baylib/inference/opencl/logic_sampling_opencl.hpp"
 #include
 #include
diff --git a/examples/example_gpu_inference.cpp b/examples/example_opencl_inference.cpp
similarity index 95%
rename from examples/example_gpu_inference.cpp
rename to examples/example_opencl_inference.cpp
index 4d4a50e..c315578 100644
--- a/examples/example_gpu_inference.cpp
+++ b/examples/example_opencl_inference.cpp
@@ -2,7 +2,7 @@
 // Created by paolo on 28/10/21.
 //
 
-#include
+#include "baylib/inference/opencl/logic_sampling_opencl.hpp"
 #include
 #include
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 84db540..c0579f2 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -5,6 +5,7 @@ FetchContent_Declare(
         googletest
         GIT_REPOSITORY https://github.com/google/googletest.git
         GIT_TAG e2239ee6043f73722e7aa812a459f54a28552929 # release-1.11.0
+
 )
 
 if (WIN32)
diff --git a/test/cow_test.cpp b/test/cow_test.cpp
index e5c1969..474d5a3 100644
--- a/test/cow_test.cpp
+++ b/test/cow_test.cpp
@@ -6,13 +6,16 @@
 #include
 #include
 #include
-#include
 #include
 #include
-#include
 
-#ifdef CUDA_CMP_FOUND
-#include
+#ifdef BAYLIB_OPENCL
+#include
+#include
+#endif
+
+#ifdef BAYLIB_CUDA
+#include
 #endif
 
 #define THREADS std::thread::hardware_concurrency()
@@ -101,17 +104,21 @@ TEST_F(cow_tests, cow_inference){
     auto n_map = baylib::make_name_map(net5);
     baylib::condition c;
 
-    auto logic = logic_sampling(net5, SAMPLES, MEMORY);
     auto gibbs = gibbs_sampling(net5, SAMPLES, THREADS);
     auto likely = likelihood_weighting(net5, SAMPLES, THREADS);
-    auto adaptive = adaptive_importance_sampling(net5, SAMPLES, MEMORY);
 
-#ifdef CUDA_CMP_FOUND
+    std::vector*> algorithms = {&gibbs, &likely};
+
+#ifdef BAYLIB_CUDA
     auto logic_cuda = logic_sampling_cuda(net5, SAMPLES);
-    std::vector*> algorithms = {&gibbs, &logic, &likely, &adaptive, &logic_cuda};
-#else
-    std::vector*> algorithms = {&gibbs, &logic, &likely, &adaptive};
+    algorithms.emplace_back(&logic_cuda);
 #endif
+
+#ifdef BAYLIB_OPENCL
+    auto logic_opencl = logic_sampling_opencl(net5, SAMPLES, MEMORY);
+    auto adaptive_opencl = adaptive_importance_sampling_opencl(net5, SAMPLES, MEMORY);
+    algorithms.emplace_back(&logic_opencl);
+    algorithms.emplace_back(&adaptive_opencl);
+#endif
 
     auto name_map = baylib::make_name_map(net5);
     const auto& e1 = net5[name_map["Income"]].table();
diff --git a/test/evidence_test.cpp b/test/evidence_test.cpp
index 99fed9b..5489181 100644
--- a/test/evidence_test.cpp
+++ b/test/evidence_test.cpp
@@ -6,14 +6,18 @@
 #include
 #include
 #include
-#include
 #include
 #include
-#include
 #include
 
-#ifdef CUDA_CMP_FOUND
-#include
-#include
+
+#ifdef BAYLIB_CUDA
+#include "baylib/inference/cuda/logic_sampling_cuda.hpp"
+#include "baylib/inference/cuda/likelihood_weighting_cuda.hpp"
+#endif
+
+#ifdef BAYLIB_OPENCL
+#include "baylib/inference/opencl/logic_sampling_opencl.hpp"
+#include
 #endif
 
 #define THREADS std::thread::hardware_concurrency()
@@ -32,14 +36,16 @@ using bnet = baylib::bayesian_net<Probability>;
 
 template<typename Probability>
 std::vector<baylib::marginal_distribution<Probability>> get_results(const bnet<Probability> &bn){
    std::vector<baylib::marginal_distribution<Probability>> results{
-            logic_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-            gibbs_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            adaptive_importance_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-#ifdef CUDA_CMP_FOUND
-            logic_sampling_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
-            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference()
+            gibbs_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+#ifdef BAYLIB_CUDA
+            logic_sampling_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
+            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
+#endif
+#ifdef BAYLIB_OPENCL
+            logic_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+            adaptive_importance_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
 #endif
     };
     return results;
 }
 
@@ -48,12 +54,14 @@ template<typename Probability>
 
 template<typename Probability>
 std::vector<baylib::marginal_distribution<Probability>> get_results_heavy(const bnet<Probability> &bn){
     std::vector<baylib::marginal_distribution<Probability>> results{
-            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            gibbs_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            adaptive_importance_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-#ifdef CUDA_CMP_FOUND
-            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference()
+            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            gibbs_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+#ifdef BAYLIB_OPENCL
+            adaptive_importance_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+#endif
+#ifdef BAYLIB_CUDA
+            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference()
 #endif
     };
     return results;
diff --git a/test/inference_test.cpp b/test/inference_test.cpp
index 8da3c67..52b4454 100644
--- a/test/inference_test.cpp
+++ b/test/inference_test.cpp
@@ -6,14 +6,17 @@
 #include
 #include
 #include
-#include
 #include
 #include
-#include
 
-#ifdef CUDA_CMP_FOUND
-#include
-#include
+#ifdef BAYLIB_OPENCL
+#include "baylib/inference/opencl/logic_sampling_opencl.hpp"
+#include
+#endif
+
+#ifdef BAYLIB_CUDA
+#include "baylib/inference/cuda/logic_sampling_cuda.hpp"
+#include "baylib/inference/cuda/likelihood_weighting_cuda.hpp"
 #endif
 
 #define THREADS std::thread::hardware_concurrency()
@@ -29,14 +32,16 @@ using bnet = baylib::bayesian_net<Probability>;;
 
 template<typename Probability>
 std::vector<baylib::marginal_distribution<Probability>> get_results(const bnet<Probability> &bn){
     std::vector<baylib::marginal_distribution<Probability>> results{
-            logic_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-            gibbs_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            adaptive_importance_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-#ifdef CUDA_CMP_FOUND
-            logic_sampling_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
-            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference()
+            gibbs_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+#ifdef BAYLIB_OPENCL
+            logic_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+            adaptive_importance_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+#endif
+#ifdef BAYLIB_CUDA
+            logic_sampling_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
+            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference()
 #endif
     };
     return results;
 }
 
@@ -45,13 +50,15 @@ std::vector<baylib::marginal_distribution<Probability>> get_results_deterministic(const bnet<Probability> &bn){
     std::vector<baylib::marginal_distribution<Probability>> results{
-            logic_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            adaptive_importance_sampling<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-#ifdef CUDA_CMP_FOUND
-            logic_sampling_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
-            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference()
+            likelihood_weighting<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            rejection_sampling<bnet<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+#ifdef BAYLIB_CUDA
+            logic_sampling_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
+            likelihood_weighting_cuda<bnet<Probability>>(bn, SAMPLES).make_inference(),
+#endif
+#ifdef BAYLIB_OPENCL
+            logic_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+            adaptive_importance_sampling_opencl<bnet<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
 #endif
     };
     return results;
 }
 
@@ -64,7 +71,7 @@ TEST(inference_tests, big_bang_Coma){
     //https://repo.bayesfusion.com/network/permalink?net=Small+BNs%2FComa.xdsl
     auto net1 = baylib::xdsl_parser().deserialize("../../examples/xdsl/Coma.xdsl");
 
-    //baylib::inference::logic_sampling alg = baylib::inference::logic_sampling(SAMPLES, MEMORY);
+    //baylib::inference::logic_sampling_opencl alg = baylib::inference::logic_sampling_opencl(SAMPLES, MEMORY);
     auto n_map = baylib::make_name_map(net1);
     for (baylib::marginal_distribution& result: get_results(net1)){
         ASSERT_NEAR(result[n_map["MetastCancer"]][0], .2, TOLERANCE);
diff --git a/test/regression_inference_test.cpp b/test/regression_inference_test.cpp
index a7259c8..314d4d2 100644
--- a/test/regression_inference_test.cpp
+++ b/test/regression_inference_test.cpp
@@ -5,14 +5,17 @@
 #include
 #include
 #include
-#include
 #include
 #include
-#include
 
-#ifdef CUDA_CMP_FOUND
-#include
-#include
+#ifdef BAYLIB_CUDA
+#include
+#include
+#endif
+
+#ifdef BAYLIB_OPENCL
+#include
+#include
 #endif
 
 #define THREADS std::thread::hardware_concurrency()
@@ -26,14 +29,16 @@ using Probability = double;
 
 template<typename Probability>
 std::vector<baylib::marginal_distribution<Probability>> get_results(const baylib::bayesian_net<Probability> &bn){
    std::vector<baylib::marginal_distribution<Probability>> results{
-            logic_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-            gibbs_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            likelihood_weighting<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            rejection_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            adaptive_importance_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-#ifdef CUDA_CMP_FOUND
+            gibbs_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            likelihood_weighting<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            rejection_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+#ifdef BAYLIB_CUDA
             logic_sampling_cuda<baylib::bayesian_net<Probability>>(bn, SAMPLES).make_inference(),
-            likelihood_weighting_cuda<baylib::bayesian_net<Probability>>(bn, SAMPLES).make_inference()
+            likelihood_weighting_cuda<baylib::bayesian_net<Probability>>(bn, SAMPLES).make_inference(),
+#endif
+#ifdef BAYLIB_OPENCL
+            logic_sampling_opencl<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+            adaptive_importance_sampling_opencl<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
 #endif
     };
     return results;
 }
 
@@ -42,13 +47,15 @@ template<typename Probability>
 template<typename Probability>
 std::vector<baylib::marginal_distribution<Probability>> get_results_deterministic(const baylib::bayesian_net<Probability> &bn){
     std::vector<baylib::marginal_distribution<Probability>> results{
-            logic_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-            likelihood_weighting<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            rejection_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
-            adaptive_importance_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
-#ifdef CUDA_CMP_FOUND
+            likelihood_weighting<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+            rejection_sampling<baylib::bayesian_net<Probability>>(bn, SAMPLES, THREADS).make_inference(),
+#ifdef BAYLIB_CUDA
             logic_sampling_cuda<baylib::bayesian_net<Probability>>(bn, SAMPLES).make_inference(),
-            likelihood_weighting_cuda<baylib::bayesian_net<Probability>>(bn, SAMPLES).make_inference()
+            likelihood_weighting_cuda<baylib::bayesian_net<Probability>>(bn, SAMPLES).make_inference(),
+#endif
+#ifdef BAYLIB_OPENCL
+            logic_sampling_opencl<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
+            adaptive_importance_sampling_opencl<baylib::bayesian_net<Probability>>(bn, SAMPLES, MEMORY).make_inference(),
 #endif
     };
     return results;
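For downstream users, the renames in this patch reduce to the following mapping (a summary derived from the diffs above, not code from the repository):

```cpp
// Include-path and identifier migration implied by this patch:
//
//   baylib/inference/logic_sampling.hpp
//     -> baylib/inference/opencl/logic_sampling_opencl.hpp
//   baylib/inference/adaptive_importance_sampling.hpp
//     -> baylib/inference/opencl/adaptive_importance_sampling_opencl.hpp
//   baylib/inference/logic_sampling_cuda.hpp
//     -> baylib/inference/cuda/logic_sampling_cuda.hpp
//   baylib/inference/likelihood_weighting_cuda.hpp
//     -> baylib/inference/cuda/likelihood_weighting_cuda.hpp
//
//   class logic_sampling               -> logic_sampling_opencl
//   class adaptive_importance_sampling -> adaptive_importance_sampling_opencl
//
//   macro CUDA_CMP_FOUND -> BAYLIB_CUDA; BAYLIB_OPENCL is new and is set only
//   when find_package(OpenCL) succeeds (OpenCL is no longer REQUIRED).
```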