refactor: refactored opencl algorithms in order to support non-gpu able systems
mspronesti · Mar 1, 2022 · 8583e95 · 8583e95
1 parent 041ae5d
commit 8583e95
Show file tree

Hide file tree

Showing 19 changed files with 406 additions and 292 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -16,15 +16,30 @@ set(CMAKE_MODULE_PATH cmake_module)
 option(BUILD_EXAMPLES "Build the examples showing how to use baylib" OFF)
 
 # change this option to compile tests
-option(BUILD_TESTS "Build tests for baylib" ON)
+option(BUILD_TESTS "Build tests for baylib" OFF)
 
+#CUDA
 check_language(CUDA)
 if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
-    add_compile_definitions(CUDA_CMP_FOUND)
+    add_compile_definitions(BAYLIB_CUDA)
     set(CMAKE_CUDA_STANDARD 14)
     set(CMAKE_CUDA_STANDARD_REQUIRED ON)
-    set(CUDA_ARCHITECTURES OFF)
+    set(CMAKE_CUDA_ARCHITECTURES OFF)
+endif()
+
+#openCL
+if(NOT TARGET OpenCL::OpenCL)
+    set(CMAKE_PREFIX_PATH "${OPENCL_ROOT}")
+    find_package(OpenCL)
+    if(OpenCL_FOUND)
+        set(BAYLIB_OPENCL 1)
+        add_compile_definitions(BAYLIB_OPENCL)
+        set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_COMPILE_DEFINITIONS CL_TARGET_OPENCL_VERSION=220)
+        add_compile_definitions(BAYLIB_OPENCL)
+    else()
+        set(BAYLIB_OPENCL 0)
+    endif()
 endif()
 
 

diff --git a/baylib/CMakeLists.txt b/baylib/CMakeLists.txt
@@ -13,13 +13,6 @@ if(NOT TARGET Boost::boost)
 endif()
 
 set(threading=multi)
-
-#openCL
-if(NOT TARGET OpenCL::OpenCL)
-    set(CMAKE_PREFIX_PATH "${OPENCL_ROOT}")
-    find_package(OpenCL REQUIRED)
-    set_target_properties(OpenCL::OpenCL PROPERTIES INTERFACE_COMPILE_DEFINITIONS CL_TARGET_OPENCL_VERSION=220)
-endif()
 find_package(Threads REQUIRED)
 find_package(TBB REQUIRED)
 
@@ -36,9 +29,7 @@ set (src
     inference/abstract_inference_algorithm.hpp
     inference/gibbs_sampling.hpp
     inference/likelihood_weighting.hpp
-    inference/logic_sampling.hpp
     inference/rejection_sampling.hpp
-    inference/adaptive_importance_sampling.hpp
     network/bayesian_net.hpp
     network/bayesian_utils.hpp
     network/random_variable.hpp
@@ -55,19 +46,27 @@ set (src
     baylib_assert.h
     baylib_concepts.hpp
 )
+set(src_opencl
+    inference/opencl/logic_sampling_opencl.hpp
+    inference/opencl/adaptive_importance_sampling_opencl.hpp
+    inference/opencl/vectorized_inference_opencl.hpp
+)
 set (src_cuda
-        inference/cuda/samplers_cuda.cuh
-        inference/cuda/samplers_cuda.cu
-        inference/logic_sampling_cuda.hpp
-        inference/likelihood_weighting_cuda.hpp
-        tools/gpu/cuda_utils.cuh
-        tools/gpu/cuda_utils.cu
-        tools/gpu/cuda_graph_adapter.cuh
+    inference/cuda/samplers_cuda.cuh
+    inference/cuda/samplers_cuda.cu
+    inference/cuda/logic_sampling_cuda.hpp
+    inference/cuda/likelihood_weighting_cuda.hpp
+    tools/gpu/cuda_utils.cuh
+    tools/gpu/cuda_utils.cu
+    tools/gpu/cuda_graph_adapter.cuh
     )
 
 if(CMAKE_CUDA_COMPILER)
     list(APPEND src ${src_cuda})
 endif()
+if(BAYLIB_OPENCL)
+    list(APPEND src ${src_opencl})
+endif()
 
 set (BAYLIB_INCLUDE_LIBS ../baylib)
 
@@ -82,7 +81,8 @@ if(CMAKE_CUDA_COMPILER)
             PROPERTIES
             CUDA_SEPARABLE_COMPILATION ON
             POSITION_INDEPENDENT_CODE ON
-            LINKER_LANGUAGE CXX)
+            LINKER_LANGUAGE CXX
+        )
 endif()
 
 target_include_directories(baylib

diff --git a/baylib/inference/abstract_inference_algorithm.hpp b/baylib/inference/abstract_inference_algorithm.hpp
@@ -1,15 +1,11 @@
 #ifndef BAYLIB_ABSTRACT_INFERENCE_ALGORITHM_HPP
 #define BAYLIB_ABSTRACT_INFERENCE_ALGORITHM_HPP
 
-#define CL_TARGET_OPENCL_VERSION 220
+
 
 #include <baylib/network/bayesian_utils.hpp>
 #include <baylib/probability/marginal_distribution.hpp>
 #include <baylib/tools/random/random_generator.hpp>
-#include <baylib/tools/gpu/gpu_utils.hpp>
-#include <boost/compute/core.hpp>
-#include <boost/compute.hpp>
-#include <boost/compute/device.hpp>
 #include <future>
 #include <baylib/baylib_concepts.hpp>
 
@@ -65,9 +61,9 @@ namespace baylib {
 
             void set_seed(unsigned int _seed) { seed = _seed; }
 
-        protected:
             const network_type & bn;
             unsigned long nsamples;
+        protected:
             unsigned int seed;
         };
 
@@ -154,171 +150,6 @@ namespace baylib {
             unsigned int nthreads;
         };
 
-
-        namespace compute = boost::compute;
-        using boost::compute::lambda::_1;
-        using boost::compute::lambda::_2;
-        /**
-         * This class models an approximate inference algorithm
-         * vectorized with a GPGPU approach.
-         * the method simulate_node samples a node given the results of
-         * previous simulations of its parents nodes
-         * @tparam Network_  : the type of bayesian network
-         */
-        template < BNetDerived Network_ >
-        class vectorized_inference_algorithm : public inference_algorithm<Network_>
-        {
-        public:
-            using typename inference_algorithm<Network_>::network_type;
-            using typename inference_algorithm<Network_>::probability_type;
-            using  inference_algorithm<Network_>::bn;
-
-            vectorized_inference_algorithm(
-                    const network_type & bn,
-                    ulong n_samples,
-                    size_t memory,
-                    uint seed = 0,
-                    const compute::device &device = compute::system::default_device()
-            )
-            : inference_algorithm<Network_>(bn, n_samples, seed)
-            , memory(memory)
-            , device(device)
-            , context(device)
-            , queue(context, device)
-            , rand(queue, seed)
-            {}
-
-            using prob_v = boost::compute::vector<probability_type>;
-
-        protected:
-            compute::device device;
-            compute::context context;
-            compute::command_queue queue;
-            compute::default_random_engine rand;
-            size_t memory;
-
-            /**
-             * calculate the number of iterations needed for a complete simulation without exceeding the boundary set
-             * by the user
-             * @param bn network
-             * @return pair<number of samples per iteration, number of iteration>
-             */
-            std::pair<ulong, ulong> calculate_iterations()
-            {
-                ulong sample_p = this->memory / (bn.number_of_variables() * sizeof(probability_type) + 3 * sizeof(cl_ushort)) * MEMORY_SLACK / 100;
-                if(sample_p < this->nsamples)
-                    return {sample_p, this->nsamples / sample_p};
-                else
-                    return {this->nsamples, 1};
-            }
-
-            std::vector<probability_type> accumulate_cpt(ulong v_id, baylib::cow::cpt<probability_type> cpt) {
-                auto factory = baylib::condition_factory(bn, v_id, bn.parents_of(v_id));
-                std::vector<probability_type> flat_cpt{};
-                uint n_states = bn[v_id].table().number_of_states();
-                do {
-                    auto temp = cpt[factory.get()];
-                    flat_cpt.insert(flat_cpt.end(), temp.begin(), temp.end());
-                } while (factory.has_next());
-
-                for (baylib::state_t i = 0; i < flat_cpt.size(); i += n_states)
-                    for (baylib::state_t j = 1; j < n_states - 1; j++)
-                        flat_cpt[i + j] += flat_cpt[i + j - 1];
-                return flat_cpt;
-            }
-
-            /**
-             * Simulations of a specific node using opencl
-             * @param cpt cpt of the node
-             * @param parents_result results of previous simulate_node calls
-             * @param dim number of samples of the simulation
-             * @return result of the simulation
-             */
-            bcvec simulate_node(
-                    ulong v_id,
-                    const cow::cpt<probability_type> &cpt,
-                    std::vector<bcvec*> &parents_result,
-                    int dim
-            )
-            {
-                std::vector<probability_type> flat_cpt_accum = accumulate_cpt(v_id, cpt);
-                bcvec result(dim, cpt.number_of_states(), context);
-                prob_v device_cpt(flat_cpt_accum.size(), context);
-                prob_v threshold_vec(dim, context);
-                prob_v random_vec(dim, context);
-                compute::uniform_real_distribution<probability_type> distribution(0, 1);
-                compute::vector<int> index_vec(dim, context);
-
-                // Async copy of the cpt in gpu memory
-                compute::copy(flat_cpt_accum.begin(), flat_cpt_accum.end(), device_cpt.begin(), queue);
-
-                // cycle for deducing the row of the cpt given the parents state in the previous simulation
-                if(parents_result.empty())
-                    compute::fill(index_vec.begin(), index_vec.end(), 0, queue);
-                else {
-                    uint coeff = bn[v_id].table().number_of_states();
-                    for (int i = 0; i < parents_result.size(); i++) {
-                        if (i == 0)
-                            compute::transform(parents_result[i]->state.begin(),
-                                               parents_result[i]->state.end(),
-                                               index_vec.begin(),
-                                               _1 * coeff, queue);
-                        else
-                            compute::transform(parents_result[i]->state.begin(),
-                                               parents_result[i]->state.end(),
-                                               index_vec.begin(),
-                                               index_vec.begin(),
-                                               _1 * coeff + _2, queue);
-                        coeff *= parents_result[i]->cardinality;
-                    }
-                }
-
-                // get the threshold corresponding to the specific row of the cpt for every single simulation
-                compute::gather(index_vec.begin(),
-                                index_vec.end(),
-                                device_cpt.begin(),
-                                threshold_vec.begin(), queue);
-
-
-                // generate random vector
-                distribution.generate(random_vec.begin(),
-                                      random_vec.end(),
-                                      rand, queue);
-
-                // confront the random vector with the threshold
-                compute::transform(random_vec.begin(),
-                                   random_vec.end(),
-                                   threshold_vec.begin(),
-                                   result.state.begin(),
-                                   _1 > _2,
-                                   queue);
-
-                // generalization in case of more than 2 states
-                for (int i = 0; i + 2 < bn[v_id].table().number_of_states(); i++) {
-                    compute::vector<int> temp(dim, context);
-                    compute::transform(index_vec.begin(),
-                                       index_vec.end(),
-                                       index_vec.begin(),
-                                       _1 + 1, queue);
-                    compute::gather(index_vec.begin(),
-                                    index_vec.end(),
-                                    device_cpt.begin(),
-                                    threshold_vec.begin(), queue);
-                    compute::transform(random_vec.begin(),
-                                       random_vec.end(),
-                                       threshold_vec.begin(),
-                                       temp.begin(),
-                                       _1 > _2, queue);
-                    compute::transform(temp.begin(),
-                                       temp.end(),
-                                       result.state.begin(),
-                                       result.state.begin(),
-                                       _1 + _2, queue);
-                }
-
-                return result;
-            }
-        };
     } // namespace inference
 } // namespace baylib
 

diff --git a/baylib/inference/likelihood_weighting_cuda.hpp b/baylib/inference/cuda/likelihood_weighting_cuda.hpp
diff --git a/baylib/inference/logic_sampling_cuda.hpp b/baylib/inference/cuda/logic_sampling_cuda.hpp
@@ -10,6 +10,7 @@
 #include <baylib/inference/cuda/samplers_cuda.cuh>
 #include <baylib/tools/gpu/cuda_utils.cuh>
 #include <baylib/network/bayesian_utils.hpp>
+#include <baylib/tools/gpu/gpu_utils.hpp>
 
 //! \file logic_sampling_cuda.hpp
 //! \brief Logic Sampling implementation with cuda optimization
@@ -51,13 +52,13 @@ namespace baylib {
              * @return : marginal distribution
              */
             baylib::marginal_distribution<probability_type> make_inference(){
-                cuda_graph_adapter<probability_type> graph = make_cuda_graph_revised<probability_type>(this->bn);
+                cuda_graph_adapter<probability_type> graph = baylib::make_cuda_graph_revised<probability_type>(this->bn);
                 bool evidence = evidence_presence(this->bn);
                 auto vertex_queue = baylib::sampling_order(this->bn);
                 std::vector<uint> result_line = logic_sampler(
                         graph, vertex_queue, this->nsamples, evidence, this->seed
                         );
-                auto result = reshape_marginal<probability_type>(this->bn, vertex_queue, result_line);
+                auto result = baylib::reshape_marginal<probability_type>(this->bn, vertex_queue, result_line);
                 result.normalize();
                 return result;
             }