diff --git a/benchmarks/filters/radius_outlier_removal.cpp b/benchmarks/filters/radius_outlier_removal.cpp index 66aa527d1a5..d8131fe3806 100644 --- a/benchmarks/filters/radius_outlier_removal.cpp +++ b/benchmarks/filters/radius_outlier_removal.cpp @@ -15,6 +15,7 @@ BM_RadiusOutlierRemoval(benchmark::State& state, const std::string& file) ror.setInputCloud(cloud); ror.setRadiusSearch(0.02); ror.setMinNeighborsInRadius(14); + ror.setNumberOfThreads(1); pcl::PointCloud::Ptr cloud_voxelized( new pcl::PointCloud); diff --git a/common/include/pcl/impl/pcl_base.hpp b/common/include/pcl/impl/pcl_base.hpp index 93d6c3844cb..31e44ad388c 100644 --- a/common/include/pcl/impl/pcl_base.hpp +++ b/common/include/pcl/impl/pcl_base.hpp @@ -167,6 +167,15 @@ pcl::PCLBase::initCompute () for (auto i = indices_size; i < indices_->size (); ++i) { (*indices_)[i] = i; } } + // Resolve the number of threads: 0 means one per processor when OpenMP is available +#ifdef _OPENMP + num_threads_ = num_threads_ != 0 ? num_threads_ : omp_get_num_procs(); +#else + if (num_threads_ > 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // no OpenMP: warn only about an explicit request for several threads +#endif + return (true); } diff --git a/common/include/pcl/pcl_base.h b/common/include/pcl/pcl_base.h index ebf294b332b..2a11e49bc78 100644 --- a/common/include/pcl/pcl_base.h +++ b/common/include/pcl/pcl_base.h @@ -155,6 +155,11 @@ namespace pcl /** \brief If no set of indices are given, we construct a set of fake indices that mimic the input PointCloud. */ bool fake_indices_; + /** + * \brief Number of threads used if the algorithm supports parallelization; 0 is resolved to a concrete count in initCompute() + */ + unsigned int num_threads_{0}; + /** \brief This method should get called before starting the actual computation. * * Internally, initCompute() does the following: @@ -233,6 +238,11 @@ namespace pcl /** \brief If no set of indices are given, we construct a set of fake indices that mimic the input PointCloud. 
*/ bool fake_indices_; + /** + * \brief Number of threads used if the algorithm supports parallelization; 0 is resolved to a concrete count in initCompute() + */ + unsigned int num_threads_{0}; + /** \brief The size of each individual field. */ std::vector field_sizes_; diff --git a/common/src/pcl_base.cpp b/common/src/pcl_base.cpp index 5a482c2dc90..898a5e6bc81 100644 --- a/common/src/pcl_base.cpp +++ b/common/src/pcl_base.cpp @@ -130,6 +130,15 @@ pcl::PCLBase::initCompute () std::iota(indices_->begin () + indices_size, indices_->end (), indices_size); } + // Resolve the number of threads: 0 means one per processor when OpenMP is available +#ifdef _OPENMP + num_threads_ = num_threads_ != 0 ? num_threads_ : omp_get_num_procs(); +#else + if (num_threads_ > 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // no OpenMP: warn only about an explicit request for several threads +#endif + return (true); } diff --git a/features/include/pcl/features/fpfh_omp.h b/features/include/pcl/features/fpfh_omp.h index 2b7de4eaf96..da61659285f 100644 --- a/features/include/pcl/features/fpfh_omp.h +++ b/features/include/pcl/features/fpfh_omp.h @@ -92,20 +92,23 @@ namespace pcl using PointCloudOut = typename Feature::PointCloudOut; /** \brief Initialize the scheduler and set the number of threads to use. - * \param[in] nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param[in] num_threads the number of hardware threads to use (0 sets the value back to automatic) */ - FPFHEstimationOMP (unsigned int nr_threads = 0) + FPFHEstimationOMP (unsigned int num_threads = 0) { feature_name_ = "FPFHEstimationOMP"; - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); } /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param[in] nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param[in] num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); + + protected: + using PCLBase::num_threads_; private: /** \brief Estimate the Fast Point Feature Histograms (FPFH) descriptors at a set of points given by @@ -119,9 +122,6 @@ namespace pcl public: /** \brief The number of subdivisions for each angular feature interval. */ int nr_bins_f1_{11}, nr_bins_f2_{11}, nr_bins_f3_{11}; - private: - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; }; } diff --git a/features/include/pcl/features/impl/fpfh_omp.hpp b/features/include/pcl/features/impl/fpfh_omp.hpp index a81a4b00cb3..797d6841f53 100644 --- a/features/include/pcl/features/impl/fpfh_omp.hpp +++ b/features/include/pcl/features/impl/fpfh_omp.hpp @@ -49,16 +49,15 @@ ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::FPFHEstimationOMP::setNumberOfThreads (unsigned int nr_threads) +pcl::FPFHEstimationOMP::setNumberOfThreads (unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads != 1) + PCL_WARN("OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -112,7 +111,7 @@ pcl::FPFHEstimationOMP::computeFeature (PointCloud default(none) \ shared(spfh_hist_lookup, spfh_indices_vec) \ firstprivate(nn_indices, nn_dists) \ - num_threads(threads_) + num_threads(num_threads_) for (std::ptrdiff_t i = 0; i < static_cast (spfh_indices_vec.size ()); ++i) { // Get the next point index @@ -141,7 +140,7 @@ pcl::FPFHEstimationOMP::computeFeature (PointCloud default(none) \ shared(nr_bins, output, spfh_hist_lookup) \ firstprivate(nn_dists, nn_indices) \ - num_threads(threads_) + num_threads(num_threads_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { // Find the indices of point idx's neighbors... diff --git a/features/include/pcl/features/impl/intensity_gradient.hpp b/features/include/pcl/features/impl/intensity_gradient.hpp index d9e7baf3de4..ccf706ffd74 100644 --- a/features/include/pcl/features/impl/intensity_gradient.hpp +++ b/features/include/pcl/features/impl/intensity_gradient.hpp @@ -149,13 +149,6 @@ pcl::IntensityGradientEstimation nn_dists (k_); output.is_dense = true; -#ifdef _OPENMP - if (threads_ == 0) { - threads_ = omp_get_num_procs(); - PCL_DEBUG ("[pcl::IntensityGradientEstimation::computeFeature] Setting number of threads to %u.\n", threads_); - } -#endif // _OPENMP - // If the data is dense, we don't need to check for NaN if (surface_->is_dense) { @@ -163,7 +156,7 @@ pcl::IntensityGradientEstimation (indices_->size ()); ++idx) { @@ -203,7 +196,7 @@ pcl::IntensityGradientEstimation (indices_->size ()); ++idx) { diff --git a/features/include/pcl/features/impl/normal_3d_omp.hpp b/features/include/pcl/features/impl/normal_3d_omp.hpp index 736b5c8d2fe..2f01fd8d9f3 100644 --- a/features/include/pcl/features/impl/normal_3d_omp.hpp +++ b/features/include/pcl/features/impl/normal_3d_omp.hpp @@ -45,19 
+45,15 @@ /////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::NormalEstimationOMP::setNumberOfThreads (unsigned int nr_threads) +pcl::NormalEstimationOMP::setNumberOfThreads (unsigned int num_threads) { #ifdef _OPENMP - if (nr_threads == 0) - threads_ = omp_get_num_procs(); - else - threads_ = nr_threads; - PCL_DEBUG ("[pcl::NormalEstimationOMP::setNumberOfThreads] Setting number of threads to %u.\n", threads_); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; - if (nr_threads != 1) - PCL_WARN ("[pcl::NormalEstimationOMP::setNumberOfThreads] Parallelization is requested, but OpenMP is not available! Continuing without parallelization.\n"); -#endif // _OPENMP + if (num_threads != 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback +#endif } /////////////////////////////////////////////////////////////////////////////////////////// @@ -77,7 +73,7 @@ pcl::NormalEstimationOMP::computeFeature (PointCloudOut &ou default(none) \ shared(output) \ firstprivate(nn_indices, nn_dists) \ - num_threads(threads_) \ + num_threads(num_threads_) \ schedule(dynamic, chunk_size_) // Iterating over the entire index vector for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) @@ -107,7 +103,7 @@ pcl::NormalEstimationOMP::computeFeature (PointCloudOut &ou default(none) \ shared(output) \ firstprivate(nn_indices, nn_dists) \ - num_threads(threads_) \ + num_threads(num_threads_) \ schedule(dynamic, chunk_size_) // Iterating over the entire index vector for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) diff --git a/features/include/pcl/features/impl/principal_curvatures.hpp b/features/include/pcl/features/impl/principal_curvatures.hpp index 9659aae56f4..7e95b6dbcbc 100644 --- a/features/include/pcl/features/impl/principal_curvatures.hpp +++ b/features/include/pcl/features/impl/principal_curvatures.hpp 
@@ -47,19 +47,15 @@ /////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::PrincipalCurvaturesEstimation::setNumberOfThreads (unsigned int nr_threads) +pcl::PrincipalCurvaturesEstimation::setNumberOfThreads (unsigned int num_threads) { #ifdef _OPENMP - if (nr_threads == 0) - threads_ = omp_get_num_procs(); - else - threads_ = nr_threads; - PCL_DEBUG ("[pcl::PrincipalCurvaturesEstimation::setNumberOfThreads] Setting number of threads to %u.\n", threads_); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; - if (nr_threads != 1) - PCL_WARN ("[pcl::PrincipalCurvaturesEstimation::setNumberOfThreads] Parallelization is requested, but OpenMP is not available! Continuing without parallelization.\n"); -#endif // _OPENMP + if (num_threads != 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback +#endif } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -142,7 +138,7 @@ pcl::PrincipalCurvaturesEstimation::computeFeature default(none) \ shared(output) \ firstprivate(nn_indices, nn_dists) \ - num_threads(threads_) \ + num_threads(num_threads_) \ schedule(dynamic, chunk_size_) // Iterating over the entire index vector for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) @@ -167,7 +163,7 @@ pcl::PrincipalCurvaturesEstimation::computeFeature default(none) \ shared(output) \ firstprivate(nn_indices, nn_dists) \ - num_threads(threads_) \ + num_threads(num_threads_) \ schedule(dynamic, chunk_size_) // Iterating over the entire index vector for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) diff --git a/features/include/pcl/features/impl/shot_lrf_omp.hpp b/features/include/pcl/features/impl/shot_lrf_omp.hpp index 99895a8bf39..138fccee19b 100644 --- a/features/include/pcl/features/impl/shot_lrf_omp.hpp +++ 
b/features/include/pcl/features/impl/shot_lrf_omp.hpp @@ -45,16 +45,15 @@ ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::SHOTLocalReferenceFrameEstimationOMP::setNumberOfThreads (unsigned int nr_threads) +pcl::SHOTLocalReferenceFrameEstimationOMP::setNumberOfThreads (unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads != 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -74,7 +73,7 @@ pcl::SHOTLocalReferenceFrameEstimationOMP::computeFeature ( #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) \ + num_threads(num_threads_) \ schedule(dynamic, 64) for (std::ptrdiff_t i = 0; i < static_cast (indices_->size ()); ++i) { diff --git a/features/include/pcl/features/impl/shot_omp.hpp b/features/include/pcl/features/impl/shot_omp.hpp index 4a0a01a1988..0db6146c16f 100644 --- a/features/include/pcl/features/impl/shot_omp.hpp +++ b/features/include/pcl/features/impl/shot_omp.hpp @@ -69,7 +69,7 @@ pcl::SHOTEstimationOMP::initCompute () lrf_estimator->setRadiusSearch ((lrf_radius_ > 0 ? lrf_radius_ : search_radius_)); lrf_estimator->setInputCloud (input_); lrf_estimator->setIndices (indices_); - lrf_estimator->setNumberOfThreads(threads_); + lrf_estimator->setNumberOfThreads(num_threads_); if (!fake_surface_) lrf_estimator->setSearchSurface(surface_); @@ -107,7 +107,7 @@ pcl::SHOTColorEstimationOMP::initCompute lrf_estimator->setRadiusSearch ((lrf_radius_ > 0 ? 
lrf_radius_ : search_radius_)); lrf_estimator->setInputCloud (input_); lrf_estimator->setIndices (indices_); - lrf_estimator->setNumberOfThreads(threads_); + lrf_estimator->setNumberOfThreads(num_threads_); if (!fake_surface_) lrf_estimator->setSearchSurface(surface_); @@ -123,16 +123,15 @@ pcl::SHOTColorEstimationOMP::initCompute ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::SHOTEstimationOMP::setNumberOfThreads (unsigned int nr_threads) +pcl::SHOTEstimationOMP::setNumberOfThreads (unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads != 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -153,7 +152,7 @@ pcl::SHOTEstimationOMP::computeFeature ( #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) + num_threads(num_threads_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { @@ -206,16 +205,15 @@ pcl::SHOTEstimationOMP::computeFeature ( ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::SHOTColorEstimationOMP::setNumberOfThreads (unsigned int nr_threads) +pcl::SHOTColorEstimationOMP::setNumberOfThreads (unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads != 1) + PCL_WARN("OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -240,7 +238,7 @@ pcl::SHOTColorEstimationOMP::computeFeat #pragma omp parallel for \ default(none) \ shared(output) \ - num_threads(threads_) + num_threads(num_threads_) for (std::ptrdiff_t idx = 0; idx < static_cast (indices_->size ()); ++idx) { Eigen::VectorXf shot; diff --git a/features/include/pcl/features/intensity_gradient.h b/features/include/pcl/features/intensity_gradient.h index b0f2d75fdb9..057fce6b8a8 100644 --- a/features/include/pcl/features/intensity_gradient.h +++ b/features/include/pcl/features/intensity_gradient.h @@ -75,10 +75,20 @@ namespace pcl } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads != 1) + PCL_WARN( + "OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback +#endif + } protected: /** \brief Estimate the intensity gradients for a set of points given in using @@ -104,11 +114,10 @@ namespace pcl const Eigen::Vector3f &normal, Eigen::Vector3f &gradient); - protected: + using PCLBase::num_threads_; + ///intensity field accessor structure IntensitySelectorT intensity_; - ///number of threads to be used, default 0 (auto) - unsigned int threads_{0}; }; } diff --git a/features/include/pcl/features/normal_3d_omp.h b/features/include/pcl/features/normal_3d_omp.h index ba10bb76d8d..e54f9620ac2 100644 --- a/features/include/pcl/features/normal_3d_omp.h +++ b/features/include/pcl/features/normal_3d_omp.h @@ -71,26 +71,24 @@ namespace pcl public: /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too low will lead to more parallelization overhead. Setting it too high will lead to a worse balancing between the threads. */ - NormalEstimationOMP (unsigned int nr_threads = 0, int chunk_size = 256): chunk_size_(chunk_size) + NormalEstimationOMP (unsigned int num_threads = 0, int chunk_size = 256): chunk_size_(chunk_size) { feature_name_ = "NormalEstimationOMP"; - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); protected: - /** \brief The number of threads the scheduler should use. 
*/ - unsigned int threads_; - + using PCLBase::num_threads_; /** \brief Chunk size for (dynamic) scheduling. */ int chunk_size_; private: diff --git a/features/include/pcl/features/principal_curvatures.h b/features/include/pcl/features/principal_curvatures.h index f7285a113ce..216c41bb0cd 100644 --- a/features/include/pcl/features/principal_curvatures.h +++ b/features/include/pcl/features/principal_curvatures.h @@ -73,17 +73,17 @@ namespace pcl using PointCloudIn = pcl::PointCloud; /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value to automatic) * \param chunk_size PCL will use dynamic scheduling with this chunk size. Setting it too * low will lead to more parallelization overhead. Setting it too high * will lead to a worse balancing between the threads. */ - PrincipalCurvaturesEstimation (unsigned int nr_threads = 1, int chunk_size = 256) : + PrincipalCurvaturesEstimation (unsigned int num_threads = 1, int chunk_size = 256) : chunk_size_(chunk_size) { feature_name_ = "PrincipalCurvaturesEstimation"; - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); }; /** \brief Perform Principal Components Analysis (PCA) on the point normals of a surface patch in the tangent @@ -105,15 +105,13 @@ namespace pcl /** \brief Initialize the scheduler and set the number of threads to use. The default behavior is * single threaded exectution - * \param nr_threads the number of hardware threads to use (0 sets the value to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value to automatic) */ void - setNumberOfThreads (unsigned int nr_threads); + setNumberOfThreads (unsigned int num_threads = 0); protected: - /** \brief The number of threads the scheduler should use. 
*/ - unsigned int threads_; - + using PCLBase::num_threads_; /** \brief Chunk size for (dynamic) scheduling. */ int chunk_size_; diff --git a/features/include/pcl/features/shot_lrf_omp.h b/features/include/pcl/features/shot_lrf_omp.h index 1c32ccffea9..96d4363973f 100644 --- a/features/include/pcl/features/shot_lrf_omp.h +++ b/features/include/pcl/features/shot_lrf_omp.h @@ -80,12 +80,13 @@ namespace pcl ~SHOTLocalReferenceFrameEstimationOMP () override = default; /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); protected: + using PCLBase::num_threads_; using Feature::feature_name_; using Feature::getClassName; //using Feature::searchForNeighbors; @@ -103,10 +104,6 @@ namespace pcl */ void computeFeature (PointCloudOut &output) override; - - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; - }; } diff --git a/features/include/pcl/features/shot_omp.h b/features/include/pcl/features/shot_omp.h index 59d48eaac17..5b8e7ad1860 100644 --- a/features/include/pcl/features/shot_omp.h +++ b/features/include/pcl/features/shot_omp.h @@ -95,18 +95,19 @@ namespace pcl using PointCloudIn = typename Feature::PointCloudIn; /** \brief Empty constructor. */ - SHOTEstimationOMP (unsigned int nr_threads = 0) : SHOTEstimation () + SHOTEstimationOMP (unsigned int num_threads = 0) : SHOTEstimation () { - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); }; /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); protected: + using PCLBase::num_threads_; /** \brief Estimate the Signatures of Histograms of OrienTations (SHOT) descriptors at a set of points given by * using the surface in setSearchSurface () and the spatial locator in @@ -119,9 +120,6 @@ namespace pcl /** \brief This method should get called before starting the actual computation. */ bool initCompute () override; - - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; }; /** \brief SHOTColorEstimationOMP estimates the Signature of Histograms of OrienTations (SHOT) descriptor for a given point cloud dataset @@ -179,20 +177,20 @@ namespace pcl /** \brief Empty constructor. */ SHOTColorEstimationOMP (bool describe_shape = true, bool describe_color = true, - unsigned int nr_threads = 0) + unsigned int num_threads = 0) : SHOTColorEstimation (describe_shape, describe_color) { - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); protected: - + using PCLBase::num_threads_; /** \brief Estimate the Signatures of Histograms of OrienTations (SHOT) descriptors at a set of points given by * using the surface in setSearchSurface () and the spatial locator in * setSearchMethod () @@ -204,9 +202,6 @@ namespace pcl /** \brief This method should get called before starting the actual computation. 
*/ bool initCompute () override; - - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; }; } diff --git a/filters/include/pcl/filters/convolution.h b/filters/include/pcl/filters/convolution.h index 3a817bbc087..3731803af59 100644 --- a/filters/include/pcl/filters/convolution.h +++ b/filters/include/pcl/filters/convolution.h @@ -121,10 +121,20 @@ namespace pcl inline const float & getDistanceThreshold () const { return (distance_threshold_); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads != 1) + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback +#endif + } /** Convolve a float image rows by a given kernel. * \param[out] output the convolved cloud * \note if output doesn't fit in input i.e. output.rows () < input.rows () or @@ -216,9 +226,9 @@ namespace pcl int half_width_{}; /// kernel size - 1 int kernel_width_{}; + /// number of threads to use; defaults to 1 because the OpenMP num_threads clause needs a positive value + unsigned int num_threads_{1}; protected: - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_{1}; void makeInfinite (PointOut& p) diff --git a/filters/include/pcl/filters/convolution_3d.h b/filters/include/pcl/filters/convolution_3d.h index ec6f0fc609c..b4b623016c4 100644 --- a/filters/include/pcl/filters/convolution_3d.h +++ b/filters/include/pcl/filters/convolution_3d.h @@ -211,10 +211,20 @@ namespace pcl Convolution3D (); /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads != 1) + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + num_threads_ = 1; // test the request, not the stale member, and actually store the fallback +#endif + } /** \brief Set convolving kernel * \param[in] kernel convolving element @@ -259,6 +269,7 @@ namespace pcl convolve (PointCloudOut& output); protected: + using Base::num_threads_; /** \brief initialize computation */ bool initCompute (); @@ -271,9 +282,6 @@ namespace pcl /** \brief The nearest neighbors search radius for each point. */ double search_radius_; - /** \brief number of threads */ - unsigned int threads_{1}; - /** \brief convlving kernel */ KernelT kernel_; }; diff --git a/filters/include/pcl/filters/fast_bilateral_omp.h b/filters/include/pcl/filters/fast_bilateral_omp.h index 7725a993060..ff30b1ad37c 100644 --- a/filters/include/pcl/filters/fast_bilateral_omp.h +++ b/filters/include/pcl/filters/fast_bilateral_omp.h @@ -57,6 +57,7 @@ namespace pcl class FastBilateralFilterOMP : public FastBilateralFilter { protected: + using PCLBase::num_threads_; using FastBilateralFilter::input_; using FastBilateralFilter::sigma_s_; using FastBilateralFilter::sigma_r_; @@ -71,27 +72,22 @@ namespace pcl using ConstPtr = shared_ptr >; /** \brief Empty constructor. */ - FastBilateralFilterOMP (unsigned int nr_threads = 0) + FastBilateralFilterOMP (unsigned int num_threads = 0) { - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); } /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); /** \brief Filter the input data and store the results into output. * \param[out] output the resultant point cloud */ void applyFilter (PointCloud &output) override; - - protected: - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; - }; } diff --git a/filters/include/pcl/filters/impl/convolution.hpp b/filters/include/pcl/filters/impl/convolution.hpp index 087fa9001f0..a1f499109ad 100644 --- a/filters/include/pcl/filters/impl/convolution.hpp +++ b/filters/include/pcl/filters/impl/convolution.hpp @@ -287,7 +287,7 @@ Convolution::convolve_rows (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int j = 0; j < height; ++j) { for (int i = 0; i < half_width_; ++i) @@ -305,7 +305,7 @@ Convolution::convolve_rows (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int j = 0; j < height; ++j) { for (int i = 0; i < half_width_; ++i) @@ -334,7 +334,7 @@ Convolution::convolve_rows_duplicate (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, w, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int j = 0; j < height; ++j) { for (int i = half_width_; i < last; ++i) @@ -352,7 +352,7 @@ Convolution::convolve_rows_duplicate (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, w, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int j = 0; j < height; ++j) { for (int i = half_width_; i < last; ++i) @@ -381,7 +381,7 
@@ Convolution::convolve_rows_mirror (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, w, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int j = 0; j < height; ++j) { for (int i = half_width_; i < last; ++i) @@ -399,7 +399,7 @@ Convolution::convolve_rows_mirror (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, w, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int j = 0; j < height; ++j) { for (int i = half_width_; i < last; ++i) @@ -427,7 +427,7 @@ Convolution::convolve_cols (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int i = 0; i < width; ++i) { for (int j = 0; j < half_width_; ++j) @@ -445,7 +445,7 @@ Convolution::convolve_cols (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int i = 0; i < width; ++i) { for (int j = 0; j < half_width_; ++j) @@ -474,7 +474,7 @@ Convolution::convolve_cols_duplicate (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(h, height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int i = 0; i < width; ++i) { for (int j = half_width_; j < last; ++j) @@ -492,7 +492,7 @@ Convolution::convolve_cols_duplicate (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(h, height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int i = 0; i < width; ++i) { for (int j = half_width_; j < last; ++j) @@ -521,7 +521,7 @@ Convolution::convolve_cols_mirror (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(h, height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int i = 0; i < width; ++i) { for (int j = half_width_; j < last; ++j) @@ -539,7 +539,7 
@@ Convolution::convolve_cols_mirror (PointCloudOut& output) #pragma omp parallel for \ default(none) \ shared(h, height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) for(int i = 0; i < width; ++i) { for (int j = half_width_; j < last; ++j) diff --git a/filters/include/pcl/filters/impl/convolution_3d.hpp b/filters/include/pcl/filters/impl/convolution_3d.hpp index 1001bce7319..ab6561cdf92 100644 --- a/filters/include/pcl/filters/impl/convolution_3d.hpp +++ b/filters/include/pcl/filters/impl/convolution_3d.hpp @@ -249,7 +249,7 @@ pcl::filters::Convolution3D::convolve (PointCloudO default(none) \ shared(output) \ firstprivate(nn_indices, nn_distances) \ - num_threads(threads_) \ + num_threads(num_threads_) \ schedule(dynamic, 64) for (std::int64_t point_idx = 0; point_idx < static_cast (surface_->size ()); ++point_idx) { diff --git a/filters/include/pcl/filters/impl/fast_bilateral_omp.hpp b/filters/include/pcl/filters/impl/fast_bilateral_omp.hpp index 1a7f544a477..de331cdfaef 100644 --- a/filters/include/pcl/filters/impl/fast_bilateral_omp.hpp +++ b/filters/include/pcl/filters/impl/fast_bilateral_omp.hpp @@ -45,16 +45,15 @@ ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::FastBilateralFilterOMP::setNumberOfThreads (unsigned int nr_threads) +pcl::FastBilateralFilterOMP::setNumberOfThreads (unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads_ != 1) { + PCL_WARN("OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -88,7 +87,7 @@ pcl::FastBilateralFilterOMP::applyFilter (PointCloud &output) #pragma omp parallel for \ default(none) \ shared(base_min, base_max, output) \ - num_threads(threads_) + num_threads(num_threads_) for (long int i = 0; i < static_cast (output.size ()); ++i) if (!std::isfinite (output.at(i).z)) output.at(i).z = base_max; @@ -107,12 +106,12 @@ pcl::FastBilateralFilterOMP::applyFilter (PointCloud &output) #pragma omp parallel for \ default(none) \ shared(base_min, data, output) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(base_min, data, output, small_height, small_width) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (long int i = 0; i < static_cast (small_width * small_height); ++i) { @@ -156,12 +155,12 @@ pcl::FastBilateralFilterOMP::applyFilter (PointCloud &output) #pragma omp parallel for \ default(none) \ shared(current_buffer, current_data, dim, offset) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(current_buffer, current_data, dim, offset, small_depth, small_height, small_width) \ - num_threads(threads_) + num_threads(num_threads_) #endif for(long int i = 0; i < static_cast ((small_width - 2)*(small_height - 2)); ++i) { @@ -188,7 +187,7 @@ pcl::FastBilateralFilterOMP::applyFilter (PointCloud &output) #pragma omp parallel for \ default(none) \ shared(base_min, data, output) \ - num_threads(threads_) + num_threads(num_threads_) for (long int i = 0; i < static_cast (input_->size ()); ++i) { auto x = static_cast (i % input_->width); @@ -205,7 +204,7 @@ pcl::FastBilateralFilterOMP::applyFilter (PointCloud &output) #pragma omp parallel for \ default(none) \ shared(base_min, data, output) \ - num_threads(threads_) + 
num_threads(num_threads_) for (long i = 0; i < static_cast (input_->size ()); ++i) { auto x = static_cast (i % input_->width); diff --git a/filters/include/pcl/filters/impl/pyramid.hpp b/filters/include/pcl/filters/impl/pyramid.hpp index dc47d935320..83f06f69906 100644 --- a/filters/include/pcl/filters/impl/pyramid.hpp +++ b/filters/include/pcl/filters/impl/pyramid.hpp @@ -125,7 +125,7 @@ Pyramid::compute (std::vector& output) #pragma omp parallel for \ default(none) \ shared(next) \ - num_threads(threads_) + num_threads(num_threads_) for(int i=0; i < next.height; ++i) { for(int j=0; j < next.width; ++j) @@ -161,7 +161,7 @@ Pyramid::compute (std::vector& output) #pragma omp parallel for \ default(none) \ shared(next) \ - num_threads(threads_) + num_threads(num_threads_) for(int i=0; i < next.height; ++i) { for(int j=0; j < next.width; ++j) diff --git a/filters/include/pcl/filters/pyramid.h b/filters/include/pcl/filters/pyramid.h index 107c83000c4..6ac26c5b6d8 100644 --- a/filters/include/pcl/filters/pyramid.h +++ b/filters/include/pcl/filters/pyramid.h @@ -94,10 +94,20 @@ namespace pcl getNumberOfLevels () const { return (levels_); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic). + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic). */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } /** \brief Choose a larger smoothing kernel for enhanced smoothing. * \param large if true large smoothng kernel will be used. 
@@ -126,8 +136,10 @@ namespace pcl inline const std::string& getClassName () const { return (name_); } - + private: + /// \brief number of threads + unsigned int num_threads_{0}; /// \brief init computation bool @@ -154,9 +166,6 @@ namespace pcl Eigen::MatrixXf kernel_; /// Threshold distance between adjacent points float threshold_{0.01f}; - /// \brief number of threads - unsigned int threads_{0}; - public: PCL_MAKE_ALIGNED_OPERATOR_NEW }; diff --git a/filters/include/pcl/filters/radius_outlier_removal.h b/filters/include/pcl/filters/radius_outlier_removal.h index 272d091b015..ae67e84163a 100644 --- a/filters/include/pcl/filters/radius_outlier_removal.h +++ b/filters/include/pcl/filters/radius_outlier_removal.h @@ -72,6 +72,7 @@ namespace pcl class RadiusOutlierRemoval : public FilterIndices { protected: + using PCLBase::num_threads_; using PointCloud = typename FilterIndices::PointCloud; using PointCloudPtr = typename PointCloud::Ptr; using PointCloudConstPtr = typename PointCloud::ConstPtr; @@ -145,19 +146,18 @@ namespace pcl setSearchMethod (const SearcherPtr &searcher) { searcher_ = searcher; } /** \brief Set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back + * \param num_threads the number of hardware threads to use (0 sets the value back * to automatic) */ void - setNumberOfThreads(unsigned int nr_threads = 0) + setNumberOfThreads(unsigned int num_threads = 0) { #ifdef _OPENMP - num_threads_ = nr_threads != 0 ? nr_threads : omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else if (num_threads_ != 1) { PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); } - num_threads_ = 1; #endif } @@ -196,11 +196,6 @@ namespace pcl /** \brief The minimum number of neighbors that a point needs to have in the given search radius to be considered an inlier. 
*/ int min_pts_radius_{1}; - - /** - * @brief Number of threads used during filtering - */ - int num_threads_{1}; }; ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/filters/src/pyramid.cpp b/filters/src/pyramid.cpp index 80670e4d9fc..23ef53b8681 100644 --- a/filters/src/pyramid.cpp +++ b/filters/src/pyramid.cpp @@ -69,7 +69,9 @@ Pyramid::compute( output[l - 1]->height / 2)); const PointCloud& previous = *output[l - 1]; PointCloud& next = *output[l]; -#pragma omp parallel for default(none) shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) num_threads(threads_) +#pragma omp parallel for default(none) \ + shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) \ + num_threads(num_threads_) for (int i = 0; i < static_cast(next.height); ++i) { // rows for (int j = 0; j < static_cast(next.width); ++j) { // columns float r = 0, g = 0, b = 0; @@ -113,7 +115,9 @@ Pyramid::compute( output[l - 1]->height / 2)); const PointCloud& previous = *output[l - 1]; PointCloud& next = *output[l]; -#pragma omp parallel for default(none) shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) num_threads(threads_) +#pragma omp parallel for default(none) \ + shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) \ + num_threads(num_threads_) for (int i = 0; i < static_cast(next.height); ++i) { // rows for (int j = 0; j < static_cast(next.width); ++j) { // columns float weight = 0; @@ -192,7 +196,9 @@ Pyramid::compute( output[l - 1]->width / 2, output[l - 1]->height / 2)); const PointCloud& previous = *output[l - 1]; PointCloud& next = *output[l]; -#pragma omp parallel for default(none) shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) num_threads(threads_) +#pragma omp parallel for default(none) \ + shared(next, previous, kernel_rows, kernel_cols, 
kernel_center_x, kernel_center_y) \ + num_threads(num_threads_) for (int i = 0; i < static_cast(next.height); ++i) { // rows for (int j = 0; j < static_cast(next.width); ++j) { // columns float r = 0, g = 0, b = 0, a = 0; @@ -238,7 +244,9 @@ Pyramid::compute( output[l - 1]->width / 2, output[l - 1]->height / 2)); const PointCloud& previous = *output[l - 1]; PointCloud& next = *output[l]; -#pragma omp parallel for default(none) shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) num_threads(threads_) +#pragma omp parallel for default(none) \ + shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) \ + num_threads(num_threads_) for (int i = 0; i < static_cast(next.height); ++i) { // rows for (int j = 0; j < static_cast(next.width); ++j) { // columns float weight = 0; @@ -326,7 +334,9 @@ Pyramid::compute(std::vector::PointCloudPtr>& output output[l - 1]->height / 2)); const PointCloud& previous = *output[l - 1]; PointCloud& next = *output[l]; -#pragma omp parallel for default(none) shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) num_threads(threads_) +#pragma omp parallel for default(none) \ + shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) \ + num_threads(num_threads_) for (int i = 0; i < static_cast(next.height); ++i) { // rows for (int j = 0; j < static_cast(next.width); ++j) { // columns float r = 0, g = 0, b = 0; @@ -362,7 +372,9 @@ Pyramid::compute(std::vector::PointCloudPtr>& output output[l - 1]->height / 2)); const PointCloud& previous = *output[l - 1]; PointCloud& next = *output[l]; -#pragma omp parallel for default(none) shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) num_threads(threads_) +#pragma omp parallel for default(none) \ + shared(next, previous, kernel_rows, kernel_cols, kernel_center_x, kernel_center_y) \ + num_threads(num_threads_) for (int i = 0; i < static_cast(next.height); ++i) { 
// rows for (int j = 0; j < static_cast(next.width); ++j) { // columns float weight = 0; diff --git a/io/include/pcl/io/image_grabber.h b/io/include/pcl/io/image_grabber.h index ab77591b464..bbb89f9b2bb 100644 --- a/io/include/pcl/io/image_grabber.h +++ b/io/include/pcl/io/image_grabber.h @@ -171,7 +171,7 @@ namespace pcl /** \brief Set the number of threads, if we wish to use OpenMP for quicker cloud population. * Note that for a standard (< 4 core) machine this is unlikely to yield a drastic speedup.*/ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); protected: /** \brief Convenience function to see how many frames this consists of diff --git a/io/src/image_grabber.cpp b/io/src/image_grabber.cpp index 780b9fa5255..e1789938f4b 100644 --- a/io/src/image_grabber.cpp +++ b/io/src/image_grabber.cpp @@ -169,7 +169,7 @@ struct pcl::ImageGrabberBase::ImageGrabberImpl double principal_point_x_ = 319.5; double principal_point_y_ = 239.5; - unsigned int num_threads_ = 1; + unsigned int num_threads_{1}; }; /////////////////////////////////////////////////////////////////////////////////////////// @@ -1015,7 +1015,13 @@ pcl::ImageGrabberBase::getTimestampAtIndex (std::size_t idx, std::uint64_t &time //////////////////////////////////////////////////////////////////////////////////////// void -pcl::ImageGrabberBase::setNumberOfThreads (unsigned int nr_threads) +pcl::ImageGrabberBase::setNumberOfThreads (unsigned int num_threads) { - impl_->num_threads_ = nr_threads; +#ifdef _OPENMP + impl_->num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN("OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } +#endif } diff --git a/keypoints/include/pcl/keypoints/harris_2d.h b/keypoints/include/pcl/keypoints/harris_2d.h index 5305d4f0eaf..74cf519e398 100644 --- a/keypoints/include/pcl/keypoints/harris_2d.h +++ b/keypoints/include/pcl/keypoints/harris_2d.h @@ -80,7 +80,6 @@ namespace pcl , refine_ (false) , nonmax_ (true) , method_ (method) - , threads_ (0) , response_ (new pcl::PointCloud ()) , window_width_ (window_width) , window_height_ (window_height) @@ -127,12 +126,24 @@ namespace pcl void setRefine (bool do_refine); /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } protected: + using PCLBase::num_threads_; + bool initCompute () override; void @@ -161,8 +172,6 @@ namespace pcl bool nonmax_; /// cornerness computation method ResponseMethod method_; - /// number of threads to be used - unsigned int threads_; private: Eigen::MatrixXf derivatives_rows_; diff --git a/keypoints/include/pcl/keypoints/harris_3d.h b/keypoints/include/pcl/keypoints/harris_3d.h index 9ba689a0b57..8012b37b0e8 100644 --- a/keypoints/include/pcl/keypoints/harris_3d.h +++ b/keypoints/include/pcl/keypoints/harris_3d.h @@ -149,11 +149,24 @@ namespace pcl setSearchSurface (const PointCloudInConstPtr &cloud) override { surface_ = cloud; normals_.reset(); } /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } + protected: + using PCLBase::num_threads_; + bool initCompute () override; void detectKeypoints (PointCloudOut &output) override; @@ -172,7 +185,6 @@ namespace pcl bool nonmax_{true}; ResponseMethod method_; PointCloudNConstPtr normals_; - unsigned int threads_{0}; }; } diff --git a/keypoints/include/pcl/keypoints/harris_6d.h b/keypoints/include/pcl/keypoints/harris_6d.h index 8b3b3ad880b..c6da73e9568 100644 --- a/keypoints/include/pcl/keypoints/harris_6d.h +++ b/keypoints/include/pcl/keypoints/harris_6d.h @@ -116,11 +116,23 @@ namespace pcl setSearchSurface (const PointCloudInConstPtr &cloud) { surface_ = cloud; normals_->clear (); intensity_gradients_->clear ();} /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } +#endif + } + protected: + using PCLBase::num_threads_; void detectKeypoints (PointCloudOut &output); void responseTomasi (PointCloudOut &output) const; void refineCorners (PointCloudOut &corners) const; @@ -129,7 +141,6 @@ namespace pcl float threshold_; bool refine_{true}; bool nonmax_{true}; - unsigned int threads_{0}; typename pcl::PointCloud::Ptr normals_; pcl::PointCloud::Ptr intensity_gradients_; } ; diff --git a/keypoints/include/pcl/keypoints/impl/harris_2d.hpp b/keypoints/include/pcl/keypoints/impl/harris_2d.hpp index fa8da44a169..7345c4118ed 100644 --- a/keypoints/include/pcl/keypoints/impl/harris_2d.hpp +++ b/keypoints/include/pcl/keypoints/impl/harris_2d.hpp @@ -261,13 +261,13 @@ HarrisKeypoint2D::detectKeypoints (PointCloudOu default(none) \ shared(occupency_map, output) \ firstprivate(width, height) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(occupency_map, occupency_map_size, output, threshold) \ firstprivate(width, height) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (int i = 0; i < occupency_map_size; ++i) { @@ -314,13 +314,13 @@ HarrisKeypoint2D::responseHarris (PointCloudOut default(none) \ shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(output, output_size) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (int index = 0; index < output_size; ++index) { @@ -360,13 +360,13 @@ HarrisKeypoint2D::responseNoble (PointCloudOut default(none) \ shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(output, output_size) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (int index = 0; index < output_size; ++index) { @@ -406,13 +406,13 @@ 
HarrisKeypoint2D::responseLowe (PointCloudOut & default(none) \ shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(output, output_size) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (int index = 0; index < output_size; ++index) { @@ -452,13 +452,13 @@ HarrisKeypoint2D::responseTomasi (PointCloudOut default(none) \ shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(output, output_size) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (int index = 0; index < output_size; ++index) { diff --git a/keypoints/include/pcl/keypoints/impl/harris_3d.hpp b/keypoints/include/pcl/keypoints/impl/harris_3d.hpp index 17c56ef55cf..9f8ea92cd24 100644 --- a/keypoints/include/pcl/keypoints/impl/harris_3d.hpp +++ b/keypoints/include/pcl/keypoints/impl/harris_3d.hpp @@ -276,7 +276,7 @@ pcl::HarrisKeypoint3D::detectKeypoints (PointCloud #pragma omp parallel for \ default(none) \ shared(output, response) \ - num_threads(threads_) + num_threads(num_threads_) for (int idx = 0; idx < static_cast (response->size ()); ++idx) { if (!isFinite ((*response)[idx]) || @@ -323,7 +323,7 @@ pcl::HarrisKeypoint3D::responseHarris (PointCloudO default(none) \ shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) for (int pIdx = 0; pIdx < static_cast (input_->size ()); ++pIdx) { const PointInT& pointIn = input_->points [pIdx]; @@ -364,7 +364,7 @@ pcl::HarrisKeypoint3D::responseNoble (PointCloudOu for default(none) \ shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) for (int pIdx = 0; pIdx < static_cast (input_->size ()); ++pIdx) { const PointInT& pointIn = input_->points [pIdx]; @@ -404,7 +404,7 @@ pcl::HarrisKeypoint3D::responseLowe (PointCloudOut default(none) \ 
shared(output) \ firstprivate(covar) \ - num_threads(threads_) + num_threads(num_threads_) for (int pIdx = 0; pIdx < static_cast (input_->size ()); ++pIdx) { const PointInT& pointIn = input_->points [pIdx]; @@ -463,7 +463,7 @@ pcl::HarrisKeypoint3D::responseTomasi (PointCloudO default(none) \ shared(output) \ firstprivate(covar, covariance_matrix) \ - num_threads(threads_) + num_threads(num_threads_) for (int pIdx = 0; pIdx < static_cast (input_->size ()); ++pIdx) { const PointInT& pointIn = input_->points [pIdx]; @@ -508,7 +508,7 @@ pcl::HarrisKeypoint3D::refineCorners (PointCloudOu #pragma omp parallel for \ shared(corners) \ firstprivate(nnT, NNT, NNTp) \ - num_threads(threads_) + num_threads(num_threads_) for (int cIdx = 0; cIdx < static_cast (corners.size ()); ++cIdx) { unsigned iterations = 0; diff --git a/keypoints/include/pcl/keypoints/impl/harris_6d.hpp b/keypoints/include/pcl/keypoints/impl/harris_6d.hpp index d5f9a3537c9..8a9c5460e8f 100644 --- a/keypoints/include/pcl/keypoints/impl/harris_6d.hpp +++ b/keypoints/include/pcl/keypoints/impl/harris_6d.hpp @@ -165,7 +165,7 @@ pcl::HarrisKeypoint6D::detectKeypoints (PointCloud cloud->resize (surface_->size ()); #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) for (unsigned idx = 0; idx < surface_->size (); ++idx) { cloud->points [idx].x = surface_->points [idx].x; @@ -185,7 +185,7 @@ pcl::HarrisKeypoint6D::detectKeypoints (PointCloud #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) for (std::size_t idx = 0; idx < intensity_gradients_->size (); ++idx) { float len = intensity_gradients_->points [idx].gradient_x * intensity_gradients_->points [idx].gradient_x + @@ -228,7 +228,7 @@ pcl::HarrisKeypoint6D::detectKeypoints (PointCloud #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) for (std::size_t idx = 0; idx < response->size (); ++idx) { if (!isFinite ((*response)[idx]) || 
(*response)[idx].intensity < threshold_) @@ -275,7 +275,7 @@ pcl::HarrisKeypoint6D::responseTomasi (PointCloudO #pragma omp parallel for \ default(none) \ firstprivate(pointOut, covar, covariance, solver) \ - num_threads(threads_) + num_threads(num_threads_) for (unsigned pIdx = 0; pIdx < input_->size (); ++pIdx) { const PointInT& pointIn = input_->points [pIdx]; diff --git a/keypoints/include/pcl/keypoints/impl/iss_3d.hpp b/keypoints/include/pcl/keypoints/impl/iss_3d.hpp index a14a1bf966f..c8ec23c020b 100644 --- a/keypoints/include/pcl/keypoints/impl/iss_3d.hpp +++ b/keypoints/include/pcl/keypoints/impl/iss_3d.hpp @@ -103,16 +103,15 @@ pcl::ISSKeypoint3D::setNormals (const PointCloudNC ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::ISSKeypoint3D::setNumberOfThreads (unsigned int nr_threads) +pcl::ISSKeypoint3D::setNumberOfThreads (unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads_ != 1) { + PCL_WARN("OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -131,7 +130,7 @@ pcl::ISSKeypoint3D::getBoundaryPoints (PointCloudI default(none) \ shared(angle_threshold, boundary_estimator, border_radius, edge_points, input) \ firstprivate(u, v) \ - num_threads(threads_) + num_threads(num_threads_) for (int index = 0; index < static_cast(input.size ()); index++) { edge_points[index] = false; @@ -313,7 +312,7 @@ pcl::ISSKeypoint3D::detectKeypoints (PointCloudOut #pragma omp parallel for \ default(none) \ shared(borders) \ - num_threads(threads_) + num_threads(num_threads_) for (int index = 0; index < static_cast(input_->size ()); index++) { borders[index] = false; @@ -338,9 +337,9 @@ pcl::ISSKeypoint3D::detectKeypoints (PointCloudOut } #ifdef _OPENMP - auto *omp_mem = new Eigen::Vector3d[threads_]; + auto* omp_mem = new Eigen::Vector3d[num_threads_]; - for (std::size_t i = 0; i < threads_; i++) + for (std::size_t i = 0; i < num_threads_; i++) omp_mem[i].setZero (3); #else auto *omp_mem = new Eigen::Vector3d[1]; @@ -357,7 +356,7 @@ pcl::ISSKeypoint3D::detectKeypoints (PointCloudOut #pragma omp parallel for \ default(none) \ shared(borders, omp_mem, prg_mem) \ - num_threads(threads_) + num_threads(num_threads_) for (int index = 0; index < static_cast (input_->size ()); index++) { #ifdef _OPENMP @@ -412,7 +411,7 @@ pcl::ISSKeypoint3D::detectKeypoints (PointCloudOut #pragma omp parallel for \ default(none) \ shared(feat_max) \ - num_threads(threads_) + num_threads(num_threads_) for (int index = 0; index < static_cast(input_->size ()); index++) { feat_max [index] = false; @@ -444,7 +443,7 @@ pcl::ISSKeypoint3D::detectKeypoints (PointCloudOut #pragma omp parallel for \ default(none) \ shared(feat_max, output) \ - num_threads(threads_) + num_threads(num_threads_) for (int index = 0; index < static_cast(input_->size ()); index++) { if 
(feat_max[index]) diff --git a/keypoints/include/pcl/keypoints/impl/susan.hpp b/keypoints/include/pcl/keypoints/impl/susan.hpp index de688f2a413..85b67ce2e7e 100644 --- a/keypoints/include/pcl/keypoints/impl/susan.hpp +++ b/keypoints/include/pcl/keypoints/impl/susan.hpp @@ -102,9 +102,15 @@ pcl::SUSANKeypoint::setSearchSurface ( ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::SUSANKeypoint::setNumberOfThreads (unsigned int nr_threads) +pcl::SUSANKeypoint::setNumberOfThreads (unsigned int num_threads) { - threads_ = nr_threads; +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif } diff --git a/keypoints/include/pcl/keypoints/impl/trajkovic_2d.hpp b/keypoints/include/pcl/keypoints/impl/trajkovic_2d.hpp index 8b1c1463834..6632f80d6de 100644 --- a/keypoints/include/pcl/keypoints/impl/trajkovic_2d.hpp +++ b/keypoints/include/pcl/keypoints/impl/trajkovic_2d.hpp @@ -94,12 +94,12 @@ TrajkovicKeypoint2D::detectKeypoints (PointClou #if OPENMP_LEGACY_CONST_DATA_SHARING_RULE #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(h, w) \ - num_threads(threads_) + num_threads(num_threads_) #endif for(int j = half_window_size_; j < h; ++j) { @@ -142,12 +142,12 @@ TrajkovicKeypoint2D::detectKeypoints (PointClou #if OPENMP_LEGACY_CONST_DATA_SHARING_RULE #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(h, w) \ - num_threads(threads_) + num_threads(num_threads_) #endif for(int j = half_window_size_; j < h; ++j) { @@ -239,12 +239,12 @@ TrajkovicKeypoint2D::detectKeypoints (PointClou #pragma omp parallel for \ default(none) \ shared(indices, 
occupency_map, output) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(height, indices, occupency_map, output, width) \ - num_threads(threads_) + num_threads(num_threads_) #endif // Disable lint since this 'for' is part of the pragma // NOLINTNEXTLINE(modernize-loop-convert) diff --git a/keypoints/include/pcl/keypoints/impl/trajkovic_3d.hpp b/keypoints/include/pcl/keypoints/impl/trajkovic_3d.hpp index 0edbcb07589..f50ad9bd6f6 100644 --- a/keypoints/include/pcl/keypoints/impl/trajkovic_3d.hpp +++ b/keypoints/include/pcl/keypoints/impl/trajkovic_3d.hpp @@ -110,12 +110,12 @@ TrajkovicKeypoint3D::detectKeypoints (PointCloudOu #pragma omp parallel for \ default(none) \ shared(input, normals, response) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(h, input, normals, response, w) \ - num_threads(threads_) + num_threads(num_threads_) #endif for(int j = half_window_size_; j < h; ++j) { @@ -160,12 +160,12 @@ TrajkovicKeypoint3D::detectKeypoints (PointCloudOu #pragma omp parallel for \ default(none) \ shared(input, normals, response) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(h, input, normals, response, w) \ - num_threads(threads_) + num_threads(num_threads_) #endif for(int j = half_window_size_; j < h; ++j) { @@ -254,12 +254,12 @@ TrajkovicKeypoint3D::detectKeypoints (PointCloudOu #pragma omp parallel for \ default(none) \ shared(indices, occupency_map, output) \ - num_threads(threads_) + num_threads(num_threads_) #else #pragma omp parallel for \ default(none) \ shared(height, indices, occupency_map, output, width) \ - num_threads(threads_) + num_threads(num_threads_) #endif for (int i = 0; i < static_cast(indices.size ()); ++i) { diff --git a/keypoints/include/pcl/keypoints/iss_3d.h b/keypoints/include/pcl/keypoints/iss_3d.h index de5017f957c..26d0cbfc6a4 100644 --- 
a/keypoints/include/pcl/keypoints/iss_3d.h +++ b/keypoints/include/pcl/keypoints/iss_3d.h @@ -116,7 +116,9 @@ namespace pcl { name_ = "ISSKeypoint3D"; search_radius_ = salient_radius_; - setNumberOfThreads(threads_); // Reset number of threads with the member's initialization value to apply input validation. + setNumberOfThreads( + num_threads_); // Reset number of threads with the member's initialization + // value to apply input validation. } /** \brief Destructor. */ @@ -187,13 +189,15 @@ namespace pcl } /** \brief Initialize the scheduler and set the number of threads to use. - * \param[in] nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param[in] num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads = 0); + setNumberOfThreads (unsigned int num_threads = 0); protected: + using PCLBase::num_threads_; + /** \brief Compute the boundary points for the given input cloud. * \param[in] input the input cloud * \param[in] border_radius the radius used to compute the boundary points @@ -255,10 +259,6 @@ namespace pcl /** \brief The decision boundary (angle threshold) that marks points as boundary or regular. (default \f$\pi / 2.0\f$) */ float angle_threshold_; - - /** \brief The number of threads that has to be used by the scheduler. */ - unsigned int threads_{0}; - }; } diff --git a/keypoints/include/pcl/keypoints/susan.h b/keypoints/include/pcl/keypoints/susan.h index 1b767bd2e03..67281d3a558 100644 --- a/keypoints/include/pcl/keypoints/susan.h +++ b/keypoints/include/pcl/keypoints/susan.h @@ -136,10 +136,10 @@ namespace pcl setSearchSurface (const PointCloudInConstPtr &cloud) override; /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ void - setNumberOfThreads (unsigned int nr_threads); + setNumberOfThreads (unsigned int num_threads = 0); /** \brief Apply non maxima suppression to the responses to keep strongest corners. * \note in SUSAN points with less response or stronger corners @@ -156,6 +156,8 @@ namespace pcl setGeometricValidation (bool validate); protected: + using PCLBase::num_threads_; + bool initCompute () override; @@ -180,7 +182,6 @@ namespace pcl float intensity_threshold_; float tolerance_; PointCloudNConstPtr normals_; - unsigned int threads_{0}; bool geometric_validation_; bool nonmax_; /// intensity field accessor diff --git a/keypoints/include/pcl/keypoints/trajkovic_2d.h b/keypoints/include/pcl/keypoints/trajkovic_2d.h index 9baf1adb7b0..2716079318a 100644 --- a/keypoints/include/pcl/keypoints/trajkovic_2d.h +++ b/keypoints/include/pcl/keypoints/trajkovic_2d.h @@ -126,16 +126,31 @@ namespace pcl getSecondThreshold () const { return (second_threshold_); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use, 0 for automatic. + * \param num_threads the number of hardware threads to use, 0 for automatic. */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } +#endif + } /// \brief \return the number of threads inline unsigned int - getNumberOfThreads () const { return (threads_); } + getNumberOfThreads() const + { + return (num_threads_); + } protected: + using PCLBase::num_threads_; + bool initCompute () override; @@ -162,8 +177,6 @@ namespace pcl float first_threshold_; /// second threshold for corner evaluation float second_threshold_; - /// number of threads to be used - unsigned int threads_{1}; /// point cloud response pcl::PointCloud::Ptr response_; }; diff --git a/keypoints/include/pcl/keypoints/trajkovic_3d.h b/keypoints/include/pcl/keypoints/trajkovic_3d.h index 1758c672387..f1235861d85 100644 --- a/keypoints/include/pcl/keypoints/trajkovic_3d.h +++ b/keypoints/include/pcl/keypoints/trajkovic_3d.h @@ -140,16 +140,30 @@ namespace pcl getNormals () const { return (normals_); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use, 0 for automatic. + * \param num_threads the number of hardware threads to use, 0 for automatic. */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads (unsigned int num_threads = 0) { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } +#endif + } /// \brief \return the number of threads inline unsigned int - getNumberOfThreads () const { return (threads_); } + getNumberOfThreads() const + { + return (num_threads_); + } protected: + using PCLBase::num_threads_; + bool initCompute () override; @@ -202,8 +216,6 @@ namespace pcl float first_threshold_; /// second threshold for corner evaluation float second_threshold_; - /// number of threads to be used - unsigned int threads_{1}; /// point cloud normals NormalsConstPtr normals_; /// point cloud response diff --git a/registration/include/pcl/registration/correspondence_estimation.h b/registration/include/pcl/registration/correspondence_estimation.h index 7dcffc258cf..eee5b0fba47 100644 --- a/registration/include/pcl/registration/correspondence_estimation.h +++ b/registration/include/pcl/registration/correspondence_estimation.h @@ -138,16 +138,16 @@ class CorrespondenceEstimationBase : public PCLBase { } /** \brief Set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to + * \param num_threads the number of hardware threads to use (0 sets the value back to * automatic) */ void - setNumberOfThreads(unsigned int nr_threads) + setNumberOfThreads(unsigned int num_threads) { #ifdef _OPENMP - num_threads_ = nr_threads != 0 ? nr_threads : omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - if (nr_threads != 1) { + if (num_threads != 1) { PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); } num_threads_ = 1; @@ -322,6 +322,8 @@ class CorrespondenceEstimationBase : public PCLBase { clone() const = 0; protected: + using PCLBase::num_threads_; + /** \brief The correspondence estimation method name. 
*/ std::string corr_name_; @@ -379,8 +381,6 @@ class CorrespondenceEstimationBase : public PCLBase { /** \brief A flag which, if set, means the tree operating on the source cloud * will never be recomputed*/ bool force_no_recompute_reciprocal_{false}; - - unsigned int num_threads_{1}; }; /** \brief @b CorrespondenceEstimation represents a simple class for diff --git a/registration/include/pcl/registration/gicp.h b/registration/include/pcl/registration/gicp.h index 99a9288250e..4390aba3ca7 100644 --- a/registration/include/pcl/registration/gicp.h +++ b/registration/include/pcl/registration/gicp.h @@ -141,7 +141,6 @@ class GeneralizedIterativeClosestPoint max_iterations_ = 200; transformation_epsilon_ = 5e-4; corr_dist_threshold_ = 5.; - setNumberOfThreads(0); rigid_transformation_estimation_ = [this](const PointCloudSource& cloud_src, const pcl::Indices& indices_src, const PointCloudTarget& cloud_tgt, @@ -373,13 +372,15 @@ class GeneralizedIterativeClosestPoint } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to + * \param num_threads the number of hardware threads to use (0 sets the value back to * automatic) */ void - setNumberOfThreads(unsigned int nr_threads = 0); + setNumberOfThreads(unsigned int num_threads = 0); protected: + using PCLBase::num_threads_; + /** \brief The number of neighbors used for covariances computation. * default: 20 */ @@ -532,9 +533,6 @@ class GeneralizedIterativeClosestPoint Eigen::Matrix3d& ddR_dTheta_dTheta, Eigen::Matrix3d& ddR_dTheta_dPsi, Eigen::Matrix3d& ddR_dPsi_dPsi) const; - - /** \brief The number of threads the scheduler should use. 
*/ - unsigned int threads_; }; } // namespace pcl diff --git a/registration/include/pcl/registration/ia_fpcs.h b/registration/include/pcl/registration/ia_fpcs.h index afb9f252805..42794e2d5ac 100644 --- a/registration/include/pcl/registration/ia_fpcs.h +++ b/registration/include/pcl/registration/ia_fpcs.h @@ -45,28 +45,28 @@ namespace pcl { /** \brief Compute the mean point density of a given point cloud. * \param[in] cloud pointer to the input point cloud * \param[in] max_dist maximum distance of a point to be considered as a neighbor - * \param[in] nr_threads number of threads to use (default = 1, only used if OpenMP flag - * is set) \return the mean point density of a given point cloud + * \param[in] num_threads number of threads to use (default = 1, only used if OpenMP + * flag is set) \return the mean point density of a given point cloud */ template inline float getMeanPointDensity(const typename pcl::PointCloud::ConstPtr& cloud, float max_dist, - int nr_threads = 1); + int num_threads = 1); /** \brief Compute the mean point density of a given point cloud. 
* \param[in] cloud pointer to the input point cloud * \param[in] indices the vector of point indices to use from \a cloud * \param[in] max_dist maximum distance of a point to be considered as a neighbor - * \param[in] nr_threads number of threads to use (default = 1, only used if OpenMP flag - * is set) \return the mean point density of a given point cloud + * \param[in] num_threads number of threads to use (default = 1, only used if OpenMP + * flag is set) \return the mean point density of a given point cloud */ template inline float getMeanPointDensity(const typename pcl::PointCloud::ConstPtr& cloud, const pcl::Indices& indices, float max_dist, - int nr_threads = 1); + int num_threads = 1); namespace registration { /** \brief FPCSInitialAlignment computes corresponding four point congruent sets as @@ -160,19 +160,25 @@ class FPCSInitialAlignment : public Registration::num_threads_; using PCLBase::deinitCompute; using PCLBase::input_; using PCLBase::indices_; @@ -468,11 +475,6 @@ class FPCSInitialAlignment : public RegistrationsetNumberOfThreads(nr_threads); + correspondence_estimation_->setNumberOfThreads(num_threads); } protected: diff --git a/registration/include/pcl/registration/impl/gicp.hpp b/registration/include/pcl/registration/impl/gicp.hpp index 78490b66370..b6ed22f7a50 100644 --- a/registration/include/pcl/registration/impl/gicp.hpp +++ b/registration/include/pcl/registration/impl/gicp.hpp @@ -48,23 +48,15 @@ namespace pcl { template void GeneralizedIterativeClosestPoint::setNumberOfThreads( - unsigned int nr_threads) + unsigned int num_threads) { #ifdef _OPENMP - if (nr_threads == 0) - threads_ = omp_get_num_procs(); - else - threads_ = nr_threads; - PCL_DEBUG("[pcl::GeneralizedIterativeClosestPoint::setNumberOfThreads] Setting " - "number of threads to %u.\n", - threads_); + num_threads_ = num_threads != 0 ? 
num_threads : omp_get_num_procs(); #else - threads_ = 1; - if (nr_threads != 1) - PCL_WARN("[pcl::GeneralizedIterativeClosestPoint::setNumberOfThreads] " - "Parallelization is requested, but OpenMP is not available! Continuing " - "without parallelization.\n"); -#endif // _OPENMP + if (num_threads_ != 1) { + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif } template @@ -92,7 +84,7 @@ GeneralizedIterativeClosestPoint::computeCovar if (cloud_covariances.size() < cloud->size()) cloud_covariances.resize(cloud->size()); -#pragma omp parallel for num_threads(threads_) schedule(dynamic, 32) \ +#pragma omp parallel for num_threads(num_threads_) schedule(dynamic, 32) \ shared(cloud, cloud_covariances) firstprivate(mean, cov, nn_indices, nn_dist_sq) for (std::ptrdiff_t i = 0; i < static_cast(cloud->size()); ++i) { const PointT& query_point = (*cloud)[i]; @@ -796,7 +788,7 @@ GeneralizedIterativeClosestPoint:: pcl::transformPointCloud(output, output, guess); pcl::registration::CorrespondenceEstimation corr_estimation; - corr_estimation.setNumberOfThreads(threads_); + corr_estimation.setNumberOfThreads(num_threads_); // setSearchMethodSource is not necessary because we do not use // determineReciprocalCorrespondences corr_estimation.setSearchMethodTarget(this->getSearchMethodTarget()); diff --git a/registration/include/pcl/registration/impl/ia_fpcs.hpp b/registration/include/pcl/registration/impl/ia_fpcs.hpp index 31533621a82..2ea342750b6 100644 --- a/registration/include/pcl/registration/impl/ia_fpcs.hpp +++ b/registration/include/pcl/registration/impl/ia_fpcs.hpp @@ -53,7 +53,7 @@ template inline float pcl::getMeanPointDensity(const typename pcl::PointCloud::ConstPtr& cloud, float max_dist, - int nr_threads) + int num_threads) { const float max_dist_sqr = max_dist * max_dist; const std::size_t s = cloud->size(); @@ -66,10 +66,10 @@ pcl::getMeanPointDensity(const typename pcl::PointCloud::ConstPtr& cloud pcl::Indices ids(2); 
std::vector dists_sqr(2); - pcl::utils::ignore(nr_threads); + pcl::utils::ignore(num_threads); #pragma omp parallel for default(none) shared(tree, cloud) \ firstprivate(ids, dists_sqr) reduction(+ : mean_dist, num) \ - firstprivate(s, max_dist_sqr) num_threads(nr_threads) + firstprivate(s, max_dist_sqr) num_threads(num_threads) for (int i = 0; i < 1000; i++) { tree.nearestKSearch((*cloud)[rand() % s], 2, ids, dists_sqr); if (dists_sqr[1] < max_dist_sqr) { @@ -87,7 +87,7 @@ inline float pcl::getMeanPointDensity(const typename pcl::PointCloud::ConstPtr& cloud, const pcl::Indices& indices, float max_dist, - int nr_threads) + int num_threads) { const float max_dist_sqr = max_dist * max_dist; const std::size_t s = indices.size(); @@ -100,13 +100,13 @@ pcl::getMeanPointDensity(const typename pcl::PointCloud::ConstPtr& cloud pcl::Indices ids(2); std::vector dists_sqr(2); - pcl::utils::ignore(nr_threads); + pcl::utils::ignore(num_threads); #if OPENMP_LEGACY_CONST_DATA_SHARING_RULE #pragma omp parallel for default(none) shared(tree, cloud, indices) \ - firstprivate(ids, dists_sqr) reduction(+ : mean_dist, num) num_threads(nr_threads) + firstprivate(ids, dists_sqr) reduction(+ : mean_dist, num) num_threads(num_threads) #else #pragma omp parallel for default(none) shared(tree, cloud, indices, s, max_dist_sqr) \ - firstprivate(ids, dists_sqr) reduction(+ : mean_dist, num) num_threads(nr_threads) + firstprivate(ids, dists_sqr) reduction(+ : mean_dist, num) num_threads(num_threads) #endif for (int i = 0; i < 1000; i++) { tree.nearestKSearch((*cloud)[indices[rand() % s]], 2, ids, dists_sqr); @@ -150,7 +150,7 @@ pcl::registration::FPCSInitialAlignment( - target_, *target_indices_, 0.05f * diameter_, nr_threads_); + target_, *target_indices_, 0.05f * diameter_, num_threads_); delta_ *= mean_dist; } diff --git a/sample_consensus/include/pcl/sample_consensus/impl/ransac.hpp b/sample_consensus/include/pcl/sample_consensus/impl/ransac.hpp index f41d0e58301..d0c2f9592ea 100644 --- 
a/sample_consensus/include/pcl/sample_consensus/impl/ransac.hpp +++ b/sample_consensus/include/pcl/sample_consensus/impl/ransac.hpp @@ -78,24 +78,11 @@ pcl::RandomSampleConsensus::computeModel (int) // suppress infinite loops by just allowing 10 x maximum allowed iterations for invalid model parameters! const unsigned max_skip = max_iterations_ * 10; - int threads = threads_; - if (threads >= 0) - { -#if OPENMP_AVAILABLE_RANSAC - if (threads == 0) - { - threads = omp_get_num_procs(); - PCL_DEBUG ("[pcl::RandomSampleConsensus::computeModel] Automatic number of threads requested, choosing %i threads.\n", threads); - } -#else - // Parallelization desired, but not available - PCL_WARN ("[pcl::RandomSampleConsensus::computeModel] Parallelization is requested, but OpenMP 3.1 is not available! Continuing without parallelization.\n"); - threads = -1; -#endif - } - #if OPENMP_AVAILABLE_RANSAC -#pragma omp parallel if(threads > 0) num_threads(threads) shared(k, skipped_count, n_best_inliers_count) firstprivate(selection, model_coefficients) // would be nice to have a default(none)-clause here, but then some compilers complain about the shared const variables +#pragma omp parallel \ + num_threads(num_threads_) \ + shared(k, skipped_count, n_best_inliers_count) \ + firstprivate(selection, model_coefficients) // would be nice to have a default(none)-clause here, but then some compilers complain about the shared const variables #endif { #if OPENMP_AVAILABLE_RANSAC diff --git a/sample_consensus/include/pcl/sample_consensus/ransac.h b/sample_consensus/include/pcl/sample_consensus/ransac.h index 2e2c09ee6c6..6dc4d430ad6 100644 --- a/sample_consensus/include/pcl/sample_consensus/ransac.h +++ b/sample_consensus/include/pcl/sample_consensus/ransac.h @@ -78,7 +78,7 @@ namespace pcl using SampleConsensus::model_coefficients_; using SampleConsensus::inliers_; using SampleConsensus::probability_; - using SampleConsensus::threads_; + using SampleConsensus::num_threads_; /** \brief RANSAC 
(RANdom SAmple Consensus) main constructor * \param[in] model a Sample Consensus model diff --git a/sample_consensus/include/pcl/sample_consensus/sac.h b/sample_consensus/include/pcl/sample_consensus/sac.h index 64ac36e7ba7..19239912646 100644 --- a/sample_consensus/include/pcl/sample_consensus/sac.h +++ b/sample_consensus/include/pcl/sample_consensus/sac.h @@ -80,7 +80,6 @@ namespace pcl , iterations_ (0) , threshold_ (std::numeric_limits::max ()) , max_iterations_ (1000) - , threads_ (-1) , rng_ (new boost::uniform_01 (rng_alg_)) { // Create a random number generator object @@ -103,7 +102,6 @@ namespace pcl , iterations_ (0) , threshold_ (threshold) , max_iterations_ (1000) - , threads_ (-1) , rng_ (new boost::uniform_01 (rng_alg_)) { // Create a random number generator object @@ -164,15 +162,28 @@ namespace pcl getProbability () const { return (probability_); } /** \brief Set the number of threads to use or turn off parallelization. - * \param[in] nr_threads the number of hardware threads to use (0 sets the value automatically, a negative number turns parallelization off) + * \param[in] num_threads the number of hardware threads to use (0 sets the value automatically) * \note Not all SAC methods have a parallel implementation. Some will ignore this setting. */ inline void - setNumberOfThreads (const int nr_threads = -1) { threads_ = nr_threads; } + setNumberOfThreads (const int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } /** \brief Get the number of threads, as set by the user. */ - inline int - getNumberOfThreads () const { return (threads_); } + inline unsigned int + getNumberOfThreads() const + { + return (num_threads_); + } /** \brief Compute the actual model. Pure virtual. 
*/ virtual bool @@ -340,8 +351,8 @@ namespace pcl /** \brief Maximum number of iterations before giving up. */ int max_iterations_; - /** \brief The number of threads the scheduler should use, or a negative number if no parallelization is wanted. */ - int threads_; + /** Number of threads used */ + unsigned int num_threads_{1}; /** \brief Boost-based random number generator algorithm. */ boost::mt19937 rng_alg_; diff --git a/segmentation/include/pcl/segmentation/approximate_progressive_morphological_filter.h b/segmentation/include/pcl/segmentation/approximate_progressive_morphological_filter.h index 77bf58926f5..e589440a1eb 100644 --- a/segmentation/include/pcl/segmentation/approximate_progressive_morphological_filter.h +++ b/segmentation/include/pcl/segmentation/approximate_progressive_morphological_filter.h @@ -129,10 +129,20 @@ namespace pcl setExponential (bool exponential) { exponential_ = exponential; } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to automatic) + * \param num_threads the number of hardware threads to use (0 sets the value back to automatic) */ inline void - setNumberOfThreads (unsigned int nr_threads = 0) { threads_ = nr_threads; } + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } /** \brief This method launches the segmentation algorithm and returns indices of * points determined to be ground returns. @@ -143,6 +153,8 @@ namespace pcl protected: + using PCLBase::num_threads_; + /** \brief Maximum window size to be used in filtering ground returns. */ int max_window_size_{33}; @@ -163,9 +175,6 @@ namespace pcl /** \brief Exponentially grow window sizes? 
*/ bool exponential_{true}; - - /** \brief Number of threads to be used. */ - unsigned int threads_{0}; }; } diff --git a/segmentation/include/pcl/segmentation/impl/approximate_progressive_morphological_filter.hpp b/segmentation/include/pcl/segmentation/impl/approximate_progressive_morphological_filter.hpp index 564d3e9471a..8a75d7b24de 100644 --- a/segmentation/include/pcl/segmentation/impl/approximate_progressive_morphological_filter.hpp +++ b/segmentation/include/pcl/segmentation/impl/approximate_progressive_morphological_filter.hpp @@ -117,7 +117,7 @@ pcl::ApproximateProgressiveMorphologicalFilter::extract (Indices& ground #pragma omp parallel for \ default(none) \ shared(A, global_min) \ - num_threads(threads_) + num_threads(num_threads_) for (int i = 0; i < static_cast(input_->size ()); ++i) { // ...then test for lower points within the cell const PointT& p = (*input_)[i]; @@ -132,7 +132,7 @@ pcl::ApproximateProgressiveMorphologicalFilter::extract (Indices& ground #pragma omp parallel for \ default(none) \ shared(A, global_min) \ - num_threads(threads_) + num_threads(num_threads_) for (int i = 0; i < static_cast(input_->size ()); ++i) { // ...then test for lower points within the cell const PointT& p = (*input_)[i]; @@ -173,7 +173,7 @@ pcl::ApproximateProgressiveMorphologicalFilter::extract (Indices& ground #pragma omp parallel for \ default(none) \ shared(A, cols, half_sizes, i, rows, Z) \ - num_threads(threads_) + num_threads(num_threads_) for (int row = 0; row < rows; ++row) { int rs, re; @@ -208,7 +208,7 @@ pcl::ApproximateProgressiveMorphologicalFilter::extract (Indices& ground #pragma omp parallel for \ default(none) \ shared(cols, half_sizes, i, rows, Z, Zf) \ - num_threads(threads_) + num_threads(num_threads_) for (int row = 0; row < rows; ++row) { int rs, re; diff --git a/segmentation/include/pcl/segmentation/impl/sac_segmentation.hpp b/segmentation/include/pcl/segmentation/impl/sac_segmentation.hpp index e701b5ef752..84e7a886694 100644 --- 
a/segmentation/include/pcl/segmentation/impl/sac_segmentation.hpp +++ b/segmentation/include/pcl/segmentation/impl/sac_segmentation.hpp @@ -363,10 +363,12 @@ pcl::SACSegmentation::initSAC (const int method_type) // Set maximum distance for radius search during random sampling model_->setSamplesMaxDist (samples_radius_, samples_radius_search_); } - if (sac_->getNumberOfThreads () != threads_) + if (sac_->getNumberOfThreads() != num_threads_) { - PCL_DEBUG ("[pcl::%s::initSAC] Setting the number of threads to %i\n", getClassName ().c_str (), threads_); - sac_->setNumberOfThreads (threads_); + PCL_DEBUG("[pcl::%s::initSAC] Setting the number of threads to %i\n", + getClassName().c_str(), + num_threads_); + sac_->setNumberOfThreads(num_threads_); } } diff --git a/segmentation/include/pcl/segmentation/sac_segmentation.h b/segmentation/include/pcl/segmentation/sac_segmentation.h index a43832cdd83..6e77f40a9c7 100644 --- a/segmentation/include/pcl/segmentation/sac_segmentation.h +++ b/segmentation/include/pcl/segmentation/sac_segmentation.h @@ -147,11 +147,21 @@ namespace pcl getProbability () const { return (probability_); } /** \brief Set the number of threads to use or turn off parallelization. - * \param[in] nr_threads the number of hardware threads to use (0 sets the value automatically, a negative number turns parallelization off) + * \param[in] num_threads the number of hardware threads to use (0 sets the value automatically) * \note Not all SAC methods have a parallel implementation. Some will ignore this setting. */ inline void - setNumberOfThreads (const int nr_threads = -1) { threads_ = nr_threads; } + setNumberOfThreads (const int num_threads = 0 ) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } /** \brief Set to true if a coefficient refinement is required. 
* \param[in] optimize true for enabling model coefficient refinement, false otherwise @@ -247,6 +257,8 @@ namespace pcl virtual void initSAC (const int method_type); + using PCLBase::num_threads_; + /** \brief The model that needs to be segmented. */ SampleConsensusModelPtr model_{nullptr}; @@ -283,9 +295,6 @@ namespace pcl /** \brief Maximum number of iterations before giving up (user given parameter). */ int max_iterations_{50}; - /** \brief The number of threads the scheduler should use, or a negative number if no parallelization is wanted. */ - int threads_{-1}; - /** \brief Desired probability of choosing at least one sample free from outliers (user given parameter). */ double probability_{0.99}; diff --git a/surface/include/pcl/surface/impl/mls.hpp b/surface/include/pcl/surface/impl/mls.hpp index e14f84cc100..737b68aef2d 100644 --- a/surface/include/pcl/surface/impl/mls.hpp +++ b/surface/include/pcl/surface/impl/mls.hpp @@ -289,12 +289,10 @@ pcl::MovingLeastSquares::performProcessing (PointCloudOut & nr_coeff_ = (order_ + 1) * (order_ + 2) / 2; #ifdef _OPENMP - // (Maximum) number of threads - const unsigned int threads = threads_ == 0 ? 
1 : threads_; // Create temporaries for each thread in order to avoid synchronization - typename PointCloudOut::CloudVectorType projected_points (threads); - typename NormalCloud::CloudVectorType projected_points_normals (threads); - std::vector corresponding_input_indices (threads); + typename PointCloudOut::CloudVectorType projected_points(num_threads_); + typename NormalCloud::CloudVectorType projected_points_normals(num_threads_); + std::vector corresponding_input_indices(num_threads_); #endif // For all points @@ -302,7 +300,7 @@ pcl::MovingLeastSquares::performProcessing (PointCloudOut & default(none) \ shared(corresponding_input_indices, projected_points, projected_points_normals) \ schedule(dynamic,1000) \ - num_threads(threads) + num_threads(num_threads_) for (int cp = 0; cp < static_cast (indices_->size ()); ++cp) { // Allocate enough space to hold the results of nearest neighbor searches @@ -353,7 +351,7 @@ pcl::MovingLeastSquares::performProcessing (PointCloudOut & #ifdef _OPENMP // Combine all threads' results into the output vectors - for (unsigned int tn = 0; tn < threads; ++tn) + for (unsigned int tn = 0; tn < num_threads_; ++tn) { output.insert (output.end (), projected_points[tn].begin (), projected_points[tn].end ()); corresponding_input_indices_->indices.insert (corresponding_input_indices_->indices.end (), diff --git a/surface/include/pcl/surface/impl/poisson.hpp b/surface/include/pcl/surface/impl/poisson.hpp index 9e0cd210a90..cda26d00424 100644 --- a/surface/include/pcl/surface/impl/poisson.hpp +++ b/surface/include/pcl/surface/impl/poisson.hpp @@ -68,16 +68,9 @@ pcl::Poisson::~Poisson () = default; ////////////////////////////////////////////////////////////////////////////////////////////// template void -pcl::Poisson::setThreads (int threads) +pcl::Poisson::setThreads (unsigned int num_threads) { - if (threads == 0) -#ifdef _OPENMP - threads_ = omp_get_num_procs(); -#else - threads_ = 1; -#endif - else - threads_ = threads; + 
setNumberOfThreads(num_threads); } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -91,7 +84,7 @@ pcl::Poisson::execute (poisson::CoredVectorMeshData &mesh, poisson::Octree tree; - tree.threads = threads_; + tree.threads = num_threads_; center.coords[0] = center.coords[1] = center.coords[2] = 0; diff --git a/surface/include/pcl/surface/mls.h b/surface/include/pcl/surface/mls.h index 7c33feddfff..47908a3e7ac 100644 --- a/surface/include/pcl/surface/mls.h +++ b/surface/include/pcl/surface/mls.h @@ -480,12 +480,19 @@ namespace pcl getMLSResults () const { return (mls_results_); } /** \brief Set the maximum number of threads to use - * \param threads the maximum number of hardware threads to use (0 sets the value to 1) + * \param threads the maximum number of hardware threads to use (0 sets the value automatically) */ inline void - setNumberOfThreads (unsigned int threads = 1) + setNumberOfThreads(unsigned int num_threads = 0) { - threads_ = threads; +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif } /** \brief Base method for surface reconstruction for all points given in @@ -501,6 +508,8 @@ namespace pcl getCorrespondingIndices () const { return (corresponding_input_indices_); } protected: + using PCLBase::num_threads_; + /** \brief The point cloud that will hold the estimated normals, if set. */ NormalCloudPtr normals_{nullptr}; @@ -556,10 +565,6 @@ namespace pcl /** \brief Parameter that specifies the projection method to be used. */ MLSResult::ProjectionMethod projection_method_{MLSResult::SIMPLE}; - /** \brief The maximum number of threads the scheduler should use. 
*/ - unsigned int threads_{1}; - - /** \brief A minimalistic implementation of a voxel grid, necessary for the point cloud upsampling * \note Used only in the case of VOXEL_GRID_DILATION upsampling */ diff --git a/surface/include/pcl/surface/poisson.h b/surface/include/pcl/surface/poisson.h index 9fca5b4b343..b26def3f282 100644 --- a/surface/include/pcl/surface/poisson.h +++ b/surface/include/pcl/surface/poisson.h @@ -219,18 +219,38 @@ namespace pcl /** \brief Set the number of threads to use. * \param[in] threads the number of threads */ + PCL_DEPRECATED(1,18, "Use setNumberOfThreads() instead.") void - setThreads(int threads); + setThreads(unsigned int num_threads = 0); + + /** \brief Initialize the scheduler and set the number of threads to use. + * \param num_threads the number of hardware threads to use (0 sets the value back + * to automatic) + */ + inline void + setNumberOfThreads(unsigned int num_threads = 0) + { +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + if (num_threads_ != 1) { + PCL_WARN( + "OpenMP is not available. Keeping number of threads unchanged at 1\n"); + } +#endif + } /** \brief Get the number of threads*/ inline int getThreads() { - return threads_; + return num_threads_; } protected: + using PCLBase::num_threads_; + /** \brief Class get name method. 
*/ std::string getClassName () const override { return ("Poisson"); } @@ -257,7 +277,6 @@ namespace pcl bool show_residual_{false}; int min_iterations_{8}; float solver_accuracy_{1e-3f}; - int threads_{1}; template void execute (poisson::CoredVectorMeshData &mesh, diff --git a/test/registration/test_fpcs_ia.cpp b/test/registration/test_fpcs_ia.cpp index 18f1d1cb02f..f00d443e381 100644 --- a/test/registration/test_fpcs_ia.cpp +++ b/test/registration/test_fpcs_ia.cpp @@ -70,7 +70,7 @@ TEST (PCL, FPCSInitialAlignment) fpcs_ia.setInputSource (cloud_source_ptr); fpcs_ia.setInputTarget (cloud_target_ptr); - fpcs_ia.setNumberOfThreads (nr_threads); + fpcs_ia.setNumberOfThreads (num_threads); fpcs_ia.setApproxOverlap (approx_overlap); fpcs_ia.setDelta (delta, true); fpcs_ia.setScoreThreshold (0.025); // if score is below this threshold, fpcs can stop because the solution is very good diff --git a/test/registration/test_fpcs_ia_data.h b/test/registration/test_fpcs_ia_data.h index 84236a2dab0..482359d5759 100644 --- a/test/registration/test_fpcs_ia_data.h +++ b/test/registration/test_fpcs_ia_data.h @@ -1,6 +1,6 @@ #pragma once -constexpr int nr_threads = 1; +constexpr int num_threads = 1; constexpr float approx_overlap = 0.9f; constexpr float delta = 1.f; constexpr int nr_samples = 100; diff --git a/test/registration/test_kfpcs_ia.cpp b/test/registration/test_kfpcs_ia.cpp index 4caba0b41a4..1e9febef0e1 100644 --- a/test/registration/test_kfpcs_ia.cpp +++ b/test/registration/test_kfpcs_ia.cpp @@ -67,7 +67,7 @@ TEST (PCL, KFPCSInitialAlignment) kfpcs_ia.setInputSource (cloud_source_ptr); kfpcs_ia.setInputTarget (cloud_target_ptr); - //kfpcs_ia.setNumberOfThreads (nr_threads); + //kfpcs_ia.setNumberOfThreads (num_threads); kfpcs_ia.setApproxOverlap (approx_overlap); kfpcs_ia.setDelta (voxel_size, false); kfpcs_ia.setScoreThreshold (abort_score); diff --git a/test/registration/test_kfpcs_ia_data.h b/test/registration/test_kfpcs_ia_data.h index 349c7721c92..d12ebdcec69 100644 --- 
a/test/registration/test_kfpcs_ia_data.h +++ b/test/registration/test_kfpcs_ia_data.h @@ -1,6 +1,6 @@ #pragma once -constexpr int nr_threads = 1; +constexpr int num_threads = 1; constexpr float voxel_size = 0.1f; constexpr float approx_overlap = 0.9f; constexpr float abort_score = 0.4f; diff --git a/test/surface/test_poisson.cpp b/test/surface/test_poisson.cpp index b6a0ec1bf10..304939aeb34 100644 --- a/test/surface/test_poisson.cpp +++ b/test/surface/test_poisson.cpp @@ -67,6 +67,9 @@ TEST (PCL, Poisson) Poisson poisson; poisson.setInputCloud (cloud_with_normals); + // poisson4 returns different vertices when parallelized + // for now, disable parallelization + poisson.setNumberOfThreads(1); PolygonMesh mesh; poisson.reconstruct (mesh); diff --git a/tracking/include/pcl/tracking/impl/kld_adaptive_particle_filter_omp.hpp b/tracking/include/pcl/tracking/impl/kld_adaptive_particle_filter_omp.hpp index 8b036574c3e..7867f3e6d71 100644 --- a/tracking/include/pcl/tracking/impl/kld_adaptive_particle_filter_omp.hpp +++ b/tracking/include/pcl/tracking/impl/kld_adaptive_particle_filter_omp.hpp @@ -9,16 +9,15 @@ namespace tracking { template void KLDAdaptiveParticleFilterOMPTracker::setNumberOfThreads( - unsigned int nr_threads) + unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + if (num_threads_ != 1) { + PCL_WARN("OpenMP is not available. 
Keeping number of threads unchanged at 1\n"); + } #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -30,7 +29,7 @@ KLDAdaptiveParticleFilterOMPTracker::weight() // clang-format off #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) this->computeTransformedPointCloudWithoutNormal((*particles_)[i], @@ -48,7 +47,7 @@ KLDAdaptiveParticleFilterOMPTracker::weight() // clang-format off #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { IndicesPtr indices; @@ -66,7 +65,7 @@ KLDAdaptiveParticleFilterOMPTracker::weight() // clang-format off #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { IndicesPtr indices; @@ -84,7 +83,7 @@ KLDAdaptiveParticleFilterOMPTracker::weight() #pragma omp parallel for \ default(none) \ shared(indices_list) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { this->computeTransformedPointCloudWithNormal( @@ -100,7 +99,7 @@ KLDAdaptiveParticleFilterOMPTracker::weight() #pragma omp parallel for \ default(none) \ shared(indices_list) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { coherence_->compute( diff --git a/tracking/include/pcl/tracking/impl/particle_filter_omp.hpp b/tracking/include/pcl/tracking/impl/particle_filter_omp.hpp index a4cd2a1ff66..89bd53a22a7 100644 --- a/tracking/include/pcl/tracking/impl/particle_filter_omp.hpp +++ b/tracking/include/pcl/tracking/impl/particle_filter_omp.hpp @@ -8,16 +8,15 @@ namespace tracking { ////////////////////////////////////////////////////////////////////////////////////////////// 
template void -ParticleFilterOMPTracker::setNumberOfThreads(unsigned int nr_threads) +ParticleFilterOMPTracker::setNumberOfThreads(unsigned int num_threads) { - if (nr_threads == 0) #ifdef _OPENMP - threads_ = omp_get_num_procs(); + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); #else - threads_ = 1; + num_threads_ = 1; + if (num_threads != 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); #endif - else - threads_ = nr_threads; } ////////////////////////////////////////////////////////////////////////////////////////////// @@ -29,7 +28,7 @@ ParticleFilterOMPTracker::weight() // clang-format off #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) this->computeTransformedPointCloudWithoutNormal((*particles_)[i], @@ -47,7 +46,7 @@ ParticleFilterOMPTracker::weight() // clang-format off #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { IndicesPtr indices; // dummy @@ -65,7 +64,7 @@ ParticleFilterOMPTracker::weight() // clang-format off #pragma omp parallel for \ default(none) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { IndicesPtr indices; // dummy @@ -83,7 +82,7 @@ ParticleFilterOMPTracker::weight() #pragma omp parallel for \ default(none) \ shared(indices_list) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { this->computeTransformedPointCloudWithNormal( @@ -99,7 +98,7 @@ ParticleFilterOMPTracker::weight() #pragma omp parallel for \ default(none) \ shared(indices_list) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < particle_num_; i++) { coherence_->compute( diff --git a/tracking/include/pcl/tracking/impl/pyramidal_klt.hpp
b/tracking/include/pcl/tracking/impl/pyramidal_klt.hpp index 29d0fc9b6f8..baae4d33f59 100644 --- a/tracking/include/pcl/tracking/impl/pyramidal_klt.hpp +++ b/tracking/include/pcl/tracking/impl/pyramidal_klt.hpp @@ -250,7 +250,7 @@ PyramidalKLTTracker::downsample(const FloatImageConstPtr& default(none) \ shared(down, height, output, smoothed, width) \ firstprivate(ii) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int j = 0; j < height; ++j) { int jj = 2 * j; @@ -304,7 +304,7 @@ PyramidalKLTTracker::convolveRows( #pragma omp parallel for \ default(none) \ shared(input, height, last, output, w, width) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int j = 0; j < height; ++j) { for (int i = kernel_size_2_; i < last; ++i) { @@ -340,7 +340,7 @@ PyramidalKLTTracker::convolveCols(const FloatImageConstPtr #pragma omp parallel for \ default(none) \ shared(input, h, height, last, output, width) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < width; ++i) { for (int j = kernel_size_2_; j < last; ++j) { @@ -375,7 +375,7 @@ PyramidalKLTTracker::computePyramids( #pragma omp parallel for \ default(none) \ shared(input, tmp) \ - num_threads(threads_) + num_threads(num_threads_) // clang-format on for (int i = 0; i < static_cast(input->size()); ++i) (*tmp)[i] = intensity_((*input)[i]); diff --git a/tracking/include/pcl/tracking/kld_adaptive_particle_filter_omp.h b/tracking/include/pcl/tracking/kld_adaptive_particle_filter_omp.h index 5f479270f78..c8f0a0e8f87 100644 --- a/tracking/include/pcl/tracking/kld_adaptive_particle_filter_omp.h +++ b/tracking/include/pcl/tracking/kld_adaptive_particle_filter_omp.h @@ -66,27 +66,26 @@ class KLDAdaptiveParticleFilterOMPTracker using CloudCoherenceConstPtr = typename CloudCoherence::ConstPtr; /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value + * \param num_threads the number of hardware threads to use (0 sets the value * back to automatic) */ - KLDAdaptiveParticleFilterOMPTracker(unsigned int nr_threads = 0) + KLDAdaptiveParticleFilterOMPTracker(unsigned int num_threads = 0) : KLDAdaptiveParticleFilterTracker() { tracker_name_ = "KLDAdaptiveParticleFilterOMPTracker"; - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value back to + * \param num_threads the number of hardware threads to use (0 sets the value back to * automatic) */ void - setNumberOfThreads(unsigned int nr_threads = 0); + setNumberOfThreads(unsigned int num_threads = 0); protected: - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; + using PCLBase::num_threads_; /** \brief weighting phase of particle filter method. calculate the likelihood of all * of the particles and set the weights. diff --git a/tracking/include/pcl/tracking/particle_filter_omp.h b/tracking/include/pcl/tracking/particle_filter_omp.h index 91bb1b271be..3c19a79027d 100644 --- a/tracking/include/pcl/tracking/particle_filter_omp.h +++ b/tracking/include/pcl/tracking/particle_filter_omp.h @@ -54,27 +54,26 @@ class ParticleFilterOMPTracker : public ParticleFilterTracker using CloudCoherenceConstPtr = typename CloudCoherence::ConstPtr; /** \brief Initialize the scheduler and set the number of threads to use. 
- * \param nr_threads the number of hardware threads to use (0 sets the value + * \param num_threads the number of hardware threads to use (0 sets the value * back to automatic) */ - ParticleFilterOMPTracker(unsigned int nr_threads = 0) + ParticleFilterOMPTracker(unsigned int num_threads = 0) : ParticleFilterTracker() { tracker_name_ = "ParticleFilterOMPTracker"; - setNumberOfThreads(nr_threads); + setNumberOfThreads(num_threads); } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value + * \param num_threads the number of hardware threads to use (0 sets the value * back to automatic) */ void - setNumberOfThreads(unsigned int nr_threads = 0); + setNumberOfThreads(unsigned int num_threads = 0); protected: - /** \brief The number of threads the scheduler should use. */ - unsigned int threads_; + using PCLBase::num_threads_; /** \brief weighting phase of particle filter method. calculate the likelihood of all * of the particles and set the weights. diff --git a/tracking/include/pcl/tracking/pyramidal_klt.h b/tracking/include/pcl/tracking/pyramidal_klt.h index d5b44908355..008a9ca4120 100644 --- a/tracking/include/pcl/tracking/pyramidal_klt.h +++ b/tracking/include/pcl/tracking/pyramidal_klt.h @@ -85,7 +85,6 @@ class PyramidalKLTTracker : public Tracker { , nb_levels_(nb_levels) , track_width_(tracking_window_width) , track_height_(tracking_window_height) - , threads_(0) , initialized_(false) { tracker_name_ = "PyramidalKLTTracker"; @@ -203,13 +202,19 @@ class PyramidalKLTTracker : public Tracker { } /** \brief Initialize the scheduler and set the number of threads to use. - * \param nr_threads the number of hardware threads to use (0 sets the value + * \param num_threads the number of hardware threads to use (0 sets the value * back to automatic). 
*/ inline void - setNumberOfThreads(unsigned int nr_threads = 0) + setNumberOfThreads(unsigned int num_threads = 0) { - threads_ = nr_threads; +#ifdef _OPENMP + num_threads_ = num_threads != 0 ? num_threads : omp_get_num_procs(); +#else + num_threads_ = 1; + if (num_threads != 1) + PCL_WARN("OpenMP is not available. Keeping number of threads unchanged at 1\n"); +#endif } /** \brief Get a pointer of the cloud at t-1. */ @@ -389,6 +394,8 @@ class PyramidalKLTTracker : public Tracker { void computeTracking() override; + using PCLBase::num_threads_; + /** \brief input pyranid at t-1 */ std::vector ref_pyramid_; /** \brief point cloud at t-1 */ @@ -417,8 +424,6 @@ class PyramidalKLTTracker : public Tracker { /** \brief epsilon for subpixel computation */ float epsilon_; float max_residue_; - /** \brief number of hardware threads */ - unsigned int threads_; /** \brief intensity accessor */ IntensityT intensity_; /** \brief is the tracker initialized ? */