-
-
Notifications
You must be signed in to change notification settings - Fork 448
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
5944: added adaptive_static_chunk_size r=hkaiser a=kmoham6 I added adaptive_static_chunk_size to executors. Co-authored-by: Karame Mohammadiporshokooh <[email protected]>
- Loading branch information
Showing
3 changed files
with
136 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
127 changes: 127 additions & 0 deletions
127
libs/core/execution/include/hpx/execution/executors/adaptive_static_chunk_size.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
// Copyright (c) 2007-2022 Hartmut Kaiser | ||
// Copyright (c) 2022 Karame M.Shokooh | ||
// | ||
// SPDX-License-Identifier: BSL-1.0 | ||
// Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
|
||
/// \file parallel/executors/adaptive_static_chunk_size.hpp | ||
|
||
#pragma once | ||
|
||
#include <hpx/config.hpp> | ||
#include <hpx/execution_base/traits/is_executor_parameters.hpp> | ||
#include <hpx/serialization/serialize.hpp> | ||
|
||
#include <hpx/execution/executors/execution_parameters_fwd.hpp> | ||
|
||
#include <cmath> | ||
#include <cstddef> | ||
#include <iostream> | ||
#include <type_traits> | ||
|
||
namespace hpx { namespace execution { | ||
/////////////////////////////////////////////////////////////////////////// | ||
/// Loop iterations are divided into pieces of size \a chunk_size and then | ||
/// assigned to threads. If \a chunk_size is not specified, the iterations | ||
/// are evenly (if possible) divided contiguously among the threads. | ||
/// | ||
/// \note This executor parameters type is equivalent to OpenMP's STATIC | ||
/// scheduling directive. | ||
/// | ||
struct adaptive_static_chunk_size | ||
{ | ||
/// Construct a \a adaptive_static_chunk_size executor parameters object | ||
/// | ||
/// \note By default the number of loop iterations is determined from | ||
/// the number of available cores and the overall number of loop | ||
/// iterations to schedule. | ||
/// | ||
constexpr adaptive_static_chunk_size() noexcept | ||
: chunk_size_(0) | ||
{ | ||
} | ||
|
||
/// Construct a \a adaptive_static_chunk_size executor parameters object | ||
/// | ||
/// \param chunk_size [in] The optional chunk size to use as the | ||
/// number of loop iterations to run on a single | ||
/// thread. | ||
/// | ||
constexpr explicit adaptive_static_chunk_size( | ||
std::size_t chunk_size) noexcept | ||
: chunk_size_(chunk_size) | ||
{ | ||
} | ||
|
||
/// \cond NOINTERNAL | ||
template <typename Executor, typename F> | ||
std::size_t get_chunk_size( | ||
Executor& exec, F&&, std::size_t cores, std::size_t input_size) | ||
{ | ||
// Make sure the internal round robin counter of the executor is | ||
// reset | ||
parallel::execution::reset_thread_distribution(*this, exec); | ||
|
||
// use the given chunk size if given | ||
if (chunk_size_ != 0) | ||
{ | ||
return chunk_size_; | ||
} | ||
|
||
if (cores == 1) | ||
{ | ||
return input_size; | ||
} | ||
|
||
// Return a chunk size that is a power of two; and that leads to at | ||
// least 2 chunks per core, and at most 4 chunks per core. | ||
std::size_t chunk_size = 1; | ||
std::size_t coeff = 1; | ||
|
||
if (input_size >= (1ull << 25)) | ||
{ | ||
coeff = 8; | ||
} | ||
else if (input_size >= (1ull << 19)) | ||
{ | ||
coeff = 4; | ||
} | ||
while (chunk_size * coeff * cores < input_size) | ||
{ | ||
chunk_size *= 2; | ||
} | ||
|
||
return chunk_size; | ||
} | ||
/// \endcond | ||
|
||
private: | ||
/// \cond NOINTERNAL | ||
friend class hpx::serialization::access; | ||
|
||
template <typename Archive> | ||
void serialize(Archive& ar, const unsigned int /* version */) | ||
{ | ||
// clang-format off | ||
ar & chunk_size_; | ||
// clang-format on | ||
} | ||
/// \endcond | ||
|
||
private: | ||
/// \cond NOINTERNAL | ||
std::size_t chunk_size_; | ||
/// \endcond | ||
}; | ||
}} // namespace hpx::execution | ||
|
||
namespace hpx { namespace parallel { namespace execution { | ||
/// \cond NOINTERNAL | ||
template <> | ||
struct is_executor_parameters<hpx::execution::adaptive_static_chunk_size> | ||
: std::true_type | ||
{ | ||
}; | ||
/// \endcond | ||
}}} // namespace hpx::parallel::execution |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters