Also pin benchmark threads in microbenchmarks (without runtime)
PeterTh committed Nov 20, 2024
1 parent 749bc6d commit 7622cd3
Showing 4 changed files with 30 additions and 1 deletion.
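
All four files follow the same pattern: each benchmark case constructs an RAII test_utils::benchmark_thread_pinner as its first statement, so the measuring thread is pinned even though these microbenchmarks never start a Celerity runtime. Below is a minimal, hypothetical illustration of that pattern; the benchmark body is invented for this sketch and is not part of the commit.

#include <numeric>
#include <vector>

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>

#include "test_utils.h"

// Hypothetical benchmark using the new helper: the pinner is the first statement,
// so all timed work below runs on a pinned thread for more consistent results.
TEST_CASE("benchmark something without a runtime", "[benchmark][group:example]") {
    test_utils::benchmark_thread_pinner pinner;
    const std::vector<int> data(1024, 1);
    BENCHMARK("sum 1024 ints") { return std::accumulate(data.begin(), data.end(), 0); };
}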
1 change: 1 addition & 0 deletions src/platform_specific/affinity.unix.cc
@@ -3,6 +3,7 @@
#include <mutex>
#include <optional>
#include <unordered_map>
#include <vector>

#include <pthread.h>
#include <sched.h>
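
The added <vector> include supports the pinning code in this file. For orientation, here is a minimal sketch of what pinning the calling thread to one core looks like with the Linux pthread API; this is illustrative only, not Celerity's actual implementation, which additionally maps thread roles (application, scheduler, ...) to specific cores.

#include <cstdio>

#include <pthread.h>
#include <sched.h>

// Illustrative only: restrict the calling thread to a single core.
bool pin_calling_thread_to_core(const unsigned core) {
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(core, &set);
    const int err = pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
    if(err != 0) { std::fprintf(stderr, "pthread_setaffinity_np failed with error %d\n", err); }
    return err == 0;
}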
10 changes: 9 additions & 1 deletion test/dag_benchmarks.cc
@@ -18,6 +18,7 @@ struct bench_graph_node : intrusive_graph_node<bench_graph_node> {};

// try to cover the dependency counts we'll see in practice
TEMPLATE_TEST_CASE_SIG("benchmark intrusive graph dependency handling with N nodes", "[benchmark][group:graph-nodes]", ((int N), N), 1, 10, 100) {
test_utils::benchmark_thread_pinner pinner;
// note that bench_graph_nodes are created/destroyed *within* the BENCHMARK
// in the first two cases while the latter 2 cases only operate on already
// existing nodes -- this is intentional; both cases are relevant in practice
@@ -61,6 +62,7 @@ TEMPLATE_TEST_CASE_SIG("benchmark intrusive graph dependency handling with N nod
}

TEST_CASE("benchmark task handling", "[benchmark][group:task-graph]") {
test_utils::benchmark_thread_pinner pinner;
constexpr int N = 10000;
constexpr int report_interval = 10;

@@ -97,7 +99,6 @@ static constexpr instruction_graph_generator::policy_set benchmark_instruction_g
/* overlapping_write_error */ CELERITY_ACCESS_PATTERN_DIAGNOSTICS ? error_policy::panic : error_policy::ignore,
};


struct task_manager_benchmark_context {
const size_t num_nodes = 1;
task_graph tdag;
@@ -242,6 +243,8 @@ class restartable_thread {
std::thread m_thread{&restartable_thread::main, this};

void main() {
// This thread is used for scheduling, so pin it to the scheduler core
detail::thread_pinning::pin_this_thread(detail::thread_pinning::thread_type::scheduler);
std::unique_lock lk{m_mutex};
for(;;) {
m_update.wait(lk, [this] { return !std::holds_alternative<empty>(m_next); });
@@ -470,28 +473,33 @@ void run_benchmarks(BenchmarkContextFactory&& make_ctx) {
}

TEST_CASE("generating large task graphs", "[benchmark][group:task-graph]") {
test_utils::benchmark_thread_pinner pinner;
run_benchmarks([] { return task_manager_benchmark_context{}; });
}

TEMPLATE_TEST_CASE_SIG("generating large command graphs for N nodes", "[benchmark][group:command-graph]", ((size_t NumNodes), NumNodes), 1, 4, 16) {
test_utils::benchmark_thread_pinner pinner;
run_benchmarks([] { return command_graph_generator_benchmark_context{NumNodes}; });
}

TEMPLATE_TEST_CASE_SIG(
"generating large instruction graphs for N devices", "[benchmark][group:instruction-graph]", ((size_t NumDevices), NumDevices), 1, 4, 16) {
test_utils::benchmark_thread_pinner pinner;
constexpr static size_t num_nodes = 2;
run_benchmarks([] { return instruction_graph_generator_benchmark_context(num_nodes, NumDevices); });
}

TEMPLATE_TEST_CASE_SIG("generating large instruction graphs for N devices without d2d copy support", "[benchmark][group:instruction-graph]",
((size_t NumDevices), NumDevices), 1, 4, 16) {
test_utils::benchmark_thread_pinner pinner;
constexpr static size_t num_nodes = 2;
run_benchmarks([] { return instruction_graph_generator_benchmark_context(num_nodes, NumDevices, false /* supports_d2d_copies */); });
}

TEMPLATE_TEST_CASE_SIG("building command- and instruction graphs in a dedicated scheduler thread for N nodes", "[benchmark][group:scheduler]",
((size_t NumNodes), NumNodes), 1, 4) //
{
test_utils::benchmark_thread_pinner pinner;
constexpr static size_t num_devices = 1;
SECTION("reference: single-threaded immediate graph generation") {
run_benchmarks([&] { return command_graph_generator_benchmark_context(NumNodes); });
6 changes: 6 additions & 0 deletions test/grid_benchmarks.cc
@@ -12,6 +12,7 @@ using namespace celerity;
using namespace celerity::detail;

TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][group:grid]") {
test_utils::benchmark_thread_pinner pinner;
const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
{"small", 10, 5, 4},
{"medium", 50, 1, 50},
@@ -33,6 +34,7 @@ TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][group:grid]") {
}

TEST_CASE("normalizing randomized box sets - 3d", "[benchmark][group:grid]") {
test_utils::benchmark_thread_pinner pinner;
const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
{"small", 10, 5, 4},
{"medium", 50, 1, 50},
@@ -66,6 +68,7 @@ box_vector<Dims> create_box_tiling(const size_t n_per_side) {
}

TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchmark][group:grid]", ((int Dims), Dims), 1, 2, 3) {
test_utils::benchmark_thread_pinner pinner;
const auto [label, n] = GENERATE(values<std::tuple<const char*, size_t>>({
{"small", 4},
{"medium", 50},
@@ -92,6 +95,7 @@ TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchm
}

TEST_CASE("performing set operations between randomized regions - 2d", "[benchmark][group:grid]") {
test_utils::benchmark_thread_pinner pinner;
const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
{"small", 10, 5, 4},
{"medium", 50, 1, 50},
@@ -129,6 +133,7 @@ TEST_CASE("performing set operations between randomized regions - 2d", "[benchma
}

TEST_CASE("performing set operations between randomized regions - 3d", "[benchmark][group:grid]") {
test_utils::benchmark_thread_pinner pinner;
const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
{"small", 10, 5, 4},
{"medium", 50, 1, 50},
@@ -158,6 +163,7 @@ box_vector<2> create_interlocking_boxes(const size_t num_boxes_per_side) {
}

TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[benchmark][group:grid]") {
test_utils::benchmark_thread_pinner pinner;
const auto [label, n] = GENERATE(values<std::tuple<const char*, size_t>>({
{"small", 10},
{"large", 200},
14 changes: 14 additions & 0 deletions test/test_utils.h
@@ -156,6 +156,20 @@ namespace detail {

namespace test_utils {

// Pin the benchmark threads (even in absence of a runtime) for more consistent results
struct benchmark_thread_pinner {
benchmark_thread_pinner() {
const detail::thread_pinning::runtime_configuration cfg{
.enabled = true,
.use_backend_device_submission_threads = false,
};
m_thread_pinner.emplace(cfg);
detail::thread_pinning::pin_this_thread(detail::thread_pinning::thread_type::application);
}

std::optional<detail::thread_pinning::thread_pinner> m_thread_pinner;
};

inline const detail::task* find_task(const detail::task_graph& tdag, const detail::task_id tid) {
return detail::graph_testspy::find_node_if(tdag, [tid](const detail::task& tsk) { return tsk.get_id() == tid; });
}
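
When debugging noisy benchmark numbers it can be useful to confirm that the pinner actually took effect. A hypothetical helper for that, not part of this commit, which reads back the calling thread's affinity mask on Linux:

#include <vector>

#include <pthread.h>
#include <sched.h>

// Hypothetical debugging helper: lists the cores the calling thread is allowed
// to run on, i.e. its effective affinity mask after pinning.
std::vector<int> current_thread_affinity() {
    cpu_set_t set;
    CPU_ZERO(&set);
    std::vector<int> cores;
    if(pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) {
        for(int core = 0; core < CPU_SETSIZE; ++core) {
            if(CPU_ISSET(core, &set)) { cores.push_back(core); }
        }
    }
    return cores;
}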