From 3fe7ae2a1178d28f44aba7cfc5d4ae2af98be14c Mon Sep 17 00:00:00 2001 From: Gabriel Mitterrutzner Date: Mon, 18 Dec 2023 16:11:42 +0100 Subject: [PATCH] Revision: add celerity blockchain for task divergence checking --- CHANGELOG.md | 2 +- CMakeLists.txt | 8 +- docs/pitfalls.md | 4 + ...nce_block_chain.h => divergence_checker.h} | 27 ++-- include/grid.h | 2 +- include/mpi_communicator.h | 7 +- include/ranges.h | 2 +- include/recorders.h | 9 +- include/runtime.h | 4 +- src/config.cc | 5 + ...e_block_chain.cc => divergence_checker.cc} | 122 +++++++++--------- src/runtime.cc | 37 ++++-- test/CMakeLists.txt | 2 +- ...tils.h => divergence_checker_test_utils.h} | 7 +- ...k_tests.cc => divergence_checker_tests.cc} | 59 ++++----- test/system/distr_tests.cc | 14 +- test/test_utils.h | 2 +- 17 files changed, 163 insertions(+), 150 deletions(-) rename include/{divergence_block_chain.h => divergence_checker.h} (90%) rename src/{divergence_block_chain.cc => divergence_checker.cc} (79%) rename test/{divergence_check_test_utils.h => divergence_checker_test_utils.h} (95%) rename test/{divergence_check_tests.cc => divergence_checker_tests.cc} (81%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40a89e5dd..2cf8d1310 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ Versioning](http://semver.org/spec/v2.0.0.html). - Introduce new experimental `for_each_item` utility to iterate over a celerity range (#199) - Add new environment variables `CELERITY_HORIZON_STEP` and `CELERITY_HORIZON_MAX_PARALLELISM` to control Horizon generation (#199) - Add new `experimental::constrain_split` API to limit how a kernel can be split (#?) -- Add divergence check blockchain for automatic detection of diverging tasks in debug mode (#217) +- Add automatic detection of diverging execution in debug mode (#217) - `distr_queue::fence` and `buffer_snapshot` are now stable, subsuming the `experimental::` APIs of the same name (#225) - Celerity now warns at runtime when a task declares reads from uninitialized buffers or writes with overlapping ranges between nodes (#224) - Introduce new `experimental::hint` API for providing the runtime with additional information on how to execute a task (#227) diff --git a/CMakeLists.txt b/CMakeLists.txt index 55d30ec85..d84968ea7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,11 +23,16 @@ endif() option(CELERITY_ACCESS_PATTERN_DIAGNOSTICS "Diagnose uninitialized reads and overlapping writes" ${DEFAULT_ENABLE_DEBUG_CHECKS}) option(CELERITY_ACCESSOR_BOUNDARY_CHECK "Enable accessor boundary check" ${DEFAULT_ENABLE_DEBUG_CHECKS}) +option(CELERITY_DIVERGENCE_CHECK "Enable divergence check" ${DEFAULT_ENABLE_DEBUG_CHECKS}) if(CELERITY_ACCESSOR_BOUNDARY_CHECK AND NOT (CMAKE_BUILD_TYPE STREQUAL "Debug")) message(STATUS "Accessor boundary check enabled - this will impact kernel performance") endif() +if(CELERITY_DIVERGENCE_CHECK AND NOT (CMAKE_BUILD_TYPE STREQUAL "Debug")) + message(STATUS "Divergence checker enabled - this will impact the overall performance") +endif() + set(CELERITY_CMAKE_DIR "${PROJECT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}" "${CELERITY_CMAKE_DIR}") find_package(MPI 2.0 REQUIRED) @@ -186,7 +191,7 @@ set(SOURCES src/command_graph.cc src/config.cc src/device_queue.cc - src/divergence_block_chain.cc + src/divergence_checker.cc src/executor.cc src/distributed_graph_generator.cc src/graph_serializer.cc @@ -289,6 +294,7 @@ target_compile_definitions(celerity_runtime PUBLIC CELERITY_FEATURE_UNNAMED_KERNELS=$ CELERITY_DETAIL_HAS_NAMED_THREADS=$ CELERITY_ACCESSOR_BOUNDARY_CHECK=$ + CELERITY_DIVERGENCE_CHECK=$ CELERITY_ACCESS_PATTERN_DIAGNOSTICS=$ ) diff --git a/docs/pitfalls.md b/docs/pitfalls.md index 94da5df46..8fed84f8e 100644 --- a/docs/pitfalls.md +++ b/docs/pitfalls.md @@ -132,3 +132,7 @@ if(rand() > 1337) { celerity::buffer my_buffer(...); } ``` + +> Diverging Host-Execution can be detected at runtime by enabling the +> `CELERITY_DIVERGENCE_CHECK` CMake option at the cost of some runtime +> overhead (enabled by default in debug builds). diff --git a/include/divergence_block_chain.h b/include/divergence_checker.h similarity index 90% rename from include/divergence_block_chain.h rename to include/divergence_checker.h index 02b367c4c..a9950460b 100644 --- a/include/divergence_block_chain.h +++ b/include/divergence_checker.h @@ -7,10 +7,6 @@ #include "communicator.h" #include "recorders.h" -namespace celerity::detail { -struct runtime_testspy; -} - namespace celerity::detail::divergence_checker_detail { using task_hash = size_t; using divergence_map = std::unordered_map>; @@ -72,29 +68,32 @@ class divergence_block_chain { std::mutex m_task_records_mutex; std::chrono::time_point m_last_cleared = std::chrono::steady_clock::now(); + std::chrono::seconds m_time_of_last_warning = std::chrono::seconds(0); std::unique_ptr m_communicator; - void divergence_out(const divergence_map& check_map, const int task_num); + void reprot_divergence(const divergence_map& check_map, const int task_num); void add_new_hashes(); void clear(const int min_progress); std::pair collect_hash_counts(); per_node_task_hashes collect_hashes(const int min_hash_count) const; - divergence_map create_check_map(const per_node_task_hashes& task_hashes, const int task_num) const; + divergence_map create_divergence_map(const per_node_task_hashes& task_hashes, const int task_num) const; - void check_for_deadlock() const; + void check_for_deadlock(); - static void log_node_divergences(const divergence_map& check_map, const int task_num); + static void log_node_divergences(const divergence_map& check_map, const int task_id); static void log_task_record(const divergence_map& check_map, const task_record& task, const task_hash hash); void log_task_record_once(const divergence_map& check_map, const int task_num); void add_new_task(const task_record& task); task_record thread_save_get_task_record(const size_t task_num); }; +}; // namespace celerity::detail::divergence_checker_detail +namespace celerity::detail { class divergence_checker { - friend struct ::celerity::detail::runtime_testspy; + friend struct runtime_testspy; public: divergence_checker(task_recorder& task_recorder, std::unique_ptr comm, bool test_mode = false) @@ -111,6 +110,10 @@ class divergence_checker { ~divergence_checker() { stop(); } private: + std::thread m_thread; + bool m_is_running = false; + divergence_checker_detail::divergence_block_chain m_block_chain; + void start() { m_thread = std::thread(&divergence_checker::run, this); m_is_running = true; @@ -129,9 +132,5 @@ class divergence_checker { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } } - - std::thread m_thread; - bool m_is_running = false; - divergence_block_chain m_block_chain; }; -}; // namespace celerity::detail::divergence_checker_detail +}; diff --git a/include/grid.h b/include/grid.h index 9eb2ca29a..6563fa6f3 100644 --- a/include/grid.h +++ b/include/grid.h @@ -271,7 +271,7 @@ template struct std::hash> { std::size_t operator()(const celerity::detail::region r) { std::size_t seed = 0; - for(auto box : r.get_boxes()) { + for(auto& box : r.get_boxes()) { celerity::detail::utils::hash_combine(seed, std::hash>{}(box)); } return seed; diff --git a/include/mpi_communicator.h b/include/mpi_communicator.h index 5b6da8d94..912e73fa3 100644 --- a/include/mpi_communicator.h +++ b/include/mpi_communicator.h @@ -1,4 +1,5 @@ #pragma once + #include #include @@ -11,6 +12,8 @@ class mpi_communicator : public communicator { mpi_communicator(MPI_Comm comm) : m_comm(comm) {} private: + MPI_Comm m_comm; + void allgather_inplace_impl(std::byte* sendrecvbuf, const int sendrecvcount) override { MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, sendrecvbuf, sendrecvcount, MPI_BYTE, m_comm); }; @@ -32,7 +35,5 @@ class mpi_communicator : public communicator { MPI_Comm_rank(m_comm, &rank); return static_cast(rank); } - - MPI_Comm m_comm; }; -} // namespace celerity::detail \ No newline at end of file +} // namespace celerity::detail diff --git a/include/ranges.h b/include/ranges.h index 5a62a7c74..6f24fb3fb 100644 --- a/include/ranges.h +++ b/include/ranges.h @@ -235,7 +235,7 @@ struct std::hash> { std::size_t operator()(const celerity::detail::coordinate& r) const noexcept { std::size_t seed = 0; for(int i = 0; i < Dims; ++i) { - celerity::detail::utils::hash_combine(seed, std::hash{}(r[i])); + celerity::detail::utils::hash_combine(seed, std::hash{}(r[i])); } return seed; }; diff --git a/include/recorders.h b/include/recorders.h index a11938d0d..8eb457987 100644 --- a/include/recorders.h +++ b/include/recorders.h @@ -61,7 +61,6 @@ class task_recorder { void record_task(const task& tsk); void add_callback(task_callback callback); - void invoke_callbacks(const task_record& tsk) const; const task_records& get_tasks() const { return m_recorded_tasks; } @@ -69,6 +68,8 @@ class task_recorder { task_records m_recorded_tasks; std::vector m_callbacks{}; const buffer_manager* m_buff_mngr; + + void invoke_callbacks(const task_record& tsk) const; }; // Command recording @@ -104,16 +105,16 @@ struct command_record { class command_recorder { public: - using command_record = std::vector; + using command_records = std::vector; command_recorder(const task_manager* task_mngr, const buffer_manager* buff_mngr = nullptr) : m_task_mngr(task_mngr), m_buff_mngr(buff_mngr) {} void record_command(const abstract_command& com); - const command_record& get_commands() const { return m_recorded_commands; } + const command_records& get_commands() const { return m_recorded_commands; } private: - command_record m_recorded_commands; + command_records m_recorded_commands; const task_manager* m_task_mngr; const buffer_manager* m_buff_mngr; }; diff --git a/include/runtime.h b/include/runtime.h index 2e6ac8be6..481315578 100644 --- a/include/runtime.h +++ b/include/runtime.h @@ -7,7 +7,7 @@ #include "command.h" #include "config.h" #include "device_queue.h" -#include "divergence_block_chain.h" +#include "divergence_checker.h" #include "frame.h" #include "host_queue.h" #include "recorders.h" @@ -102,7 +102,7 @@ namespace detail { size_t m_num_nodes; node_id m_local_nid; - std::unique_ptr m_divergence_check; + std::unique_ptr m_divergence_check; // These management classes are only constructed on the master node. std::unique_ptr m_cdag; diff --git a/src/config.cc b/src/config.cc index 16192909c..5475e9438 100644 --- a/src/config.cc +++ b/src/config.cc @@ -201,7 +201,12 @@ namespace detail { const auto has_dry_run_nodes = parsed_and_validated_envs.get(env_dry_run_nodes); if(has_dry_run_nodes) { m_dry_run_nodes = *has_dry_run_nodes; } +#if CELERITY_DIVERGENCE_CHECK + // divergence checker needs recording + m_recording = true; +#else m_recording = parsed_and_validated_envs.get_or(env_recording, false); +#endif m_horizon_step = parsed_and_validated_envs.get(env_horizon_step); m_horizon_max_parallelism = parsed_and_validated_envs.get(env_horizon_max_para); diff --git a/src/divergence_block_chain.cc b/src/divergence_checker.cc similarity index 79% rename from src/divergence_block_chain.cc rename to src/divergence_checker.cc index 7bb2059da..fdca4fb40 100644 --- a/src/divergence_block_chain.cc +++ b/src/divergence_checker.cc @@ -1,6 +1,46 @@ -#include "divergence_block_chain.h" +#include "divergence_checker.h" namespace celerity::detail::divergence_checker_detail { +bool divergence_block_chain::check_for_divergence() { + add_new_hashes(); + + const auto [min_hash_count, max_hash_count] = collect_hash_counts(); + + if(min_hash_count == 0) { + if(max_hash_count != 0 && m_local_nid == 0) { + check_for_deadlock(); + } else if(max_hash_count == 0) { + return true; + } + return false; + } + + const per_node_task_hashes task_graphs = collect_hashes(min_hash_count); + + for(int j = 0; j < min_hash_count; ++j) { + const divergence_map check_map = create_divergence_map(task_graphs, j); + + // If there is more than one hash for this task, we have a divergence! + if(check_map.size() > 1) { reprot_divergence(check_map, j); } + } + + clear(min_hash_count); + + return false; +} + +void divergence_block_chain::reprot_divergence(const divergence_map& check_map, const int task_num) { + if(m_local_nid == 0) { log_node_divergences(check_map, task_num + static_cast(m_tasks_checked) + 1); } + + // sleep for local_nid * 100 ms such that we have a no lock synchronized output + std::this_thread::sleep_for(std::chrono::milliseconds(m_local_nid * 100)); + + log_task_record_once(check_map, task_num); + + m_communicator->barrier(); + + throw std::runtime_error("Divergence in task graph detected"); +} void divergence_block_chain::add_new_hashes() { std::lock_guard lock(m_task_records_mutex); @@ -38,36 +78,36 @@ per_node_task_hashes divergence_block_chain::collect_hashes(const int min_hash_c } -divergence_map divergence_block_chain::create_check_map(const per_node_task_hashes& task_hashes, const int task_num) const { +divergence_map divergence_block_chain::create_divergence_map(const per_node_task_hashes& task_hashes, const int task_num) const { divergence_map check_map; - for(size_t i = 0; i < m_num_nodes; ++i) { - check_map[task_hashes(i, task_num)].push_back(i); + for(node_id nid = 0; nid < m_num_nodes; ++nid) { + check_map[task_hashes(nid, task_num)].push_back(nid); } return check_map; } -void divergence_block_chain::check_for_deadlock() const { +void divergence_block_chain::check_for_deadlock() { auto diff = std::chrono::duration_cast(std::chrono::steady_clock::now() - m_last_cleared); - static auto last = std::chrono::seconds(0); - if(diff >= std::chrono::seconds(10) && diff - last >= std::chrono::seconds(5)) { - std::string warning = fmt::format("After {} seconds of waiting nodes", diff.count()); + if(diff >= std::chrono::seconds(10) && diff - m_time_of_last_warning >= std::chrono::seconds(5)) { + std::string warning = fmt::format("After {} seconds of waiting, node(s)", diff.count()); - for(size_t i = 0; i < m_num_nodes; ++i) { - if(m_per_node_hash_counts[i] == 0) { warning += fmt::format(" {},", i); } + std::vector stuck_nodes; + for(node_id nid = 0; nid < m_num_nodes; ++nid) { + if(m_per_node_hash_counts[nid] == 0) stuck_nodes.push_back(nid); } - - warning += " did not move to the next task. The runtime might be stuck."; + warning += fmt::format(" {} ", fmt::join(stuck_nodes, ",")); + warning += "did not move to the next task. The runtime might be stuck."; CELERITY_WARN("{}", warning); - last = diff; + m_time_of_last_warning = diff; } } -void divergence_block_chain::log_node_divergences(const divergence_map& check_map, const int task_num) { - std::string error = fmt::format("Divergence detected in task graph at index {}:\n\n", task_num); +void divergence_block_chain::log_node_divergences(const divergence_map& check_map, const int task_id) { + std::string error = fmt::format("Divergence detected. Task Nr {} diverges on nodes:\n\n", task_id); for(auto& [hash, nodes] : check_map) { - error += fmt::format("{:#x} on nodes ", hash); + error += fmt::format("Following task-hash {:#x} resulted on {} ", hash, nodes.size() > 1 ? "nodes" : "node "); for(auto& node : nodes) { error += fmt::format("{} ", node); } @@ -115,11 +155,6 @@ void divergence_block_chain::log_task_record(const divergence_map& check_map, co CELERITY_ERROR("{}", task_record_output); } -task_record divergence_block_chain::thread_save_get_task_record(const size_t task_num) { - std::lock_guard lock(m_task_records_mutex); - return m_task_records[task_num]; -} - void divergence_block_chain::log_task_record_once(const divergence_map& check_map, const int task_num) { for(auto& [hash, nodes] : check_map) { if(nodes[0] == m_local_nid) { @@ -129,49 +164,14 @@ void divergence_block_chain::log_task_record_once(const divergence_map& check_ma } } -bool divergence_block_chain::check_for_divergence() { - add_new_hashes(); - - const auto [min_hash_count, max_hash_count] = collect_hash_counts(); - - if(min_hash_count == 0) { - if(max_hash_count != 0 && m_local_nid == 0) { - check_for_deadlock(); - } else if(max_hash_count == 0) { - return true; - } - return false; - } - - const per_node_task_hashes task_graphs = collect_hashes(min_hash_count); - - for(int j = 0; j < min_hash_count; ++j) { - const divergence_map check_map = create_check_map(task_graphs, j); - - if(check_map.size() > 1) { divergence_out(check_map, j); } - } - - clear(min_hash_count); - - return false; -} - -void divergence_block_chain::divergence_out(const divergence_map& check_map, const int task_num) { - if(m_local_nid == 0) { log_node_divergences(check_map, task_num); } - - // sleep for local_nid * 100 ms such that we have a no lock synchronized output - std::this_thread::sleep_for(std::chrono::milliseconds(m_local_nid * 100)); - - log_task_record_once(check_map, task_num); - - m_communicator->barrier(); - - throw std::runtime_error("Divergence in task graph detected"); -} - void divergence_block_chain::add_new_task(const task_record& task) { // std::lock_guard lock(m_task_records_mutex); // make copy of task record so that we can access it later m_task_records.emplace_back(task); } + +task_record divergence_block_chain::thread_save_get_task_record(const size_t task_num) { + std::lock_guard lock(m_task_records_mutex); + return m_task_records[task_num]; +} } // namespace celerity::detail::divergence_checker_detail diff --git a/src/runtime.cc b/src/runtime.cc index 04b6fc44f..c04698afc 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -177,12 +177,19 @@ namespace detail { m_schdlr = std::make_unique(is_dry_run(), std::move(dggen), *m_exec); m_task_mngr->register_task_callback([this](const task* tsk) { m_schdlr->notify_task_created(tsk); }); - if(m_cfg->is_recording()) { - MPI_Comm comm = nullptr; - MPI_Comm_dup(MPI_COMM_WORLD, &comm); - m_divergence_check = - std::make_unique(*m_task_recorder, std::make_unique(comm), m_test_mode); - } + +#if CELERITY_DIVERGENCE_CHECK + MPI_Comm comm = nullptr; + MPI_Comm_dup(MPI_COMM_WORLD, &comm); + m_divergence_check = + std::make_unique(*m_task_recorder, std::make_unique(comm), m_test_mode); +#endif + // if (m_cfg->is_recording()) { + // MPI_Comm comm = nullptr; + // MPI_Comm_dup(MPI_COMM_WORLD, &comm); + // m_divergence_check = + // std::make_unique(*m_task_recorder, std::make_unique(comm), m_test_mode); + // } CELERITY_INFO("Celerity runtime version {} running on {}. PID = {}, build type = {}, {}", get_version_string(), get_sycl_version(), get_pid(), get_build_type(), get_mimalloc_string()); @@ -247,15 +254,19 @@ namespace detail { } } - if(m_divergence_check != nullptr) { - // Sychronize all nodes before reseting shuch that we don't get into a deadlock - MPI_Barrier(MPI_COMM_WORLD); - m_divergence_check.reset(); - } else { - CELERITY_WARN("Divergence block chain not initialized"); - } + // // Sychronize all nodes before reseting shuch that we don't get into a deadlock + // // With this barrier we can be shure that every node is fully finished and all mpi operations are done. (Sending ...) + // MPI_Barrier(MPI_COMM_WORLD); + // m_divergence_check.reset(); } +#if CELERITY_DIVERGENCE_CHECK + // Sychronize all nodes before reseting shuch that we don't get into a deadlock + // With this barrier we can be shure that every node is fully finished and all mpi operations are done. (Sending ...) + MPI_Barrier(MPI_COMM_WORLD); + m_divergence_check.reset(); +#endif + // Shutting down the task_manager will cause all buffers captured inside command group functions to unregister. // Since we check whether the runtime is still active upon unregistering, we have to set this to false first. m_is_active = false; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8f70353d7..6e1bba87a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -48,7 +48,7 @@ set(TEST_TARGETS test_utils_tests utils_tests device_selection_tests - divergence_check_tests + divergence_checker_tests ) add_library(test_main test_main.cc grid_test_utils.cc) diff --git a/test/divergence_check_test_utils.h b/test/divergence_checker_test_utils.h similarity index 95% rename from test/divergence_check_test_utils.h rename to test/divergence_checker_test_utils.h index 05dc9732f..d8ce91743 100644 --- a/test/divergence_check_test_utils.h +++ b/test/divergence_checker_test_utils.h @@ -1,13 +1,12 @@ #pragma once -#include "divergence_block_chain.h" +#include "divergence_checker.h" using namespace celerity; using namespace celerity::detail; using namespace celerity::detail::divergence_checker_detail; -namespace celerity::detail::divergence_checker_detail { -struct divergence_block_chain_testspy { +struct divergence_checker_detail::divergence_block_chain_testspy { static per_node_task_hashes pre_check(divergence_block_chain& div_test, const int max_size) { div_test.add_new_hashes(); div_test.collect_hash_counts(); @@ -38,7 +37,6 @@ struct divergence_block_chain_testspy { static void set_last_cleared(divergence_block_chain& div_test, std::chrono::time_point time) { div_test.m_last_cleared = time; } }; -} // namespace celerity::detail::divergence_checker_detail namespace celerity::test_utils { // Note: this is only a simulator for this specific case. In the general case, we should use something more sophisticated for tracking the allgather @@ -137,4 +135,5 @@ class divergence_test_communicator_provider { tracker m_inplace_data{m_num_nodes}; tracker m_gather_data{m_num_nodes}; }; + } // namespace celerity::test_utils diff --git a/test/divergence_check_tests.cc b/test/divergence_checker_tests.cc similarity index 81% rename from test/divergence_check_tests.cc rename to test/divergence_checker_tests.cc index 5a84da125..58f78b3ee 100644 --- a/test/divergence_check_tests.cc +++ b/test/divergence_checker_tests.cc @@ -4,7 +4,7 @@ #include -#include "divergence_check_test_utils.h" +#include "divergence_checker_test_utils.h" #include "log_test_utils.h" #include "test_utils.h" @@ -13,10 +13,9 @@ using namespace celerity; using namespace celerity::detail; using namespace celerity::test_utils; using celerity::access::fixed; +using celerity::access_mode; TEST_CASE("test diverged task execution on device tasks", "[divergence]") { - using namespace cl::sycl::access; - test_utils::task_test_context tt = test_utils::task_test_context{}; test_utils::task_test_context tt_two = test_utils::task_test_context{}; @@ -28,9 +27,9 @@ TEST_CASE("test diverged task execution on device tasks", "[divergence]") { auto buf = tt.mbf.create_buffer(range<1>(128)); auto buf_two = tt_two.mbf.create_buffer(range<1>(128)); - test_utils::add_compute_task(tt.tm, [&](handler& cgh) { buf.get_access(cgh, fixed<1>{{0, 64}}); }); - test_utils::add_compute_task(tt.tm, [&](handler& cgh) { buf.get_access(cgh, fixed<1>{{0, 128}}); }); - test_utils::add_compute_task(tt_two.tm, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>{{64, 128}}); }); + test_utils::add_compute_task(tt.tm, [&](handler& cgh) { buf.get_access(cgh, fixed<1>{{0, 64}}); }); + test_utils::add_compute_task(tt.tm, [&](handler& cgh) { buf.get_access(cgh, fixed<1>{{0, 128}}); }); + test_utils::add_compute_task(tt_two.tm, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>{{64, 128}}); }); test_utils::log_capture log_capture; @@ -40,8 +39,6 @@ TEST_CASE("test diverged task execution on device tasks", "[divergence]") { } TEST_CASE("test divergence free task execution on device", "[divergence]") { - using namespace cl::sycl::access; - auto tt = test_utils::task_test_context{}; auto tt_two = test_utils::task_test_context{}; @@ -57,13 +54,13 @@ TEST_CASE("test divergence free task execution on device", "[divergence]") { test_utils::add_compute_task(tt.tm, [&](handler& cgh) { // manually set the name because SYCL needs the class tag to be unique making the default name different. celerity::debug::set_task_name(cgh, "task_a"); - buf.get_access(cgh, fixed<1>{{0, 64}}); + buf.get_access(cgh, fixed<1>{{0, 64}}); }); test_utils::add_compute_task(tt_two.tm, [&](handler& cgh) { // manually set the name because SYCL needs the class tag to be unique making the default name different. celerity::debug::set_task_name(cgh, "task_a"); - buf_two.get_access(cgh, fixed<1>{{0, 64}}); + buf_two.get_access(cgh, fixed<1>{{0, 64}}); }); test_utils::log_capture log_capture; @@ -75,8 +72,6 @@ TEST_CASE("test divergence free task execution on device", "[divergence]") { } TEST_CASE("test diverged task execution on host task", "[divergence]") { - using namespace cl::sycl::access; - auto tt = test_utils::task_test_context{}; auto tt_two = test_utils::task_test_context{}; @@ -88,9 +83,9 @@ TEST_CASE("test diverged task execution on host task", "[divergence]") { auto buf = tt.mbf.create_buffer(range<1>(128)); auto buf_two = tt_two.mbf.create_buffer(range<1>(128)); - test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({0, 128})); }); - test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({64, 128})); }); - test_utils::add_host_task(tt_two.tm, on_master_node, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>({64, 128})); }); + test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({0, 128})); }); + test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({64, 128})); }); + test_utils::add_host_task(tt_two.tm, on_master_node, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>({64, 128})); }); test_utils::log_capture log_capture; @@ -100,8 +95,6 @@ TEST_CASE("test diverged task execution on host task", "[divergence]") { } TEST_CASE("test divergence free task execution on host task", "[divergence]") { - using namespace cl::sycl::access; - auto tt = test_utils::task_test_context{}; auto tt_two = test_utils::task_test_context{}; @@ -114,11 +107,11 @@ TEST_CASE("test divergence free task execution on host task", "[divergence]") { auto buf = tt.mbf.create_buffer(range<1>(128)); auto buf_two = tt_two.mbf.create_buffer(range<1>(128)); - test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({0, 128})); }); - test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({64, 128})); }); + test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({0, 128})); }); + test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({64, 128})); }); - test_utils::add_host_task(tt_two.tm, on_master_node, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>({0, 128})); }); - test_utils::add_host_task(tt_two.tm, on_master_node, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>({64, 128})); }); + test_utils::add_host_task(tt_two.tm, on_master_node, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>({0, 128})); }); + test_utils::add_host_task(tt_two.tm, on_master_node, [&](handler& cgh) { buf_two.get_access(cgh, fixed<1>({64, 128})); }); test_utils::log_capture log_capture; @@ -128,9 +121,7 @@ TEST_CASE("test divergence free task execution on host task", "[divergence]") { } } -TEST_CASE("test divergence warning for tasks that are stale longer than 10 seconds", "[divergence]") { - using namespace cl::sycl::access; - +TEST_CASE("test deadlock warning for tasks that are stale longer than 10 seconds", "[divergence]") { auto tt = test_utils::task_test_context{}; auto tt_two = test_utils::task_test_context{}; @@ -141,7 +132,7 @@ TEST_CASE("test divergence warning for tasks that are stale longer than 10 secon auto buf = tt.mbf.create_buffer(range<1>(128)); - test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({0, 128})); }); + test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf.get_access(cgh, fixed<1>({0, 128})); }); test_utils::log_capture log_capture; @@ -151,7 +142,7 @@ TEST_CASE("test divergence warning for tasks that are stale longer than 10 secon divergence_block_chain_testspy::call_check_for_divergence_with_pre_post(div_tests); CHECK_THAT(log_capture.get_log(), - Catch::Matchers::ContainsSubstring("After 10 seconds of waiting nodes 1, did not move to the next task. The runtime might be stuck.")); + Catch::Matchers::ContainsSubstring("After 10 seconds of waiting, node(s) 1 did not move to the next task. The runtime might be stuck.")); } size_t get_hash(const std::vector& tasks, size_t start, size_t end) { @@ -163,8 +154,6 @@ size_t get_hash(const std::vector& tasks, size_t start, size_t end) } TEST_CASE("test correct output of 3 different divergent tasks", "[divergence]") { - using namespace cl::sycl::access; - auto tt = test_utils::task_test_context{}; auto tt_two = test_utils::task_test_context{}; auto tt_three = test_utils::task_test_context{}; @@ -181,25 +170,25 @@ TEST_CASE("test correct output of 3 different divergent tasks", "[divergence]") test_utils::add_compute_task(tt.tm, [&](handler& cgh) { celerity::debug::set_task_name(cgh, "task_a"); - buf.get_access(cgh, fixed<1>{{0, 64}}); + buf.get_access(cgh, fixed<1>{{0, 64}}); }); test_utils::add_compute_task(tt_two.tm, [&](handler& cgh) { celerity::debug::set_task_name(cgh, "task_a"); - buf_two.get_access(cgh, fixed<1>{{64, 128}}); + buf_two.get_access(cgh, fixed<1>{{64, 128}}); }); test_utils::add_compute_task(tt_three.tm, [&](handler& cgh) { celerity::debug::set_task_name(cgh, "task_a"); - buf_three.get_access(cgh, fixed<1>{{0, 128}}); + buf_three.get_access(cgh, fixed<1>{{0, 128}}); }); test_utils::log_capture log_capture; CHECK_THROWS(divergence_block_chain_testspy::call_check_for_divergence_with_pre_post(div_tests)); - CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("Divergence detected in task graph at index 0:\n\n")); - CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("on nodes 2")); - CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("on nodes 1")); - CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("on nodes 0")); + CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("Divergence detected. Task Nr 1 diverges on nodes:")); + CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("on node 2")); + CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("on node 1")); + CHECK_THAT(log_capture.get_log(), Catch::Matchers::ContainsSubstring("on node 0")); } diff --git a/test/system/distr_tests.cc b/test/system/distr_tests.cc index 4c10f6321..e7c3b898d 100644 --- a/test/system/distr_tests.cc +++ b/test/system/distr_tests.cc @@ -11,7 +11,7 @@ #include -#include "../divergence_check_test_utils.h" +#include "../divergence_checker_test_utils.h" #include "../log_test_utils.h" namespace celerity { @@ -478,6 +478,10 @@ namespace detail { } TEST_CASE_METHOD(test_utils::runtime_fixture, "Check divergence of different nodes", "[divergence]") { +#if !CELERITY_DIVERGENCE_CHECK + SKIP("Distributed divergence boundary check only enabled when CELERITY_DIVERGENCE_CHECK=ON"); +#endif + env::scoped_test_environment tenv(recording_enabled_env_setting); runtime::init(nullptr, nullptr); @@ -542,13 +546,7 @@ namespace detail { } // create the check text - std::string check_text = "After 10 seconds of waiting nodes "; - - for(unsigned long i = 0; i < n; ++i) { - if(i % 2 == 1) { check_text += std::to_string(i) + ", "; } - } - - check_text += "did not move to the next task. The runtime might be stuck."; + std::string check_text = "After 10 seconds of waiting, node(s) 1 did not move to the next task. The runtime might be stuck."; if(rank == 0) { const auto log = log_capture.get_log(); diff --git a/test/test_utils.h b/test/test_utils.h index e7c7bc1e5..b8f0601aa 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -21,7 +21,7 @@ #include "command_graph.h" #include "device_queue.h" #include "distributed_graph_generator.h" -#include "divergence_block_chain.h" +#include "divergence_checker.h" #include "graph_serializer.h" #include "print_graph.h" #include "range_mapper.h"