From 8d8d0c2d0ab8f21543ff2076e5045c88912b5179 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Thu, 21 Nov 2024 13:29:28 +0100 Subject: [PATCH 1/4] Disable clang-tidy checks that frequently produce false-positives --- .clang-tidy | 24 +++++++++++++++++++++--- examples/.clang-tidy | 6 ++++++ test/.clang-tidy | 16 +++++++++------- 3 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 examples/.clang-tidy diff --git a/.clang-tidy b/.clang-tidy index fbeec77cc..74506d7ea 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -11,20 +11,29 @@ # `git diff -U0 --no-color | clang-tidy-diff.py -p1 -path path/to/compile_commands.json` # InheritParentConfig: false + # See https://clang.llvm.org/extra/clang-tidy/checks/list.html for a full list of available checks. -# Note: We would like to enable `misc-const-correctness` (introduced with Clang 15), but it currently -# seems to be somewhat buggy still (producing false positives) => revisit at some point. +# We disable a number of checks that cause frequent false positives, including: +# -bugprone-unchecked-optional-access treats std::optional::value as "unchecked" +# -misc-include-cleaner can't deal with "interface headers" such as sycl.hpp +# -misc-unused-using-decls complains about "using sub_group = sycl::sub_group" (API export) +# -readability-convert-member-functions-to-static lints against implementations of fmt::formatter Checks: -*, bugprone-*, -bugprone-easily-swappable-parameters, -bugprone-lambda-function-name, -bugprone-macro-parentheses, + -bugprone-unchecked-optional-access, misc-*, - -misc-const-correctness, + -misc-include-cleaner, + -misc-misplaced-const, -misc-no-recursion, -misc-non-private-member-variables-in-classes, -misc-unused-parameters, + -misc-unused-using-decls, + -misc-use-anonymous-namespace, clang-analyzer-*, + -clang-analyzer-optin.mpi.MPI-Checker, clang-diagnostic-*, cppcoreguidelines-*, -cppcoreguidelines-avoid-c-arrays, @@ -32,19 +41,28 @@ Checks: -*, -cppcoreguidelines-avoid-magic-numbers, -cppcoreguidelines-macro-usage, -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-constant-array-index, -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-const-cast, + -cppcoreguidelines-pro-type-reinterpret-cast, mpi-*, performance-*, -performance-enum-size, readability-*, -readability-avoid-const-params-in-decls, + -readability-convert-member-functions-to-static, + -readability-else-after-return, -readability-function-cognitive-complexity, -readability-identifier-length, -readability-magic-numbers, -readability-qualified-auto, + -readability-redundant-inline-specifier, -readability-uppercase-literal-suffix, CheckOptions: + - key: misc-const-correctness.WarnPointersAsValues + value: true # Naming conventions - key: readability-identifier-naming.ClassCase value: lower_case diff --git a/examples/.clang-tidy b/examples/.clang-tidy new file mode 100644 index 000000000..13ff79957 --- /dev/null +++ b/examples/.clang-tidy @@ -0,0 +1,6 @@ +--- +InheritParentConfig: true + +# We disable some checks that cause false-positives in examples. +# -misc-const-correctness: Would suggest `const accessor` in CGFs +Checks: -misc-const-correctness diff --git a/test/.clang-tidy b/test/.clang-tidy index 2d0eeae9c..f54911553 100644 --- a/test/.clang-tidy +++ b/test/.clang-tidy @@ -1,11 +1,13 @@ --- InheritParentConfig: true + # Disable some checks that cause frequent false-positives in tests -# misc-include-cleaner: Does not understand that celerity.h is the entry point for all public Celerity headers -# misc-use-anonymous-namespace: Catch2 emits static functions for TEST_CASE() -# bugprone-chained-comparison: Catch2 uses a comparison operator overloading hack to decompose expressions -Checks: -readability-function-cognitive-complexity, +# -misc-const-correctness: Would suggest `const accessor` in CGFs +# -misc-include-cleaner: Does not understand that celerity.h is the entry point for all public Celerity headers +# -misc-use-anonymous-namespace: Catch2 emits static functions for TEST_CASE() +# -bugprone-chained-comparison: Catch2 uses a comparison operator overloading hack to decompose expressions +Checks: -misc-const-correctness, + -readability-function-cognitive-complexity, + -cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-non-const-global-variables, - -misc-include-cleaner, - -misc-use-anonymous-namespace, - -bugprone-chained-comparison, + -bugprone-chained-comparison From f87b51c84edce05087000c173bd66f448c5b8b26 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Thu, 21 Nov 2024 16:56:50 +0100 Subject: [PATCH 2/4] Improve const-correctness in codebase We can't enable clang-tidy's `misc-const-correctness` everywhere due to its large number of false positives around accessors and guard types. --- src/backend/sycl_backend.cc | 4 +-- src/runtime.cc | 2 +- src/task_manager.cc | 2 +- test/accessor_tests.cc | 14 ++++---- test/affinity_tests.cc | 2 +- test/dag_benchmarks.cc | 2 +- test/debug/pretty_printables.cc | 54 +++++++++++++++---------------- test/device_selection_tests.cc | 6 ++-- test/graph_test_utils.h | 1 - test/print_graph_tests.cc | 2 +- test/range_mapper_tests.cc | 44 ++++++++++++------------- test/receive_arbiter_tests.cc | 2 +- test/region_map_tests.cc | 8 ++--- test/runtime_deprecation_tests.cc | 5 ++- test/runtime_tests.cc | 32 +++++++----------- test/system/distr_tests.cc | 2 +- test/system_benchmarks.cc | 1 - test/task_graph_tests.cc | 16 ++++----- 18 files changed, 94 insertions(+), 105 deletions(-) diff --git a/src/backend/sycl_backend.cc b/src/backend/sycl_backend.cc index aa50e6839..86c28b88b 100644 --- a/src/backend/sycl_backend.cc +++ b/src/backend/sycl_backend.cc @@ -25,7 +25,7 @@ std::optional sycl_event::get_native_execution_time() void delayed_async_event::state::set_value(async_event event) { m_event = std::move(event); - [[maybe_unused]] bool previously_ready = m_is_ready.exchange(true, std::memory_order_release); + [[maybe_unused]] const bool previously_ready = m_is_ready.exchange(true, std::memory_order_release); assert(!previously_ready && "delayed_async_event::state::set_value() called more than once"); } @@ -309,7 +309,7 @@ async_event celerity::detail::sycl_backend::enqueue_device_work( // Note: this mechanism is quite similar in principle to a std::future/promise, // but implementing it with that caused a 50% (!) slowdown in system-level benchmarks - sycl_backend_detail::delayed_async_event::shared_state async_event_state = std::make_shared(); + const auto async_event_state = std::make_shared(); auto async_event = make_async_event(async_event_state); (void)submission_thread->submit([this, device, lane, work, async_event_state] { diff --git a/src/runtime.cc b/src/runtime.cc index bc21510db..1252b2edb 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -245,7 +245,7 @@ namespace detail { { const auto& pin_cfg = m_cfg->get_thread_pinning_config(); - thread_pinning::runtime_configuration thread_pinning_cfg{ + const thread_pinning::runtime_configuration thread_pinning_cfg{ .enabled = pin_cfg.enabled, .num_devices = static_cast(devices.size()), .use_backend_device_submission_threads = m_cfg->should_use_backend_device_submission_threads(), diff --git a/src/task_manager.cc b/src/task_manager.cc index 8e163010a..e3ad28fc1 100644 --- a/src/task_manager.cc +++ b/src/task_manager.cc @@ -228,7 +228,7 @@ namespace detail { const auto previous_horizon = m_current_horizon; m_current_horizon = unique_horizon.get(); - task& new_horizon = reduce_execution_front(std::move(unique_horizon)); + const task& new_horizon = reduce_execution_front(std::move(unique_horizon)); if(previous_horizon != nullptr) { set_epoch_for_new_tasks(previous_horizon); } invoke_callbacks(&new_horizon); diff --git a/test/accessor_tests.cc b/test/accessor_tests.cc index 5aacad8a3..efd59781d 100644 --- a/test/accessor_tests.cc +++ b/test/accessor_tests.cc @@ -192,12 +192,12 @@ namespace detail { accessor acc_read(buf_in, cgh, one_to_one(), read_only); accessor acc_write(buf_out, cgh, one_to_one(), write_only, no_init); cgh.parallel_for(range, [=](celerity::item item) { - size_t i = item[0]; - size_t j = item[1]; + const size_t i = item[0]; + const size_t j = item[1]; if constexpr(Dims == 2) { acc_write[i][j] = acc_read[i][j]; } else { - size_t k = item[2]; + const size_t k = item[2]; acc_write[i][j][k] = acc_read[i][j][k]; } }); @@ -210,12 +210,12 @@ namespace detail { accessor acc_write(buf_out, cgh, one_to_one(), write_only_host_task, no_init); cgh.host_task(range, [=](celerity::partition part) { experimental::for_each_item(range, [&](celerity::item item) { - size_t i = item[0]; - size_t j = item[1]; + const size_t i = item[0]; + const size_t j = item[1]; if constexpr(Dims == 2) { acc_write[i][j] = acc_read[i][j]; } else { - size_t k = item[2]; + const size_t k = item[2]; acc_write[i][j][k] = acc_read[i][j][k]; } }); @@ -299,7 +299,7 @@ namespace detail { queue q; std::optional> buf; - int init = 0; + const int init = 0; SECTION("when the buffer is uninitialized") { buf = buffer{1}; }; SECTION("when the buffer is host-initialized") { buf = buffer{&init, 1}; }; diff --git a/test/affinity_tests.cc b/test/affinity_tests.cc index a6985280f..96556f795 100644 --- a/test/affinity_tests.cc +++ b/test/affinity_tests.cc @@ -52,7 +52,7 @@ class raii_test_runtime { public: raii_test_runtime(int n) { // devices are default constructible, and we don't care if we use the same more than once - std::vector devices(n); + const std::vector devices(n); celerity::runtime::init(nullptr, nullptr, devices); } ~raii_test_runtime() { celerity::runtime::shutdown(); } diff --git a/test/dag_benchmarks.cc b/test/dag_benchmarks.cc index b29ceb80d..1a1d0d682 100644 --- a/test/dag_benchmarks.cc +++ b/test/dag_benchmarks.cc @@ -24,7 +24,7 @@ TEMPLATE_TEST_CASE_SIG("benchmark intrusive graph dependency handling with N nod // existing nodes -- this is intentional; both cases are relevant in practise BENCHMARK("creating nodes") { - bench_graph_node nodes[N]; + const bench_graph_node nodes[N]; return nodes[N - 1].get_pseudo_critical_path_length(); // trick the compiler }; diff --git a/test/debug/pretty_printables.cc b/test/debug/pretty_printables.cc index 965a1d1be..f22d616f3 100644 --- a/test/debug/pretty_printables.cc +++ b/test/debug/pretty_printables.cc @@ -9,31 +9,31 @@ static const auto epoch_cmd = std::make_unique( celerity::detail::command_id{123}, epoch_task.get(), celerity::detail::epoch_action::none, std::vector()); int main() { - [[maybe_unused]] celerity::detail::task_id tid = 10; - [[maybe_unused]] celerity::detail::buffer_id bid = 11; - [[maybe_unused]] celerity::detail::node_id nid = 12; - [[maybe_unused]] celerity::detail::command_id cid = 13; - [[maybe_unused]] celerity::detail::collective_group_id cgid = 14; - [[maybe_unused]] celerity::detail::reduction_id rid = 15; - [[maybe_unused]] celerity::detail::host_object_id hoid = 16; - [[maybe_unused]] celerity::detail::hydration_id hyid = 17; - [[maybe_unused]] celerity::detail::transfer_id trid{18, 19}; - [[maybe_unused]] celerity::detail::transfer_id reduction_trid{20, 21, 22}; - [[maybe_unused]] celerity::detail::memory_id mid = 23; - [[maybe_unused]] celerity::detail::raw_allocation_id raid = 24; - [[maybe_unused]] celerity::detail::device_id did = 25; - [[maybe_unused]] celerity::detail::instruction_id iid = 26; - [[maybe_unused]] celerity::detail::allocation_id aid(27, 28); - [[maybe_unused]] celerity::detail::message_id msgid(34); + [[maybe_unused]] const celerity::detail::task_id tid = 10; + [[maybe_unused]] const celerity::detail::buffer_id bid = 11; + [[maybe_unused]] const celerity::detail::node_id nid = 12; + [[maybe_unused]] const celerity::detail::command_id cid = 13; + [[maybe_unused]] const celerity::detail::collective_group_id cgid = 14; + [[maybe_unused]] const celerity::detail::reduction_id rid = 15; + [[maybe_unused]] const celerity::detail::host_object_id hoid = 16; + [[maybe_unused]] const celerity::detail::hydration_id hyid = 17; + [[maybe_unused]] const celerity::detail::transfer_id trid{18, 19}; + [[maybe_unused]] const celerity::detail::transfer_id reduction_trid{20, 21, 22}; + [[maybe_unused]] const celerity::detail::memory_id mid = 23; + [[maybe_unused]] const celerity::detail::raw_allocation_id raid = 24; + [[maybe_unused]] const celerity::detail::device_id did = 25; + [[maybe_unused]] const celerity::detail::instruction_id iid = 26; + [[maybe_unused]] const celerity::detail::allocation_id aid(27, 28); + [[maybe_unused]] const celerity::detail::message_id msgid(34); - [[maybe_unused]] celerity::id<3> id(1, 2, 3); - [[maybe_unused]] celerity::range<3> range(1, 2, 3); - [[maybe_unused]] celerity::subrange<3> subrange(celerity::id(1, 2, 3), celerity::range(4, 5, 6)); - [[maybe_unused]] celerity::chunk<3> chunk(celerity::id(1, 2, 3), celerity::range(4, 5, 6), celerity::range(7, 8, 9)); - [[maybe_unused]] celerity::nd_range<3> nd_range(celerity::range(2, 4, 6), celerity::range(1, 2, 3), celerity::id(7, 8, 9)); - [[maybe_unused]] celerity::detail::box<3> box(celerity::id(1, 2, 3), celerity::id(4, 5, 6)); - [[maybe_unused]] celerity::detail::region<3> empty_region; - [[maybe_unused]] celerity::detail::region<3> region({ + [[maybe_unused]] const celerity::id<3> id(1, 2, 3); + [[maybe_unused]] const celerity::range<3> range(1, 2, 3); + [[maybe_unused]] const celerity::subrange<3> subrange(celerity::id(1, 2, 3), celerity::range(4, 5, 6)); + [[maybe_unused]] const celerity::chunk<3> chunk(celerity::id(1, 2, 3), celerity::range(4, 5, 6), celerity::range(7, 8, 9)); + [[maybe_unused]] const celerity::nd_range<3> nd_range(celerity::range(2, 4, 6), celerity::range(1, 2, 3), celerity::id(7, 8, 9)); + [[maybe_unused]] const celerity::detail::box<3> box(celerity::id(1, 2, 3), celerity::id(4, 5, 6)); + [[maybe_unused]] const celerity::detail::region<3> empty_region; + [[maybe_unused]] const celerity::detail::region<3> region({ celerity::detail::box(celerity::id(1, 2, 3), celerity::id(4, 5, 6)), celerity::detail::box(celerity::id(11, 2, 3), celerity::id(14, 5, 6)), celerity::detail::box(celerity::id(21, 2, 3), celerity::id(24, 5, 6)), @@ -44,12 +44,12 @@ int main() { region_map.update_box(celerity::detail::box<3>({1, 1, 1}, {3, 3, 3}), 69); region_map.update_box(celerity::detail::box<3>({1, 1, 1}, {2, 2, 2}), 1337); - [[maybe_unused]] celerity::detail::region_map region_map_0d(celerity::range<3>(1, 1, 1), 42); + [[maybe_unused]] const celerity::detail::region_map region_map_0d(celerity::range<3>(1, 1, 1), 42); - [[maybe_unused]] celerity::detail::write_command_state wcs_fresh(epoch_cmd.get()); + [[maybe_unused]] const celerity::detail::write_command_state wcs_fresh(epoch_cmd.get()); [[maybe_unused]] celerity::detail::write_command_state wcs_stale(epoch_cmd.get()); wcs_stale.mark_as_stale(); - [[maybe_unused]] celerity::detail::write_command_state wcs_replicated(epoch_cmd.get(), true /* replicated */); + [[maybe_unused]] const celerity::detail::write_command_state wcs_replicated(epoch_cmd.get(), true /* replicated */); // tell GDB to break here so we can examine locals __builtin_trap(); diff --git a/test/device_selection_tests.cc b/test/device_selection_tests.cc index 398fe710f..4ebd4912f 100644 --- a/test/device_selection_tests.cc +++ b/test/device_selection_tests.cc @@ -457,7 +457,7 @@ TEST_CASE("select_backend picks highest-priority available specialized backend", mock_device(0, platform, type_and_name{sycl::info::device_type::gpu, "gpu0"}), mock_device(1, platform, type_and_name{sycl::info::device_type::gpu, "gpu1"}), }; - mock_backend_enumerator enumerator{{mock_backend_type::generic1, mock_backend_type::generic2, mock_backend_type::specialized2}, + const mock_backend_enumerator enumerator{{mock_backend_type::generic1, mock_backend_type::generic2, mock_backend_type::specialized2}, { {devices.at(0), {mock_backend_type::generic1, mock_backend_type::specialized1, mock_backend_type::specialized2}}, {devices.at(1), {mock_backend_type::generic1, mock_backend_type::specialized1, mock_backend_type::specialized2}}, @@ -479,7 +479,7 @@ TEST_CASE("select_backend picks highest-priority available generic backend if th mock_device(0, platform, type_and_name{sycl::info::device_type::gpu, "gpu0"}), mock_device(1, platform, type_and_name{sycl::info::device_type::gpu, "gpu1"}), }; - mock_backend_enumerator enumerator{ + const mock_backend_enumerator enumerator{ {mock_backend_type::generic1, mock_backend_type::generic2, mock_backend_type::specialized1, mock_backend_type::specialized2}, { {devices.at(0), {mock_backend_type::generic1, mock_backend_type::generic2, mock_backend_type::specialized1}}, @@ -501,7 +501,7 @@ TEST_CASE("select_backend picks a generic backend if no compatible specializatio mock_device(0, platform, type_and_name{sycl::info::device_type::gpu, "gpu0"}), mock_device(1, platform, type_and_name{sycl::info::device_type::gpu, "gpu1"}), }; - mock_backend_enumerator enumerator{{mock_backend_type::generic1, mock_backend_type::generic2}, + const mock_backend_enumerator enumerator{{mock_backend_type::generic1, mock_backend_type::generic2}, { {devices.at(0), {mock_backend_type::generic2, mock_backend_type::specialized1, mock_backend_type::specialized2}}, {devices.at(1), {mock_backend_type::generic2, mock_backend_type::specialized1, mock_backend_type::specialized2}}, diff --git a/test/graph_test_utils.h b/test/graph_test_utils.h index 5137613f2..2175214f0 100644 --- a/test/graph_test_utils.h +++ b/test/graph_test_utils.h @@ -140,7 +140,6 @@ class task_builder { } step master_node_host_task() { - std::deque actions; return step(m_tctx, [](handler& cgh) { cgh.host_task(on_master_node, [] {}); }); } diff --git a/test/print_graph_tests.cc b/test/print_graph_tests.cc index 43e5b3277..a3603a688 100644 --- a/test/print_graph_tests.cc +++ b/test/print_graph_tests.cc @@ -61,7 +61,7 @@ int count_occurences(const std::string& str, const std::string& substr) { } // namespace TEST_CASE("command-graph printing is unchanged", "[print_graph][command-graph]") { - size_t num_nodes = 4; + const size_t num_nodes = 4; cdag_test_context cctx(num_nodes); auto buf_0 = cctx.create_buffer(range(1)); diff --git a/test/range_mapper_tests.cc b/test/range_mapper_tests.cc index edabe6835..0fc71ee7c 100644 --- a/test/range_mapper_tests.cc +++ b/test/range_mapper_tests.cc @@ -85,21 +85,21 @@ subrange rm_result_to_subrange(const region& r) { TEST_CASE("range mapper results are clamped to buffer range", "[range-mapper]") { const auto rmfn = [](chunk<3>) { return subrange<3>{{0, 100, 127}, {256, 64, 32}}; }; - range_mapper rm{rmfn, range<3>{128, 128, 128}}; + const range_mapper rm{rmfn, range<3>{128, 128, 128}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<3>{})); REQUIRE(sr.offset == id<3>{0, 100, 127}); REQUIRE(sr.range == range<3>{128, 28, 1}); } TEST_CASE("one_to_one built-in range mapper behaves as expected", "[range-mapper]") { - range_mapper rm{acc::one_to_one{}, range<2>{128, 128}}; + const range_mapper rm{acc::one_to_one{}, range<2>{128, 128}}; auto sr = rm_result_to_subrange(rm.map_2(chunk<2>{{64, 32}, {32, 4}, {128, 128}})); REQUIRE(sr.offset == id<2>{64, 32}); REQUIRE(sr.range == range<2>{32, 4}); } TEST_CASE("fixed built-in range mapper behaves as expected", "[range-mapper]") { - range_mapper rm{acc::fixed<1>({{3}, {97}}), range<1>{128}}; + const range_mapper rm{acc::fixed<1>({{3}, {97}}), range<1>{128}}; auto sr = rm_result_to_subrange(rm.map_1(chunk<2>{{64, 32}, {32, 4}, {128, 128}})); REQUIRE(sr.offset == id<1>{3}); REQUIRE(sr.range == range<1>{97}); @@ -107,19 +107,19 @@ TEST_CASE("fixed built-in range mapper behaves as expected", "[range-mapper]") { TEST_CASE("slice built-in range mapper behaves as expected", "[range-mapper]") { { - range_mapper rm{acc::slice<3>(0), range<3>{128, 128, 128}}; + const range_mapper rm{acc::slice<3>(0), range<3>{128, 128, 128}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<3>{{32, 32, 32}, {32, 32, 32}, {128, 128, 128}})); REQUIRE(sr.offset == id<3>{0, 32, 32}); REQUIRE(sr.range == range<3>{128, 32, 32}); } { - range_mapper rm{acc::slice<3>(1), range<3>{128, 128, 128}}; + const range_mapper rm{acc::slice<3>(1), range<3>{128, 128, 128}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<3>{{32, 32, 32}, {32, 32, 32}, {128, 128, 128}})); REQUIRE(sr.offset == id<3>{32, 0, 32}); REQUIRE(sr.range == range<3>{32, 128, 32}); } { - range_mapper rm{acc::slice<3>(2), range<3>{128, 128, 128}}; + const range_mapper rm{acc::slice<3>(2), range<3>{128, 128, 128}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<3>{{32, 32, 32}, {32, 32, 32}, {128, 128, 128}})); REQUIRE(sr.offset == id<3>{32, 32, 0}); REQUIRE(sr.range == range<3>{32, 32, 128}); @@ -128,19 +128,19 @@ TEST_CASE("slice built-in range mapper behaves as expected", "[range-mapper]") { TEST_CASE("all built-in range mapper behaves as expected", "[range-mapper]") { { - range_mapper rm{acc::all{}, range<1>{128}}; + const range_mapper rm{acc::all{}, range<1>{128}}; auto sr = rm_result_to_subrange(rm.map_1(chunk<1>{})); REQUIRE(sr.offset == id<1>{0}); REQUIRE(sr.range == range<1>{128}); } { - range_mapper rm{acc::all{}, range<2>{128, 64}}; + const range_mapper rm{acc::all{}, range<2>{128, 64}}; auto sr = rm_result_to_subrange(rm.map_2(chunk<1>{})); REQUIRE(sr.offset == id<2>{0, 0}); REQUIRE(sr.range == range<2>{128, 64}); } { - range_mapper rm{acc::all{}, range<3>{128, 64, 32}}; + const range_mapper rm{acc::all{}, range<3>{128, 64, 32}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{})); REQUIRE(sr.offset == id<3>{0, 0, 0}); REQUIRE(sr.range == range<3>{128, 64, 32}); @@ -150,17 +150,17 @@ TEST_CASE("all built-in range mapper behaves as expected", "[range-mapper]") { TEST_CASE("neighborhood built-in range mapper behaves as expected", "[range-mapper]") { SECTION("with shape = bounding_box") { { - range_mapper rm{acc::neighborhood({10}), range<1>{128}}; + const range_mapper rm{acc::neighborhood({10}), range<1>{128}}; const auto r = rm.map_1(chunk<1>{{15}, {10}, {128}}); CHECK(r == box<1>(5, 35)); } { - range_mapper rm{acc::neighborhood({10, 10}), range<2>{128, 128}}; + const range_mapper rm{acc::neighborhood({10, 10}), range<2>{128, 128}}; const auto r = rm.map_2(chunk<2>{{5, 100}, {10, 20}, {128, 128}}); CHECK(r == box<2>({0, 90}, {25, 128})); } { - range_mapper rm{acc::neighborhood({3, 4, 5}), range<3>{128, 128, 128}}; + const range_mapper rm{acc::neighborhood({3, 4, 5}), range<3>{128, 128, 128}}; const auto r = rm.map_3(chunk<3>{{3, 4, 5}, {1, 1, 1}, {128, 128, 128}}); CHECK(r == box<3>({0, 0, 0}, {7, 9, 11})); } @@ -168,17 +168,17 @@ TEST_CASE("neighborhood built-in range mapper behaves as expected", "[range-mapp SECTION("with shape = along_axes") { { - range_mapper rm{acc::neighborhood({10}, neighborhood_shape::along_axes), range<1>{128}}; + const range_mapper rm{acc::neighborhood({10}, neighborhood_shape::along_axes), range<1>{128}}; const auto r = rm.map_1(chunk<1>{{15}, {10}, {128}}); CHECK(r == box<1>({5}, {35})); } { - range_mapper rm{acc::neighborhood({10, 10}, neighborhood_shape::along_axes), range<2>{128, 128}}; + const range_mapper rm{acc::neighborhood({10, 10}, neighborhood_shape::along_axes), range<2>{128, 128}}; const auto r = rm.map_2(chunk<2>{{5, 100}, {10, 20}, {128, 128}}); CHECK(r == region<2>({box<2>({0, 100}, {25, 120}), box<2>({5, 90}, {15, 128})})); } { - range_mapper rm{acc::neighborhood({3, 4, 5}, neighborhood_shape::along_axes), range<3>{128, 128, 128}}; + const range_mapper rm{acc::neighborhood({3, 4, 5}, neighborhood_shape::along_axes), range<3>{128, 128, 128}}; const auto r = rm.map_3(chunk<3>{{3, 4, 5}, {1, 1, 1}, {128, 128, 128}}); CHECK(r == region<3>({box<3>({0, 4, 5}, {7, 5, 6}), box<3>({3, 0, 5}, {4, 9, 6}), box<3>({3, 4, 0}, {4, 5, 11})})); } @@ -187,43 +187,43 @@ TEST_CASE("neighborhood built-in range mapper behaves as expected", "[range-mapp TEST_CASE("even_split built-in range mapper behaves as expected", "[range-mapper]") { { - range_mapper rm{even_split<3>(), range<3>{128, 345, 678}}; + const range_mapper rm{even_split<3>(), range<3>{128, 345, 678}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{0}, {1}, {8}})); REQUIRE(sr.offset == id<3>{0, 0, 0}); REQUIRE(sr.range == range<3>{16, 345, 678}); } { - range_mapper rm{even_split<3>(), range<3>{128, 345, 678}}; + const range_mapper rm{even_split<3>(), range<3>{128, 345, 678}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{4}, {2}, {8}})); REQUIRE(sr.offset == id<3>{64, 0, 0}); REQUIRE(sr.range == range<3>{32, 345, 678}); } { - range_mapper rm{even_split<3>(), range<3>{131, 992, 613}}; + const range_mapper rm{even_split<3>(), range<3>{131, 992, 613}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{5}, {2}, {7}})); REQUIRE(sr.offset == id<3>{95, 0, 0}); REQUIRE(sr.range == range<3>{36, 992, 613}); } { - range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{128, 345, 678}}; + const range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{128, 345, 678}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{0}, {1}, {8}})); REQUIRE(sr.offset == id<3>{0, 0, 0}); REQUIRE(sr.range == range<3>{20, 345, 678}); } { - range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{131, 992, 613}}; + const range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{131, 992, 613}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{0}, {1}, {7}})); REQUIRE(sr.offset == id<3>{0, 0, 0}); REQUIRE(sr.range == range<3>{20, 992, 613}); } { - range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{131, 992, 613}}; + const range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{131, 992, 613}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{5}, {2}, {7}})); REQUIRE(sr.offset == id<3>{100, 0, 0}); REQUIRE(sr.range == range<3>{31, 992, 613}); } { - range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{236, 992, 613}}; + const range_mapper rm{even_split<3>(range<3>(10, 1, 1)), range<3>{236, 992, 613}}; auto sr = rm_result_to_subrange(rm.map_3(chunk<1>{{6}, {1}, {7}})); REQUIRE(sr.offset == id<3>{200, 0, 0}); REQUIRE(sr.range == range<3>{36, 992, 613}); diff --git a/test/receive_arbiter_tests.cc b/test/receive_arbiter_tests.cc index 2ceb10d57..a36da5713 100644 --- a/test/receive_arbiter_tests.cc +++ b/test/receive_arbiter_tests.cc @@ -40,7 +40,7 @@ class mock_recv_communicator : public communicator { [[nodiscard]] async_event receive_payload(const node_id from, const message_id msgid, void* const base, const stride& stride) override { const auto key = std::pair(from, msgid); REQUIRE(m_pending_recvs.count(key) == 0); - completion_flag flag = std::make_shared(false); + const completion_flag flag = std::make_shared(false); m_pending_recvs.emplace(key, std::tuple(base, stride, flag)); return make_async_event(flag); } diff --git a/test/region_map_tests.cc b/test/region_map_tests.cc index 11fc02ddd..cad207d8f 100644 --- a/test/region_map_tests.cc +++ b/test/region_map_tests.cc @@ -184,7 +184,7 @@ void draw(const region_map_impl& rm) { TEST_CASE("region_map::try_merge does not attempt to merge intermediate results that no longer exist", "[region_map]") { region_map_impl rm(box<2>::full_range({99, 99}), -1); - std::vector, int>> entries = { + const std::vector, int>> entries = { // These first three entries will be merged {{{0, 0}, {33, 66}}, 1}, {{{33, 0}, {66, 66}}, 1}, @@ -235,7 +235,7 @@ TEST_CASE("region_map can be moved", "[region_map]") { TEST_CASE("region_map handles basic operations in 0D", "[region_map]") { const int default_value = -1; - region_map_impl rm{{}, default_value}; + const region_map_impl rm{{}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({0, 1}); @@ -791,12 +791,12 @@ TEST_CASE("inserting consecutive boxes results in zero overlap", "[region_map][p TEST_CASE("query regions are clamped from both sides in region maps with non-zero offset", "[region_map]") { const auto region_box = box<3>({1, 2, 3}, {7, 9, 11}); - region_map rm(region_box, 42); + const region_map rm(region_box, 42); CHECK(rm.get_region_values(box<3>::full_range({20, 19, 18})) == std::vector{std::pair{region_box, 42}}); } TEMPLATE_TEST_CASE_SIG("get_region_values() returns no boxes", "[region_map]", ((int Dims), Dims), 0, 1, 2, 3) { - region_map rm(range_cast<3>(test_utils::truncate_range({2, 3, 4})), -1); + const region_map rm(range_cast<3>(test_utils::truncate_range({2, 3, 4})), -1); CHECK(rm.get_region_values(box<3>()).empty()); CHECK(rm.get_region_values(region<3>()).empty()); } diff --git a/test/runtime_deprecation_tests.cc b/test/runtime_deprecation_tests.cc index 901349b40..80ce5361f 100644 --- a/test/runtime_deprecation_tests.cc +++ b/test/runtime_deprecation_tests.cc @@ -31,8 +31,7 @@ namespace detail { } TEST_CASE_METHOD(test_utils::runtime_fixture, "an explicit device can be provided to distr_queue", "[deprecated][distr_queue]") { - sycl::default_selector selector; - sycl::device device{selector}; + sycl::device device; SECTION("before the runtime is initialized") { REQUIRE_FALSE(runtime::has_instance()); @@ -83,7 +82,7 @@ namespace detail { experimental::buffer_snapshot full_snapshot = experimental::fence(q, buf).get(); experimental::buffer_snapshot partial_snapshot = experimental::fence(q, buf, subrange<1>(8, 8)).get(); - int ho_value = experimental::fence(q, ho).get(); + const int ho_value = experimental::fence(q, ho).get(); CHECK(full_snapshot.get_range() == range<1>(16)); CHECK(std::equal(init.begin(), init.end(), full_snapshot.get_data())); diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index eb729227c..968b3f948 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -83,8 +83,8 @@ namespace detail { task_manager tm{1, tdag, nullptr, &delegate}; tm.generate_epoch_task(epoch_action::init); CHECK(delegate.counter == 1); - range<2> gs = {1, 1}; - id<2> go = {}; + const range<2> gs = {1, 1}; + const id<2> go = {}; tm.submit_command_group([=](handler& cgh) { cgh.parallel_for(gs, go, [](auto) {}); }); CHECK(delegate.counter == 2); tm.submit_command_group([](handler& cgh) { cgh.host_task(on_master_node, [] {}); }); @@ -126,13 +126,13 @@ namespace detail { std::vector accs; accs.push_back(buffer_access{0, access_mode::read, std::make_unique>>(subrange<2>{{3, 0}, {10, 20}}, range<2>{30, 30})}); accs.push_back(buffer_access{0, access_mode::read, std::make_unique>>(subrange<2>{{10, 0}, {7, 20}}, range<2>{30, 30})}); - buffer_access_map bam{std::move(accs), task_geometry{2, {100, 100, 1}, {}, {}}}; + const buffer_access_map bam{std::move(accs), task_geometry{2, {100, 100, 1}, {}, {}}}; const auto req = bam.compute_consumed_region(0, subrange<3>({0, 0, 0}, {100, 100, 1})); CHECK(req == box(subrange<3>({3, 0, 0}, {14, 20, 1}))); } TEST_CASE("tasks gracefully handle get_requirements() calls for buffers they don't access", "[task]") { - buffer_access_map bam; + const buffer_access_map bam; const auto req = bam.compute_consumed_region(0, subrange<3>({0, 0, 0}, {100, 1, 1})); CHECK(req == box<3>()); } @@ -836,49 +836,41 @@ namespace detail { const std::string error_string{"Failed to parse/validate environment variables."}; { - std::unordered_map invalid_test_env_var{{"CELERITY_LOG_LEVEL", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_LOG_LEVEL", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_GRAPH_PRINT_MAX_VERTS", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_GRAPH_PRINT_MAX_VERTS", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_DEVICES", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_DEVICES", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_DRY_RUN_NODES", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_DRY_RUN_NODES", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_PROFILE_KERNEL", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_PROFILE_KERNEL", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_FORCE_WG", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_FORCE_WG", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_PROFILE_OCL", "a"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_PROFILE_OCL", "a"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } { - std::unordered_map invalid_test_env_var{{"CELERITY_TRACY", "foo"}}; - const auto test_env = env::scoped_test_environment(invalid_test_env_var); + env::scoped_test_environment ste("CELERITY_TRACY", "foo"); CHECK_THROWS_WITH((celerity::detail::config(nullptr, nullptr)), error_string); } } diff --git a/test/system/distr_tests.cc b/test/system/distr_tests.cc index f7a9304c4..47d1c7a7f 100644 --- a/test/system/distr_tests.cc +++ b/test/system/distr_tests.cc @@ -216,7 +216,7 @@ namespace detail { { size_t relative = global_linear_id; for(int nd = 0; nd < Dims; ++nd) { - int d = Dims - 1 - nd; + const int d = Dims - 1 - nd; global_id[d] = relative % global_range[d]; relative /= global_range[d]; } diff --git a/test/system_benchmarks.cc b/test/system_benchmarks.cc index d12304989..aa208e641 100644 --- a/test/system_benchmarks.cc +++ b/test/system_benchmarks.cc @@ -215,7 +215,6 @@ void run_rsim_benchmark(const size_t n_tris, const size_t num_iter) { cgh.parallel_for(kij_size, [=](item<2> item) { float val = 0.f; - float included_items = 0.f; for(size_t i = 0; i < t; ++i) { val += read_rad[{i, item.get_id(0)}] * read_kij[{item.get_id(0), item.get_id(1)}]; } diff --git a/test/task_graph_tests.cc b/test/task_graph_tests.cc index 2a7441f4d..9c404716b 100644 --- a/test/task_graph_tests.cc +++ b/test/task_graph_tests.cc @@ -391,24 +391,24 @@ namespace detail { auto buf_a = tt.mbf.create_buffer(range<1>(128)); auto buf_b = tt.mbf.create_buffer(range<1>(128)); - task_id tid_1 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { + const task_id tid_1 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_a.get_access(cgh, fixed<1>({0, 64})); buf_b.get_access(cgh, fixed<1>({0, 128})); }); - task_id tid_2 = + const task_id tid_2 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_a.get_access(cgh, fixed<1>({64, 64})); }); - [[maybe_unused]] task_id tid_3 = + [[maybe_unused]] const task_id tid_3 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_a.get_access(cgh, fixed<1>({32, 64})); }); - task_id tid_4 = + const task_id tid_4 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_a.get_access(cgh, fixed<1>({32, 64})); }); const auto horizon = task_manager_testspy::get_current_horizon(tt.tm); CHECK(test_utils::get_num_live_horizons(tt.tdag) == 1); CHECK(horizon != nullptr); - [[maybe_unused]] task_id tid_6 = + [[maybe_unused]] const task_id tid_6 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_b.get_access(cgh, fixed<1>({0, 128})); }); - [[maybe_unused]] task_id tid_7 = + [[maybe_unused]] const task_id tid_7 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_b.get_access(cgh, fixed<1>({0, 128})); }); { @@ -419,7 +419,7 @@ namespace detail { CHECK(region_map_a.get_region_values(make_region(32, 96)).front().second == test_utils::get_task(tt.tdag, tid_4)); } - [[maybe_unused]] task_id tid_8 = + [[maybe_unused]] const task_id tid_8 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_b.get_access(cgh, fixed<1>({0, 128})); }); CHECK(test_utils::get_num_live_horizons(tt.tdag) == 2); @@ -430,7 +430,7 @@ namespace detail { CHECK(region_map_a.get_region_values(make_region(0, 128)).front().second == horizon); } - task_id tid_9 = + const task_id tid_9 = test_utils::add_host_task(tt.tm, on_master_node, [&](handler& cgh) { buf_a.get_access(cgh, fixed<1>({64, 64})); }); { From ae9521008e0bd79a4c50c5ccc1da00e9bfbc3c9d Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Thu, 21 Nov 2024 17:33:23 +0100 Subject: [PATCH 3/4] Add missing includes and consistently order them We can't add the misc-include-cleaner lint because it causes too many false positives with "interface headers" such as sycl.hpp. --- include/access_modes.h | 1 + include/accessor.h | 17 +++++-- include/affinity.h | 1 + include/async_event.h | 2 + include/backend/backend.h | 3 +- include/backend/sycl_backend.h | 14 +++++- include/buffer.h | 8 ++-- include/celerity.h | 1 + include/cgf_diagnostics.h | 3 +- include/closure_hydrator.h | 16 +++++-- include/command_graph_generator.h | 22 +++++---- include/communicator.h | 8 ++++ include/config.h | 6 ++- include/debug.h | 7 ++- include/dense_map.h | 1 + include/device_selection.h | 17 +++++-- include/distr_queue.h | 18 +++++-- include/double_buffered_queue.h | 3 +- include/dry_run_executor.h | 5 ++ include/executor.h | 2 + include/fence.h | 11 +++-- include/grid.h | 8 +++- include/handler.h | 30 ++++++++---- include/hint.h | 2 + include/host_object.h | 8 +++- include/host_utils.h | 4 ++ include/instruction_graph.h | 5 ++ include/instruction_graph_generator.h | 4 ++ include/intrusive_graph.h | 2 + include/item.h | 8 ++++ include/launcher.h | 1 + include/live_executor.h | 4 ++ include/local_communicator.h | 7 +++ include/log.h | 4 +- include/mpi_communicator.h | 6 +++ include/named_threads.h | 1 + include/nd_memory.h | 2 + include/out_of_order_engine.h | 2 + include/partition.h | 3 ++ include/print_graph.h | 6 ++- include/print_utils.h | 4 ++ include/queue.h | 11 +++-- include/range_mapper.h | 10 ++-- include/ranges.h | 8 +++- include/receive_arbiter.h | 8 ++++ include/recorders.h | 15 ++++++ include/reduction.h | 2 + include/region_map.h | 10 ++-- include/runtime.h | 7 +-- include/split.h | 4 +- include/sycl_wrappers.h | 3 +- include/system_info.h | 2 + include/task.h | 18 ++++--- include/task_manager.h | 11 +++-- include/thread_queue.h | 8 ++++ include/tracy.h | 1 + include/types.h | 1 + include/utils.h | 7 ++- include/workaround.h | 1 + src/affinity.cc | 2 + src/backend/sycl_backend.cc | 22 +++++++++ src/backend/sycl_cuda_backend.cc | 18 ++++++- src/backend/sycl_generic_backend.cc | 11 +++++ src/config.cc | 14 ++++-- src/dry_run_executor.cc | 1 + src/grid.cc | 10 ++++ src/instruction_graph_generator.cc | 27 ++++++++++- src/live_executor.cc | 1 + src/mpi_communicator.cc | 13 +++++ src/out_of_order_engine.cc | 2 + src/platform_specific/affinity.unix.cc | 7 ++- src/platform_specific/affinity.win.cc | 8 +++- src/platform_specific/named_threads.unix.cc | 4 ++ src/platform_specific/named_threads.win.cc | 1 + src/print_graph.cc | 20 ++++++-- src/receive_arbiter.cc | 17 ++++++- src/recorders.cc | 13 ++++- src/runtime.cc | 53 ++++++++++++++------- src/split.cc | 8 +++- src/task.cc | 12 +++-- src/task_manager.cc | 14 ++++-- src/utils.cc | 15 ++++++ 82 files changed, 568 insertions(+), 129 deletions(-) diff --git a/include/access_modes.h b/include/access_modes.h index 7aa3ab0a3..858ed2439 100644 --- a/include/access_modes.h +++ b/include/access_modes.h @@ -4,6 +4,7 @@ #include + namespace celerity::detail::access { constexpr std::array all_modes = {sycl::access::mode::atomic, sycl::access::mode::discard_read_write, sycl::access::mode::discard_write, diff --git a/include/accessor.h b/include/accessor.h index 12a2b55dc..8e5cdaeb4 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -1,16 +1,25 @@ #pragma once -#include - -#include - #include "access_modes.h" #include "buffer.h" #include "cgf_diagnostics.h" #include "closure_hydrator.h" #include "handler.h" +#include "range_mapper.h" +#include "ranges.h" #include "sycl_wrappers.h" +#include "types.h" #include "version.h" +#include "workaround.h" + +#include +#include +#include +#include +#include + +#include + namespace celerity { diff --git a/include/affinity.h b/include/affinity.h index 41ba185bc..91bb693b9 100644 --- a/include/affinity.h +++ b/include/affinity.h @@ -5,6 +5,7 @@ #include #include + // The goal of this thread pinning mechanism, when enabled, is to ensure that threads which benefit from fast communication // are pinned to cores that are close to each other in terms of cache hierarchy. // It currently accomplishes this by pinning threads to cores in a round-robin fashion according to their order in the `thread_type` enum. diff --git a/include/async_event.h b/include/async_event.h index ba02e5ef5..1654356d8 100644 --- a/include/async_event.h +++ b/include/async_event.h @@ -5,6 +5,8 @@ #include #include #include +#include + namespace celerity::detail { diff --git a/include/backend/backend.h b/include/backend/backend.h index a59369e86..dba254822 100644 --- a/include/backend/backend.h +++ b/include/backend/backend.h @@ -2,13 +2,14 @@ #include "async_event.h" #include "closure_hydrator.h" +#include "grid.h" #include "launcher.h" #include "nd_memory.h" #include "types.h" +#include #include -#include namespace celerity::detail { diff --git a/include/backend/sycl_backend.h b/include/backend/sycl_backend.h index 96dcbf1e4..711c73e64 100644 --- a/include/backend/sycl_backend.h +++ b/include/backend/sycl_backend.h @@ -1,15 +1,25 @@ #pragma once #include "async_event.h" - #include "backend/backend.h" +#include "closure_hydrator.h" +#include "grid.h" +#include "launcher.h" +#include "nd_memory.h" +#include "types.h" #include +#include +#include +#include #include #include +#include +#include #include -#include +#include + namespace celerity::detail::sycl_backend_detail { diff --git a/include/buffer.h b/include/buffer.h index cff02c986..a46273d87 100644 --- a/include/buffer.h +++ b/include/buffer.h @@ -1,14 +1,14 @@ #pragma once -#include - -#include - #include "ranges.h" #include "runtime.h" #include "sycl_wrappers.h" #include "tracy.h" +#include + +#include + namespace celerity { diff --git a/include/celerity.h b/include/celerity.h index 1424724a7..659f92362 100644 --- a/include/celerity.h +++ b/include/celerity.h @@ -11,6 +11,7 @@ #include "side_effect.h" #include "version.h" + namespace celerity { namespace runtime { diff --git a/include/cgf_diagnostics.h b/include/cgf_diagnostics.h index 0bb45b001..c8ae50fc7 100644 --- a/include/cgf_diagnostics.h +++ b/include/cgf_diagnostics.h @@ -1,8 +1,9 @@ #pragma once +#include "task.h" + #include -#include "task.h" namespace celerity::detail { diff --git a/include/closure_hydrator.h b/include/closure_hydrator.h index 0d95cb091..d1f731d59 100644 --- a/include/closure_hydrator.h +++ b/include/closure_hydrator.h @@ -1,14 +1,24 @@ #pragma once -#include -#include - #include "grid.h" #include "ranges.h" #include "sycl_wrappers.h" #include "types.h" #include "version.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + namespace celerity::detail { #if CELERITY_ACCESSOR_BOUNDARY_CHECK diff --git a/include/command_graph_generator.h b/include/command_graph_generator.h index 8b461f7f8..d5afa3f37 100644 --- a/include/command_graph_generator.h +++ b/include/command_graph_generator.h @@ -1,24 +1,28 @@ #pragma once +#include "command_graph.h" +#include "grid.h" +#include "intrusive_graph.h" +#include "ranges.h" +#include "recorders.h" +#include "reduction.h" +#include "region_map.h" +#include "types.h" +#include "utils.h" + #include +#include #include #include #include #include +#include +#include #include #include #include #include -#include "command_graph.h" -#include "intrusive_graph.h" -#include "ranges.h" -#include "recorders.h" -#include "reduction.h" -#include "region_map.h" -#include "types.h" -#include "utils.h" - namespace celerity::detail { diff --git a/include/communicator.h b/include/communicator.h index 5a45b123d..771097dd6 100644 --- a/include/communicator.h +++ b/include/communicator.h @@ -2,8 +2,16 @@ #include "async_event.h" #include "pilot.h" +#include "ranges.h" +#include "types.h" #include "utils.h" +#include +#include +#include +#include + + namespace celerity::detail { /// Interface for peer-to-peer and collective communication across nodes to be implemented for MPI or similar system APIs. diff --git a/include/config.h b/include/config.h index 8c3bdabec..0560cc48f 100644 --- a/include/config.h +++ b/include/config.h @@ -1,10 +1,12 @@ #pragma once +#include "affinity.h" +#include "log.h" +#include "types.h" + #include #include -#include "affinity.h" -#include "log.h" namespace celerity { namespace detail { diff --git a/include/debug.h b/include/debug.h index 3970577f3..1dbcafaa5 100644 --- a/include/debug.h +++ b/include/debug.h @@ -1,10 +1,12 @@ -#include - #include "buffer.h" #include "handler.h" +#include + + namespace celerity { namespace debug { + template void set_buffer_name(const celerity::buffer& buff, const std::string& debug_name) { detail::set_buffer_name(buff, debug_name); @@ -16,5 +18,6 @@ namespace debug { } inline void set_task_name(celerity::handler& cgh, const std::string& debug_name) { detail::set_task_name(cgh, debug_name); } + } // namespace debug } // namespace celerity diff --git a/include/dense_map.h b/include/dense_map.h index 2cd66f22b..7d88f6b4c 100644 --- a/include/dense_map.h +++ b/include/dense_map.h @@ -5,6 +5,7 @@ #include #include + namespace celerity::detail { /// Like a simple std::unordered_map, but implemented by indexing into a vector with the integral key type. diff --git a/include/device_selection.h b/include/device_selection.h index f8c1eee6c..8b099d282 100644 --- a/include/device_selection.h +++ b/include/device_selection.h @@ -1,17 +1,26 @@ #pragma once +#include "config.h" +#include "log.h" +#include "types.h" +#include "utils.h" + +#include +#include +#include #include +#include #include #include +#include +#include #include #include -#include "config.h" -#include "log.h" -#include "utils.h" - +#include #include + namespace celerity::detail { // TODO these are required by distr_queue.h, but we don't want to pull all include dependencies of the pick_devices implementation into user code! diff --git a/include/distr_queue.h b/include/distr_queue.h index 2f77eabd6..6487e73c0 100644 --- a/include/distr_queue.h +++ b/include/distr_queue.h @@ -1,11 +1,23 @@ #pragma once -#include -#include - +#include "buffer.h" +#include "device_selection.h" #include "fence.h" +#include "host_object.h" +#include "ranges.h" #include "runtime.h" #include "tracy.h" +#include "types.h" + +#include +#include +#include +#include +#include + +#include +#include + namespace celerity { diff --git a/include/double_buffered_queue.h b/include/double_buffered_queue.h index 231328251..07bd48458 100644 --- a/include/double_buffered_queue.h +++ b/include/double_buffered_queue.h @@ -1,11 +1,12 @@ #pragma once +#include "system_info.h" + #include #include #include #include -#include "system_info.h" namespace celerity::detail { diff --git a/include/dry_run_executor.h b/include/dry_run_executor.h index 70702163d..85aa27e64 100644 --- a/include/dry_run_executor.h +++ b/include/dry_run_executor.h @@ -2,9 +2,14 @@ #include "double_buffered_queue.h" #include "executor.h" +#include "types.h" +#include #include +#include #include +#include + namespace celerity::detail { diff --git a/include/executor.h b/include/executor.h index 5afffc99d..1c39818cb 100644 --- a/include/executor.h +++ b/include/executor.h @@ -3,6 +3,8 @@ #include "types.h" #include +#include + namespace celerity::detail { diff --git a/include/fence.h b/include/fence.h index 5ae900df1..bccc77e23 100644 --- a/include/fence.h +++ b/include/fence.h @@ -1,10 +1,5 @@ #pragma once -#include -#include -#include -#include - #include "buffer.h" #include "host_object.h" #include "range_mapper.h" @@ -14,6 +9,12 @@ #include "task_manager.h" #include "tracy.h" +#include +#include +#include +#include + + namespace celerity::detail { template diff --git a/include/grid.h b/include/grid.h index aecdf4247..55e716ec4 100644 --- a/include/grid.h +++ b/include/grid.h @@ -1,14 +1,18 @@ #pragma once +#include "ranges.h" +#include "workaround.h" + #include +#include +#include #include #include #include +#include #include -#include "ranges.h" -#include "workaround.h" namespace celerity::detail { diff --git a/include/handler.h b/include/handler.h index 25cc0e49c..8e61dbd22 100644 --- a/include/handler.h +++ b/include/handler.h @@ -1,26 +1,38 @@ #pragma once -#include -#include -#include -#include -#include - -#include -#include - #include "buffer.h" #include "cgf_diagnostics.h" +#include "communicator.h" +#include "grid.h" +#include "hint.h" #include "item.h" +#include "launcher.h" #include "partition.h" #include "range_mapper.h" #include "ranges.h" +#include "reduction.h" #include "sycl_wrappers.h" #include "task.h" #include "types.h" #include "version.h" #include "workaround.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + namespace celerity { class handler; } diff --git a/include/hint.h b/include/hint.h index 464fa8534..a472b4d53 100644 --- a/include/hint.h +++ b/include/hint.h @@ -1,7 +1,9 @@ #pragma once +#include #include + namespace celerity { class handler; } diff --git a/include/host_object.h b/include/host_object.h index 57f7ad536..c58eb3bc7 100644 --- a/include/host_object.h +++ b/include/host_object.h @@ -1,11 +1,15 @@ #pragma once +#include "runtime.h" +#include "tracy.h" +#include "types.h" + +#include +#include #include #include #include -#include "runtime.h" -#include "tracy.h" namespace celerity::experimental { diff --git a/include/host_utils.h b/include/host_utils.h index aaf74f4ab..fa368469d 100644 --- a/include/host_utils.h +++ b/include/host_utils.h @@ -2,6 +2,10 @@ #include "item.h" #include "partition.h" +#include "ranges.h" + +#include + namespace celerity::experimental { diff --git a/include/instruction_graph.h b/include/instruction_graph.h index 7e185906d..640a5b40b 100644 --- a/include/instruction_graph.h +++ b/include/instruction_graph.h @@ -7,10 +7,15 @@ #include "ranges.h" #include "types.h" #include "version.h" +#include "workaround.h" #include +#include #include #include +#include +#include +#include #include #include diff --git a/include/instruction_graph_generator.h b/include/instruction_graph_generator.h index 4830cb752..297dc511e 100644 --- a/include/instruction_graph_generator.h +++ b/include/instruction_graph_generator.h @@ -4,7 +4,11 @@ #include "ranges.h" #include "types.h" +#include #include +#include +#include +#include namespace celerity::detail { diff --git a/include/intrusive_graph.h b/include/intrusive_graph.h index ab8c74f5e..67025f6b1 100644 --- a/include/intrusive_graph.h +++ b/include/intrusive_graph.h @@ -2,11 +2,13 @@ #include #include +#include #include #include #include + namespace celerity { namespace detail { diff --git a/include/item.h b/include/item.h index 143ac556c..761a5b060 100644 --- a/include/item.h +++ b/include/item.h @@ -1,6 +1,14 @@ #pragma once #include "ranges.h" +#include "sycl_wrappers.h" + +#include +#include +#include + +#include + namespace celerity { diff --git a/include/launcher.h b/include/launcher.h index 995f6d219..a56be96e6 100644 --- a/include/launcher.h +++ b/include/launcher.h @@ -8,6 +8,7 @@ #include + namespace celerity::detail { class communicator; diff --git a/include/live_executor.h b/include/live_executor.h index e322d5201..9e2914069 100644 --- a/include/live_executor.h +++ b/include/live_executor.h @@ -2,11 +2,15 @@ #include "double_buffered_queue.h" #include "executor.h" +#include "types.h" +#include #include #include #include #include +#include + namespace celerity::detail::live_executor_detail { diff --git a/include/local_communicator.h b/include/local_communicator.h index 284bc6723..c64b3753a 100644 --- a/include/local_communicator.h +++ b/include/local_communicator.h @@ -1,8 +1,15 @@ #pragma once +#include "async_event.h" #include "communicator.h" +#include "pilot.h" +#include "types.h" #include "utils.h" +#include +#include +#include + namespace celerity::detail { diff --git a/include/log.h b/include/log.h index 2d2a851c9..6531daa06 100644 --- a/include/log.h +++ b/include/log.h @@ -1,9 +1,9 @@ #pragma once -#include - #include "print_utils.h" // any translation unit that needs logging probably also wants pretty-printing +#include + #define CELERITY_LOG(level, ...) (::spdlog::should_log(level) ? SPDLOG_LOGGER_CALL(::spdlog::default_logger_raw(), level, __VA_ARGS__) : (void)0) diff --git a/include/mpi_communicator.h b/include/mpi_communicator.h index 35dd8031d..99107cba5 100644 --- a/include/mpi_communicator.h +++ b/include/mpi_communicator.h @@ -1,13 +1,19 @@ #pragma once +#include "async_event.h" #include "communicator.h" +#include "pilot.h" +#include "types.h" +#include #include +#include #include #include #include + namespace celerity::detail { /// Constructor tag for mpi_communicator diff --git a/include/named_threads.h b/include/named_threads.h index c214c6f30..e9234db48 100644 --- a/include/named_threads.h +++ b/include/named_threads.h @@ -3,6 +3,7 @@ #include #include + namespace celerity::detail { std::thread::native_handle_type get_current_thread_handle(); diff --git a/include/nd_memory.h b/include/nd_memory.h index 464f3da35..f2d473fcf 100644 --- a/include/nd_memory.h +++ b/include/nd_memory.h @@ -4,6 +4,8 @@ #include "ranges.h" #include "utils.h" +#include +#include #include #include diff --git a/include/out_of_order_engine.h b/include/out_of_order_engine.h index b60862472..b17cb0dc0 100644 --- a/include/out_of_order_engine.h +++ b/include/out_of_order_engine.h @@ -1,9 +1,11 @@ #include "types.h" +#include #include #include #include + namespace celerity::detail::out_of_order_engine_detail { struct engine_impl; } diff --git a/include/partition.h b/include/partition.h index 12d4b3b13..ef8909534 100644 --- a/include/partition.h +++ b/include/partition.h @@ -1,9 +1,12 @@ #pragma once +#include "communicator.h" #include "ranges.h" #include "utils.h" #include "version.h" +#include + #if CELERITY_ENABLE_MPI #include "mpi_communicator.h" // TODO only used for type cast - move that function to .cc file #include diff --git a/include/print_graph.h b/include/print_graph.h index 8c67fe798..39903b961 100644 --- a/include/print_graph.h +++ b/include/print_graph.h @@ -1,9 +1,11 @@ #pragma once -#include +#include "recorders.h" +#include "types.h" + #include +#include -#include "recorders.h" namespace celerity::detail { diff --git a/include/print_utils.h b/include/print_utils.h index 9347ba6ac..b2c25785b 100644 --- a/include/print_utils.h +++ b/include/print_utils.h @@ -9,7 +9,11 @@ #include "types.h" #include "utils.h" +#include #include +#include +#include +#include #include #include diff --git a/include/queue.h b/include/queue.h index a1fb239ce..aecee9d5c 100644 --- a/include/queue.h +++ b/include/queue.h @@ -1,13 +1,18 @@ #pragma once -#include -#include - +#include "buffer.h" +#include "device_selection.h" #include "fence.h" +#include "host_object.h" +#include "ranges.h" #include "runtime.h" #include "tracy.h" #include "types.h" +#include +#include + + namespace celerity::detail { struct barrier_tag {}; } // namespace celerity::detail diff --git a/include/range_mapper.h b/include/range_mapper.h index 673c51131..c4b8bb033 100644 --- a/include/range_mapper.h +++ b/include/range_mapper.h @@ -1,15 +1,17 @@ #pragma once +#include "grid.h" +#include "ranges.h" + +#include +#include +#include #include #include #include #include -#include "grid.h" -#include "ranges.h" -#include "utils.h" - namespace celerity { namespace detail { diff --git a/include/ranges.h b/include/ranges.h index a17957b88..1a1b73f97 100644 --- a/include/ranges.h +++ b/include/ranges.h @@ -1,8 +1,14 @@ #pragma once -#include "sycl_wrappers.h" #include "workaround.h" +#include +#include +#include + +#include + + namespace celerity { // clang-format off diff --git a/include/receive_arbiter.h b/include/receive_arbiter.h index 630ec3d9e..1f354ac2d 100644 --- a/include/receive_arbiter.h +++ b/include/receive_arbiter.h @@ -1,10 +1,18 @@ #pragma once +#include "async_event.h" #include "communicator.h" +#include "grid.h" #include "pilot.h" +#include "types.h" +#include +#include #include +#include #include +#include + namespace celerity::detail::receive_arbiter_detail { diff --git a/include/recorders.h b/include/recorders.h index 59a62d493..5f5f3c547 100644 --- a/include/recorders.h +++ b/include/recorders.h @@ -1,11 +1,26 @@ #pragma once #include "command_graph.h" +#include "grid.h" #include "instruction_graph.h" +#include "intrusive_graph.h" +#include "matchbox.hh" +#include "nd_memory.h" #include "pilot.h" +#include "ranges.h" +#include "sycl_wrappers.h" #include "task.h" +#include "types.h" +#include +#include +#include #include +#include +#include +#include +#include +#include #include #include diff --git a/include/reduction.h b/include/reduction.h index d6e11f640..f638fdec7 100644 --- a/include/reduction.h +++ b/include/reduction.h @@ -2,9 +2,11 @@ #include "types.h" +#include #include #include + namespace celerity::detail { /// Type-erased runtime reduction operation. Used to prepare and complete reductions on host memory in the executor. diff --git a/include/region_map.h b/include/region_map.h index 9457dba75..50dbd338c 100644 --- a/include/region_map.h +++ b/include/region_map.h @@ -1,19 +1,23 @@ #pragma once +#include "grid.h" +#include "ranges.h" +#include "utils.h" + +#include +#include #include #include -#include #include #include #include +#include #include #include #include #include -#include "grid.h" -#include "utils.h" // Some toggles that affect performance (but also change the behavior!) // TODO: Consider making these template arguments instead (inside some config object), and add these: diff --git a/include/runtime.h b/include/runtime.h index ffd33b80d..6e1addd0f 100644 --- a/include/runtime.h +++ b/include/runtime.h @@ -1,8 +1,5 @@ #pragma once -#include -#include - #include "config.h" #include "device_selection.h" #include "executor.h" @@ -12,6 +9,10 @@ #include "task_manager.h" #include "types.h" +#include +#include + + namespace celerity { namespace detail { diff --git a/include/split.h b/include/split.h index 71e7ffc77..cd8a6fd49 100644 --- a/include/split.h +++ b/include/split.h @@ -1,8 +1,10 @@ #pragma once +#include "ranges.h" + +#include #include -#include "ranges.h" namespace celerity::detail { diff --git a/include/sycl_wrappers.h b/include/sycl_wrappers.h index dfa348623..4fe46838b 100644 --- a/include/sycl_wrappers.h +++ b/include/sycl_wrappers.h @@ -1,9 +1,10 @@ #pragma once -#include "workaround.h" +#include #include + namespace celerity { using access_mode = sycl::access::mode; diff --git a/include/system_info.h b/include/system_info.h index c1295a1c5..7d6e74700 100644 --- a/include/system_info.h +++ b/include/system_info.h @@ -4,6 +4,8 @@ #include "types.h" #include +#include + namespace celerity::detail { diff --git a/include/task.h b/include/task.h index 0b3158e6a..767f3d3fb 100644 --- a/include/task.h +++ b/include/task.h @@ -1,12 +1,5 @@ #pragma once -#include -#include -#include -#include -#include -#include - #include "graph.h" #include "grid.h" #include "hint.h" @@ -17,6 +10,17 @@ #include "reduction.h" #include "sycl_wrappers.h" #include "types.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + namespace celerity { diff --git a/include/task_manager.h b/include/task_manager.h index 05d99b36c..487791c11 100644 --- a/include/task_manager.h +++ b/include/task_manager.h @@ -1,13 +1,18 @@ #pragma once +#include "intrusive_graph.h" +#include "ranges.h" +#include "region_map.h" +#include "task.h" +#include "types.h" + +#include #include #include #include #include +#include -#include "region_map.h" -#include "task.h" -#include "types.h" namespace celerity { namespace detail { diff --git a/include/thread_queue.h b/include/thread_queue.h index de11b12f1..e8da9550e 100644 --- a/include/thread_queue.h +++ b/include/thread_queue.h @@ -6,12 +6,20 @@ #include "tracy.h" #include "utils.h" +#include #include +#include +#include #include +#include +#include +#include #include #include +#include #include + namespace celerity::detail { /// A single-thread job queue accepting functors and returning events that conditionally forward job results. diff --git a/include/tracy.h b/include/tracy.h index e527a4bb5..af7acb55b 100644 --- a/include/tracy.h +++ b/include/tracy.h @@ -13,6 +13,7 @@ #include #include + namespace celerity::detail::tracy_detail { // This is intentionally not an atomic, as parts of Celerity (= live_executor) expect it not to change after runtime startup. diff --git a/include/types.h b/include/types.h index 058dcce2c..e1192611f 100644 --- a/include/types.h +++ b/include/types.h @@ -6,6 +6,7 @@ #include #include + namespace celerity::detail { /// Like `false`, but dependent on one or more template parameters. Use as the condition of always-failing static assertions in overloads, template diff --git a/include/utils.h b/include/utils.h index a2453a3d2..26b13e56c 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,7 +1,11 @@ #pragma once +#include "types.h" + #include #include +#include +#include #include #include #include @@ -9,11 +13,10 @@ #include #include #include +#include #include -#include "types.h" - #define CELERITY_DETAIL_UTILS_CAT_2(a, b) a##b #define CELERITY_DETAIL_UTILS_CAT(a, b) CELERITY_DETAIL_UTILS_CAT_2(a, b) diff --git a/include/workaround.h b/include/workaround.h index df9934d2c..ca221f645 100644 --- a/include/workaround.h +++ b/include/workaround.h @@ -6,6 +6,7 @@ #include + #if CELERITY_SYCL_IS_DPCPP #define CELERITY_WORKAROUND_DPCPP 1 #else diff --git a/src/affinity.cc b/src/affinity.cc index 38f917499..10ad78c02 100644 --- a/src/affinity.cc +++ b/src/affinity.cc @@ -8,8 +8,10 @@ #include #include +#include #include + namespace celerity::detail::thread_pinning { std::string thread_type_to_string(const thread_type t_type) { diff --git a/src/backend/sycl_backend.cc b/src/backend/sycl_backend.cc index 86c28b88b..7e855c38a 100644 --- a/src/backend/sycl_backend.cc +++ b/src/backend/sycl_backend.cc @@ -1,17 +1,39 @@ #include "backend/sycl_backend.h" #include "affinity.h" +#include "async_event.h" +#include "backend/backend.h" #include "closure_hydrator.h" #include "dense_map.h" +#include "fmt/format.h" +#include "grid.h" +#include "launcher.h" #include "nd_memory.h" +#include "sycl_wrappers.h" #include "system_info.h" #include "thread_queue.h" +#include "tracy.h" #include "types.h" +#include "utils.h" +#include "workaround.h" +#include #include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include #include +#include + namespace celerity::detail::sycl_backend_detail { diff --git a/src/backend/sycl_cuda_backend.cc b/src/backend/sycl_cuda_backend.cc index 112900bf2..3371225ee 100644 --- a/src/backend/sycl_cuda_backend.cc +++ b/src/backend/sycl_cuda_backend.cc @@ -1,15 +1,29 @@ #include "backend/sycl_backend.h" -#include - +#include "async_event.h" +#include "grid.h" #include "log.h" #include "nd_memory.h" #include "ranges.h" #include "system_info.h" #include "tracy.h" +#include "types.h" #include "utils.h" #include "version.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + #define CELERITY_STRINGIFY2(f) #f #define CELERITY_STRINGIFY(f) CELERITY_STRINGIFY2(f) #define CELERITY_CUDA_CHECK(f, ...) \ diff --git a/src/backend/sycl_generic_backend.cc b/src/backend/sycl_generic_backend.cc index b90a2cca6..56a205aaf 100644 --- a/src/backend/sycl_generic_backend.cc +++ b/src/backend/sycl_generic_backend.cc @@ -1,11 +1,22 @@ #include "backend/sycl_backend.h" +#include "async_event.h" +#include "grid.h" #include "log.h" #include "nd_memory.h" #include "ranges.h" #include "tracy.h" #include "types.h" +#include +#include +#include +#include +#include + +#include + + namespace celerity::detail::sycl_backend_detail { void nd_copy_device_chunked(sycl::queue& queue, const void* const source_base, void* const dest_base, const box<3>& source_box, const box<3>& dest_box, diff --git a/src/config.cc b/src/config.cc index d5509f480..b9563d2e1 100644 --- a/src/config.cc +++ b/src/config.cc @@ -1,15 +1,21 @@ #include "config.h" -#include -#include - #include "affinity.h" #include "log.h" +#include "types.h" +#include "workaround.h" -#include +#include +#include +#include +#include +#include +#include +#include #include + namespace env { template <> diff --git a/src/dry_run_executor.cc b/src/dry_run_executor.cc index 7ca23e224..44582864c 100644 --- a/src/dry_run_executor.cc +++ b/src/dry_run_executor.cc @@ -1,4 +1,5 @@ #include "dry_run_executor.h" + #include "host_object.h" #include "instruction_graph.h" #include "log.h" diff --git a/src/grid.cc b/src/grid.cc index 7427b4073..9bb383b27 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -1,6 +1,16 @@ #include "grid.h" + +#include "ranges.h" #include "utils.h" +#include +#include +#include +#include +#include +#include + + namespace celerity::detail::grid_detail { // Regions have a storage dimensionality (the `Dims` template parameter of `class region`) and an effective dimensionality that is smaller iff all contained diff --git a/src/instruction_graph_generator.cc b/src/instruction_graph_generator.cc index 8941ddf38..3478a35d5 100644 --- a/src/instruction_graph_generator.cc +++ b/src/instruction_graph_generator.cc @@ -2,11 +2,20 @@ #include "access_modes.h" #include "command_graph.h" +#include "dense_map.h" +#include "fmt/base.h" +#include "fmt/format.h" #include "grid.h" +#include "hint.h" #include "instruction_graph.h" +#include "launcher.h" #include "log.h" -#include "print_utils.h" +#include "matchbox.hh" +#include "nd_memory.h" +#include "pilot.h" +#include "ranges.h" #include "recorders.h" +#include "reduction.h" #include "region_map.h" #include "split.h" #include "system_info.h" @@ -14,11 +23,25 @@ #include "tracy.h" #include "types.h" #include "utils.h" - +#include "workaround.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include #include +#include + namespace celerity::detail::instruction_graph_generator_detail { diff --git a/src/live_executor.cc b/src/live_executor.cc index c76997b81..b72d1fb56 100644 --- a/src/live_executor.cc +++ b/src/live_executor.cc @@ -1,4 +1,5 @@ #include "live_executor.h" + #include "affinity.h" #include "backend/backend.h" #include "closure_hydrator.h" diff --git a/src/mpi_communicator.cc b/src/mpi_communicator.cc index 7602be1fd..ac24dd1ee 100644 --- a/src/mpi_communicator.cc +++ b/src/mpi_communicator.cc @@ -1,12 +1,25 @@ #include "mpi_communicator.h" + +#include "async_event.h" +#include "communicator.h" #include "log.h" +#include "pilot.h" #include "ranges.h" +#include "types.h" +#include +#include #include #include +#include +#include +#include +#include +#include #include + namespace celerity::detail::mpi_detail { /// async_event wrapper around an MPI_Request. diff --git a/src/out_of_order_engine.cc b/src/out_of_order_engine.cc index 1b13adad1..1100b6b3f 100644 --- a/src/out_of_order_engine.cc +++ b/src/out_of_order_engine.cc @@ -1,4 +1,5 @@ #include "out_of_order_engine.h" + #include "dense_map.h" #include "instruction_graph.h" #include "system_info.h" @@ -12,6 +13,7 @@ #include + namespace celerity::detail::out_of_order_engine_detail { using target = out_of_order_engine::target; diff --git a/src/platform_specific/affinity.unix.cc b/src/platform_specific/affinity.unix.cc index 8c084a498..8b46939ba 100644 --- a/src/platform_specific/affinity.unix.cc +++ b/src/platform_specific/affinity.unix.cc @@ -1,6 +1,11 @@ +#include "affinity.h" + +#include "log.h" + #include #include #include +#include #include #include #include @@ -12,8 +17,6 @@ #include #include -#include "affinity.h" -#include "log.h" namespace { diff --git a/src/platform_specific/affinity.win.cc b/src/platform_specific/affinity.win.cc index 39893aca2..2593ceb17 100644 --- a/src/platform_specific/affinity.win.cc +++ b/src/platform_specific/affinity.win.cc @@ -1,11 +1,15 @@ -#include - #include "affinity.h" + #include "log.h" +#include + + namespace celerity::detail::thread_pinning { + thread_pinner::thread_pinner(const runtime_configuration& cfg) { if(cfg.enabled) { CELERITY_WARN("Thread pinning is currently not supported on Windows."); } } thread_pinner::~thread_pinner() {} + } // namespace celerity::detail::thread_pinning diff --git a/src/platform_specific/named_threads.unix.cc b/src/platform_specific/named_threads.unix.cc index 9a5afe256..8fb601179 100644 --- a/src/platform_specific/named_threads.unix.cc +++ b/src/platform_specific/named_threads.unix.cc @@ -1,11 +1,15 @@ #include "named_threads.h" + #include "version.h" #include +#include +#include #include #include + namespace celerity::detail { static_assert(std::is_same_v, "Unexpected native thread handle type"); diff --git a/src/platform_specific/named_threads.win.cc b/src/platform_specific/named_threads.win.cc index 8c1f0e6c9..e894675f5 100644 --- a/src/platform_specific/named_threads.win.cc +++ b/src/platform_specific/named_threads.win.cc @@ -6,6 +6,7 @@ #include + namespace celerity::detail { static_assert(std::is_same_v, "Unexpected native thread handle type"); diff --git a/src/print_graph.cc b/src/print_graph.cc index f9ca29b0a..f375a5c3a 100644 --- a/src/print_graph.cc +++ b/src/print_graph.cc @@ -1,18 +1,32 @@ #include "print_graph.h" -#include - #include "access_modes.h" #include "grid.h" #include "instruction_graph.h" +#include "intrusive_graph.h" #include "log.h" -#include "print_utils.h" +#include "matchbox.hh" +#include "ranges.h" #include "recorders.h" +#include "sycl_wrappers.h" #include "task.h" +#include "types.h" +#include "utils.h" +#include +#include +#include +#include #include #include +#include +#include #include +#include +#include + +#include + namespace celerity::detail { diff --git a/src/receive_arbiter.cc b/src/receive_arbiter.cc index 4e1c762ad..3b817c233 100644 --- a/src/receive_arbiter.cc +++ b/src/receive_arbiter.cc @@ -1,10 +1,23 @@ #include "receive_arbiter.h" + +#include "async_event.h" +#include "communicator.h" #include "grid.h" +#include "pilot.h" +#include "ranges.h" +#include "types.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include +#include #include -#include -#include namespace celerity::detail::receive_arbiter_detail { diff --git a/src/recorders.cc b/src/recorders.cc index c1dd2c3be..8aaf852bf 100644 --- a/src/recorders.cc +++ b/src/recorders.cc @@ -1,8 +1,19 @@ #include "recorders.h" +#include "command_graph.h" +#include "grid.h" +#include "instruction_graph.h" +#include "ranges.h" +#include "task.h" +#include "types.h" + +#include +#include #include +#include +#include +#include -#include "command_graph.h" namespace celerity::detail { diff --git a/src/runtime.cc b/src/runtime.cc index 1252b2edb..21d40e85e 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -1,32 +1,19 @@ #include "runtime.h" -#include -#include -#include -#include - -#ifdef _MSC_VER -#include -#else -#include -#endif - -#if CELERITY_USE_MIMALLOC -// override default new/delete operators to use the mimalloc memory allocator -#include -#endif - #include "affinity.h" #include "backend/sycl_backend.h" #include "cgf_diagnostics.h" #include "command_graph_generator.h" +#include "config.h" #include "device_selection.h" #include "dry_run_executor.h" +#include "executor.h" #include "host_object.h" #include "instruction_graph_generator.h" #include "live_executor.h" #include "log.h" #include "print_graph.h" +#include "ranges.h" #include "reduction.h" #include "scheduler.h" #include "system_info.h" @@ -37,6 +24,40 @@ #include "utils.h" #include "version.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef _MSC_VER +#include +#else +#include +#endif + +#if CELERITY_USE_MIMALLOC +// override default new/delete operators to use the mimalloc memory allocator +#include +#endif + #if CELERITY_ENABLE_MPI #include "mpi_communicator.h" #include diff --git a/src/split.cc b/src/split.cc index dc65db1b2..271b33800 100644 --- a/src/split.cc +++ b/src/split.cc @@ -1,10 +1,16 @@ #include "split.h" +#include "grid.h" +#include "ranges.h" + +#include #include +#include #include +#include #include +#include -#include "grid.h" namespace { diff --git a/src/task.cc b/src/task.cc index 441fbf309..5779d1b87 100644 --- a/src/task.cc +++ b/src/task.cc @@ -1,10 +1,5 @@ #include "task.h" -#include -#include -#include -#include - #include "access_modes.h" #include "grid.h" #include "range_mapper.h" @@ -12,6 +7,13 @@ #include "types.h" #include "utils.h" +#include +#include +#include +#include +#include + + namespace celerity::detail { template diff --git a/src/task_manager.cc b/src/task_manager.cc index e3ad28fc1..1ad0268b2 100644 --- a/src/task_manager.cc +++ b/src/task_manager.cc @@ -1,17 +1,25 @@ #include "task_manager.h" -#include "access_modes.h" #include "grid.h" #include "intrusive_graph.h" #include "log.h" +#include "ranges.h" #include "recorders.h" #include "task.h" #include "types.h" - -#include +#include "utils.h" #include +#include +#include +#include +#include #include +#include +#include + +#include + namespace celerity { namespace detail { diff --git a/src/utils.cc b/src/utils.cc index 49bff3387..6227787ae 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -1,9 +1,24 @@ #include "utils.h" + #include "log.h" +#include "types.h" #include +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include +#include +#include + +#include #if !defined(_MSC_VER) // Required for kernel name demangling in Clang From 3d2d8d8ecfbf590746f3e9d39f7e323deb8f973f Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Fri, 22 Nov 2024 16:49:31 +0100 Subject: [PATCH 4/4] [RM] fixup includes --- include/recorders.h | 3 ++- src/backend/sycl_backend.cc | 2 +- src/backend/sycl_cuda_backend.cc | 4 +--- src/instruction_graph_generator.cc | 5 ++--- src/print_graph.cc | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/recorders.h b/include/recorders.h index 5f5f3c547..071d3c22e 100644 --- a/include/recorders.h +++ b/include/recorders.h @@ -4,7 +4,6 @@ #include "grid.h" #include "instruction_graph.h" #include "intrusive_graph.h" -#include "matchbox.hh" #include "nd_memory.h" #include "pilot.h" #include "ranges.h" @@ -24,6 +23,8 @@ #include #include +#include + namespace celerity::detail { diff --git a/src/backend/sycl_backend.cc b/src/backend/sycl_backend.cc index 7e855c38a..82ca47b28 100644 --- a/src/backend/sycl_backend.cc +++ b/src/backend/sycl_backend.cc @@ -5,7 +5,6 @@ #include "backend/backend.h" #include "closure_hydrator.h" #include "dense_map.h" -#include "fmt/format.h" #include "grid.h" #include "launcher.h" #include "nd_memory.h" @@ -31,6 +30,7 @@ #include #include +#include #include #include diff --git a/src/backend/sycl_cuda_backend.cc b/src/backend/sycl_cuda_backend.cc index 3371225ee..b9b6be712 100644 --- a/src/backend/sycl_cuda_backend.cc +++ b/src/backend/sycl_cuda_backend.cc @@ -13,14 +13,12 @@ #include #include -#include -#include -#include #include #include #include #include +#include #include diff --git a/src/instruction_graph_generator.cc b/src/instruction_graph_generator.cc index 3478a35d5..ce3e91d56 100644 --- a/src/instruction_graph_generator.cc +++ b/src/instruction_graph_generator.cc @@ -3,14 +3,11 @@ #include "access_modes.h" #include "command_graph.h" #include "dense_map.h" -#include "fmt/base.h" -#include "fmt/format.h" #include "grid.h" #include "hint.h" #include "instruction_graph.h" #include "launcher.h" #include "log.h" -#include "matchbox.hh" #include "nd_memory.h" #include "pilot.h" #include "ranges.h" @@ -40,7 +37,9 @@ #include #include +#include #include +#include namespace celerity::detail::instruction_graph_generator_detail { diff --git a/src/print_graph.cc b/src/print_graph.cc index f375a5c3a..40d0ee7fe 100644 --- a/src/print_graph.cc +++ b/src/print_graph.cc @@ -5,7 +5,6 @@ #include "instruction_graph.h" #include "intrusive_graph.h" #include "log.h" -#include "matchbox.hh" #include "ranges.h" #include "recorders.h" #include "sycl_wrappers.h" @@ -26,6 +25,7 @@ #include #include +#include namespace celerity::detail {