From 88401ada639312d8df9113c0d203e104cfb92abf Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Thu, 27 Jul 2023 09:20:30 -0400 Subject: [PATCH 1/2] Implement APIv2 BruteForce --- .../brute_force_vs_bvh_timpl.hpp | 10 +- src/ArborX_BruteForce.hpp | 134 ++++++++++++++---- src/details/ArborX_DetailsBruteForceImpl.hpp | 91 ++++++------ test/CMakeLists.txt | 11 +- 4 files changed, 162 insertions(+), 84 deletions(-) diff --git a/benchmarks/brute_force_vs_bvh/brute_force_vs_bvh_timpl.hpp b/benchmarks/brute_force_vs_bvh/brute_force_vs_bvh_timpl.hpp index 7fe63d569..92230eb67 100644 --- a/benchmarks/brute_force_vs_bvh/brute_force_vs_bvh_timpl.hpp +++ b/benchmarks/brute_force_vs_bvh/brute_force_vs_bvh_timpl.hpp @@ -1,5 +1,5 @@ /**************************************************************************** - * Copyright (c) 2017-2022 by the ArborX authors * + * Copyright (c) 2017-2023 by the ArborX authors * * All rights reserved. * * * * This file is part of the ArborX library. ArborX is * @@ -10,7 +10,6 @@ ****************************************************************************/ #include -#include #include #include #include @@ -84,7 +83,6 @@ static void run_fp(int nprimitives, int nqueries, int nrepeats) Placeholder predicates{nqueries}; using Point = ArborX::ExperimentalHyperGeometry::Point; - using Box = ArborX::ExperimentalHyperGeometry::Box; for (int i = 0; i < nrepeats; i++) { @@ -111,7 +109,11 @@ static void run_fp(int nprimitives, int nqueries, int nrepeats) { Kokkos::Timer timer; - ArborX::BruteForce brute{space, primitives}; + ArborX::BasicBruteForce> + brute{space, + ArborX::Details::LegacyValues{ + primitives}}; Kokkos::View indices("Benchmark::indices", 0); Kokkos::View offset("Benchmark::offset", 0); diff --git a/src/ArborX_BruteForce.hpp b/src/ArborX_BruteForce.hpp index b621daa27..d290e1c1b 100644 --- a/src/ArborX_BruteForce.hpp +++ b/src/ArborX_BruteForce.hpp @@ -18,25 +18,35 @@ #include #include #include +#include +#include #include namespace ArborX { -template -class BruteForce +template >>, + typename GeometryTraits::coordinate_type>>::type>> +class BasicBruteForce { public: using memory_space = MemorySpace; static_assert(Kokkos::is_memory_space::value); using size_type = typename MemorySpace::size_type; using bounding_volume_type = BoundingVolume; + using value_type = Value; - BruteForce() = default; + BasicBruteForce() = default; - template - BruteForce(ExecutionSpace const &space, Primitives const &primitives); + template + BasicBruteForce(ExecutionSpace const &space, Values const &values, + IndexableGetter const &indexable_getter = IndexableGetter()); KOKKOS_FUNCTION size_type size() const noexcept { return _size; } @@ -76,43 +86,103 @@ class BruteForce private: size_type _size{0}; bounding_volume_type _bounds; - Kokkos::View _bounding_volumes; + Kokkos::View _values; + IndexableGetter _indexable_getter; +}; + +template +class BruteForce + : public BasicBruteForce, + Details::DefaultIndexableGetter, BoundingVolume> +{ + using base_type = + BasicBruteForce, + Details::DefaultIndexableGetter, BoundingVolume>; + +public: + using legacy_tree = void; + + using bounding_volume_type = Box; + + BruteForce() = default; + + template + BruteForce(ExecutionSpace const &space, Primitives const &primitives) + : base_type( + space, + // Validate the primitives before calling the base constructor + (Details::check_valid_access_traits(PrimitivesTag{}, primitives), + Details::LegacyValues{ + primitives}), + Details::DefaultIndexableGetter()) + {} + + template + void query(ExecutionSpace const &space, Predicates const &predicates, + Callback const &callback, Ignore = Ignore()) const + { + base_type::query(space, predicates, + Details::LegacyCallbackWrapper< + Callback, typename base_type::value_type>{callback}); + } + + template + std::enable_if_t>> + query(ExecutionSpace const &space, Predicates const &predicates, + CallbackOrView &&callback_or_view, View &&view, Args &&...args) const + { + base_type::query(space, predicates, + std::forward(callback_or_view), + std::forward(view), std::forward(args)...); + } }; -template -template -BruteForce::BruteForce( - ExecutionSpace const &space, Primitives const &primitives) - : _size(AccessTraits::size(primitives)) - , _bounding_volumes( - Kokkos::view_alloc(space, Kokkos::WithoutInitializing, - "ArborX::BruteForce::bounding_volumes"), - _size) +template +template +BasicBruteForce:: + BasicBruteForce(ExecutionSpace const &space, Values const &user_values, + IndexableGetter const &indexable_getter) + : _size(AccessTraits::size(user_values)) + , _values(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, + "ArborX::BruteForce::values"), + _size) + , _indexable_getter(indexable_getter) { static_assert( KokkosExt::is_accessible_from::value); - // FIXME for now, do not check the return type of get() - Details::check_valid_access_traits( - PrimitivesTag{}, primitives, Details::DoNotCheckGetReturnType()); - using Access = AccessTraits; + // FIXME redo with RangeTraits + Details::check_valid_access_traits( + PrimitivesTag{}, user_values, Details::DoNotCheckGetReturnType()); + using Access = AccessTraits; static_assert(KokkosExt::is_accessible_from::value, - "Primitives must be accessible from the execution space"); + "Values must be accessible from the execution space"); - Kokkos::Profiling::pushRegion("ArborX::BruteForce::BruteForce"); + KokkosExt::ScopedProfileRegion guard("ArborX::BruteForce::BruteForce"); - Details::BruteForceImpl::initializeBoundingVolumesAndReduceBoundsOfTheScene( - space, primitives, _bounding_volumes, _bounds); + if (empty()) + { + return; + } - Kokkos::Profiling::popRegion(); + Details::AccessValues values{user_values}; + + Details::BruteForceImpl::initializeBoundingVolumesAndReduceBoundsOfTheScene( + space, values, _indexable_getter, _values, _bounds); } -template +template template -void BruteForce::query( - ExecutionSpace const &space, Predicates const &predicates, - Callback const &callback, Ignore) const +void BasicBruteForce::query(ExecutionSpace const &space, + Predicates const &predicates, + Callback const &callback, + Ignore) const { static_assert( KokkosExt::is_accessible_from::value); @@ -124,13 +194,15 @@ void BruteForce::query( using Tag = typename Details::AccessTraitsHelper::tag; static_assert(std::is_same{}, "nearest query not implemented yet"); - using Value = int; Details::check_valid_callback(callback, predicates); Kokkos::Profiling::pushRegion("ArborX::BruteForce::query::spatial"); - Details::BruteForceImpl::query(space, _bounding_volumes, predicates, - callback); + Details::BruteForceImpl::query( + space, predicates, _values, + Details::Indexables{ + _values, _indexable_getter}, + callback); Kokkos::Profiling::popRegion(); } diff --git a/src/details/ArborX_DetailsBruteForceImpl.hpp b/src/details/ArborX_DetailsBruteForceImpl.hpp index 8b50ae38a..b426b7e29 100644 --- a/src/details/ArborX_DetailsBruteForceImpl.hpp +++ b/src/details/ArborX_DetailsBruteForceImpl.hpp @@ -24,91 +24,89 @@ namespace Details { struct BruteForceImpl { - template + template static void initializeBoundingVolumesAndReduceBoundsOfTheScene( - ExecutionSpace const &space, Primitives const &primitives, - BoundingVolumes const &bounding_volumes, Bounds &bounds) + ExecutionSpace const &space, Values const &values, + IndexableGetter const &indexable_getter, Nodes const &nodes, + Bounds &bounds) { - using Access = AccessTraits; - - int const n = Access::size(primitives); - Kokkos::parallel_reduce( "ArborX::BruteForce::BruteForce::" - "initialize_bounding_volumes_and_reduce_bounds", - Kokkos::RangePolicy(space, 0, n), + "initialize_values_and_reduce_bounds", + Kokkos::RangePolicy(space, 0, values.size()), KOKKOS_LAMBDA(int i, Bounds &update) { + nodes(i) = values(i); + using Details::expand; Bounds bounding_volume{}; - expand(bounding_volume, Access::get(primitives, i)); - bounding_volumes(i) = bounding_volume; + expand(bounding_volume, indexable_getter(nodes(i))); update += bounding_volume; }, Kokkos::Sum{bounds}); } - template - static void query(ExecutionSpace const &space, Primitives const &primitives, - Predicates const &predicates, Callback const &callback) + template + static void query(ExecutionSpace const &space, Predicates const &predicates, + Values const &values, Indexables const &indexables, + Callback const &callback) { using TeamPolicy = Kokkos::TeamPolicy; - using AccessPrimitives = AccessTraits; using AccessPredicates = AccessTraits; using PredicateType = typename AccessTraitsHelper::type; - using PrimitiveType = typename AccessTraitsHelper::type; + using IndexableType = std::decay_t; - int const n_primitives = AccessPrimitives::size(primitives); + int const n_indexables = values.size(); int const n_predicates = AccessPredicates::size(predicates); int max_scratch_size = TeamPolicy::scratch_size_max(0); - // half of the scratch memory used by predicates and half for primitives + // half of the scratch memory used by predicates and half for indexables int const predicates_per_team = max_scratch_size / 2 / sizeof(PredicateType); - int const primitives_per_team = - max_scratch_size / 2 / sizeof(PrimitiveType); + int const indexables_per_team = + max_scratch_size / 2 / sizeof(IndexableType); ARBORX_ASSERT(predicates_per_team > 0); - ARBORX_ASSERT(primitives_per_team > 0); + ARBORX_ASSERT(indexables_per_team > 0); - int const n_primitive_tiles = - std::ceil((float)n_primitives / primitives_per_team); + int const n_indexable_tiles = + std::ceil((float)n_indexables / indexables_per_team); int const n_predicate_tiles = std::ceil((float)n_predicates / predicates_per_team); - int const n_teams = n_primitive_tiles * n_predicate_tiles; + int const n_teams = n_indexable_tiles * n_predicate_tiles; using ScratchPredicateType = Kokkos::View>; - using ScratchPrimitiveType = - Kokkos::View>; int scratch_size = ScratchPredicateType::shmem_size(predicates_per_team) + - ScratchPrimitiveType::shmem_size(primitives_per_team); + ScratchIndexableType::shmem_size(indexables_per_team); Kokkos::parallel_for( "ArborX::BruteForce::query::spatial::" - "check_all_predicates_against_all_primitives", + "check_all_predicates_against_all_indexables", TeamPolicy(space, n_teams, Kokkos::AUTO, 1) .set_scratch_size(0, Kokkos::PerTeam(scratch_size)), KOKKOS_LAMBDA(typename TeamPolicy::member_type const &teamMember) { - // select the tiles of predicates/primitives checked by each team + // select the tiles of predicates/indexables checked by each team int predicate_start = predicates_per_team * - (teamMember.league_rank() / n_primitive_tiles); - int primitive_start = primitives_per_team * - (teamMember.league_rank() % n_primitive_tiles); + (teamMember.league_rank() / n_indexable_tiles); + int indexable_start = indexables_per_team * + (teamMember.league_rank() % n_indexable_tiles); int predicates_in_this_team = KokkosExt::min( predicates_per_team, n_predicates - predicate_start); - int primitives_in_this_team = KokkosExt::min( - primitives_per_team, n_primitives - primitive_start); + int indexables_in_this_team = KokkosExt::min( + indexables_per_team, n_indexables - indexable_start); ScratchPredicateType scratch_predicates(teamMember.team_scratch(0), predicates_per_team); - ScratchPrimitiveType scratch_primitives(teamMember.team_scratch(0), - primitives_per_team); - // fill the scratch space with the predicates / primitives in the tile + ScratchIndexableType scratch_indexables(teamMember.team_scratch(0), + indexables_per_team); + // fill the scratch space with the predicates / indexables in the tile Kokkos::parallel_for( Kokkos::TeamVectorRange(teamMember, predicates_in_this_team), [&](const int q) { @@ -116,26 +114,25 @@ struct BruteForceImpl AccessPredicates::get(predicates, predicate_start + q); }); Kokkos::parallel_for( - Kokkos::TeamVectorRange(teamMember, primitives_in_this_team), + Kokkos::TeamVectorRange(teamMember, indexables_in_this_team), [&](const int j) { - scratch_primitives(j) = - AccessPrimitives::get(primitives, primitive_start + j); + scratch_indexables(j) = indexables(indexable_start + j); }); teamMember.team_barrier(); - // start threads for every predicate / primitive combination + // start threads for every predicate / indexable combination Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, primitives_in_this_team), + Kokkos::TeamThreadRange(teamMember, indexables_in_this_team), [&](int j) { Kokkos::parallel_for( Kokkos::ThreadVectorRange(teamMember, predicates_in_this_team), [&](const int q) { auto const &predicate = scratch_predicates(q); - auto const &primitive = scratch_primitives(j); - if (predicate(primitive)) + auto const &indexable = scratch_indexables(j); + if (predicate(indexable)) { - callback(predicate, j + primitive_start); + callback(predicate, values(indexable_start + j)); } }); }); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 226635996..8825d058e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -95,8 +95,15 @@ foreach(_test Callbacks Degenerate ManufacturedSolution ComparisonWithBoost) file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/tstQueryTree${_test}_BF.cpp.tmp" "#include \n" "#include \n" - "template using ArborX__BruteForce = ArborX::BruteForce;\n" - "#define ARBORX_TEST_TREE_TYPES Tuple\n" + "#include \"ArborXTest_LegacyTree.hpp\"\n" + "template \n" + "using ArborX_BruteForce_Box = ArborX::BruteForce;\n" + "template \n" + "using ArborX_Legacy_BasicBruteForce_Box =\n" + " LegacyTree,\n" + " ArborX::Details::DefaultIndexableGetter, ArborX::Box>>;\n" + "#define ARBORX_TEST_TREE_TYPES Tuple\n" "#define ARBORX_TEST_DEVICE_TYPES std::tuple<${ARBORX_DEVICE_TYPES}>\n" "#define ARBORX_TEST_DISABLE_NEAREST_QUERY\n" "#define ARBORX_TEST_DISABLE_CALLBACK_EARLY_EXIT\n" From fab0039a37a1a0c4e5a95fc98499613c2e8786a8 Mon Sep 17 00:00:00 2001 From: Andrey Prokopenko Date: Thu, 26 Oct 2023 14:54:05 -0400 Subject: [PATCH 2/2] Minor update to address review comments --- src/ArborX_BruteForce.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ArborX_BruteForce.hpp b/src/ArborX_BruteForce.hpp index d290e1c1b..5c309db92 100644 --- a/src/ArborX_BruteForce.hpp +++ b/src/ArborX_BruteForce.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -102,7 +103,7 @@ class BruteForce public: using legacy_tree = void; - using bounding_volume_type = Box; + using bounding_volume_type = typename base_type::bounding_volume_type; BruteForce() = default;