diff --git a/src/ArborX_LinearBVH.hpp b/src/ArborX_LinearBVH.hpp index 890cac64d..f90e189ac 100644 --- a/src/ArborX_LinearBVH.hpp +++ b/src/ArborX_LinearBVH.hpp @@ -241,7 +241,7 @@ BasicBoundingVolumeHierarchy:: Kokkos::Profiling::pushRegion("ArborX::BVH::BVH::assign_morton_codes"); // calculate Morton codes of all objects - Kokkos::View morton_indices( + Kokkos::View morton_indices( Kokkos::view_alloc(space, Kokkos::WithoutInitializing, "ArborX::BVH::BVH::morton"), size()); diff --git a/src/details/ArborX_DetailsBatchedQueries.hpp b/src/details/ArborX_DetailsBatchedQueries.hpp index 51fbf3b08..4376666e3 100644 --- a/src/details/ArborX_DetailsBatchedQueries.hpp +++ b/src/details/ArborX_DetailsBatchedQueries.hpp @@ -66,6 +66,8 @@ struct BatchedQueries using Details::returnCentroid; Point xyz = returnCentroid(getGeometry(Access::get(predicates, i))); translateAndScale(xyz, xyz, scene_bounding_box); + // Use 32-bit Morton indices instead of 64-bit as in construction. For + // most (all?) situations, 64-bit just adds a penalty with no benefit. 
morton_codes(i) = morton32(xyz[0], xyz[1], xyz[2]); }); diff --git a/src/details/ArborX_DetailsTreeConstruction.hpp b/src/details/ArborX_DetailsTreeConstruction.hpp index 4cba17c8c..50ee39204 100644 --- a/src/details/ArborX_DetailsTreeConstruction.hpp +++ b/src/details/ArborX_DetailsTreeConstruction.hpp @@ -69,7 +69,7 @@ assignMortonCodesImpl(ExecutionSpace const &space, Primitives const &primitives, Point xyz; centroid(Access::get(primitives, i), xyz); translateAndScale(xyz, xyz, scene_bounding_box); - morton_codes(i) = morton32(xyz[0], xyz[1], xyz[2]); + morton_codes(i) = morton64(xyz[0], xyz[1], xyz[2]); }); } @@ -87,7 +87,7 @@ assignMortonCodesImpl(ExecutionSpace const &space, Primitives const &primitives, Kokkos::RangePolicy(space, 0, n), KOKKOS_LAMBDA(int i) { Point xyz; translateAndScale(Access::get(primitives, i), xyz, scene_bounding_box); - morton_codes(i) = morton32(xyz[0], xyz[1], xyz[2]); + morton_codes(i) = morton64(xyz[0], xyz[1], xyz[2]); }); } @@ -95,7 +95,8 @@ template inline void assignMortonCodes( ExecutionSpace const &space, Primitives const &primitives, - Kokkos::View morton_codes, + Kokkos::View + morton_codes, Box const &scene_bounding_box) { using Access = AccessTraits; @@ -152,7 +153,7 @@ class GenerateHierarchy ExecutionSpace const &space, Primitives const &primitives, Kokkos::View permutation_indices, - Kokkos::View + Kokkos::View sorted_morton_codes, Kokkos::View leaf_nodes, Kokkos::View internal_nodes) @@ -176,7 +177,7 @@ class GenerateHierarchy } KOKKOS_FUNCTION - int delta(int const i) const + long long delta(int const i) const { // Per Apetrei: // Because we already know where the highest differing bit is for each @@ -191,7 +192,7 @@ class GenerateHierarchy // This check is here simply to avoid code complications in the main // operator if (i < 0 || i >= _num_internal_nodes) - return INT_MAX; + return LLONG_MAX; // The Apetrei's paper does not mention dealing with duplicate indices. 
We // follow the original Karras idea in this situation: @@ -202,10 +203,17 @@ class GenerateHierarchy // concatenation. // In this case, if the Morton indices are the same, we want to compare is. // We also want the result in this situation to always be less than any - // Morton comparison. Thus, we add INT_MIN to it. - // We also avoid if/else statement by doing a "x + !x*" trick. + // Morton comparison. Thus, we add LLONG_MIN to it. auto x = _sorted_morton_codes(i) ^ _sorted_morton_codes(i + 1); - return x + (!x) * (INT_MIN + (i ^ (i + 1))); + if (x != 0) + { + // When using 63 bits for Morton codes, the LLONG_MAX is actually a valid + // code. As we want the return statement above to return a value always + // greater than anything here, we subtract 1. + return x - 1; + } + + return LLONG_MIN + (i ^ (i + 1)); } KOKKOS_FUNCTION Node *getNodePtr(int i) const @@ -240,7 +248,7 @@ class GenerateHierarchy template KOKKOS_FUNCTION std::enable_if_t{}> - setRope(Node *node, int range_right, int delta_right) const + setRope(Node *node, int range_right, long long delta_right) const { int rope; if (range_right != _num_internal_nodes) @@ -284,8 +292,8 @@ class GenerateHierarchy int range_left = i - leaf_nodes_shift; int range_right = range_left; - int delta_left = delta(range_left - 1); - int delta_right = delta(range_right); + auto delta_left = delta(range_left - 1); + auto delta_right = delta(range_right); setRope(leaf_node, range_right, delta_right); @@ -381,7 +389,7 @@ class GenerateHierarchy private: Primitives _primitives; Kokkos::View _permutation_indices; - Kokkos::View _sorted_morton_codes; + Kokkos::View _sorted_morton_codes; Kokkos::View _leaf_nodes; Kokkos::View _internal_nodes; Kokkos::View _ranges; @@ -397,7 +405,7 @@ void generateHierarchy( ExecutionSpace const &space, Primitives const &primitives, Kokkos::View permutation_indices, - Kokkos::View + Kokkos::View sorted_morton_codes, Kokkos::View leaf_nodes, Kokkos::View internal_nodes) @@ -405,7 +413,7 @@ 
void generateHierarchy( using ConstPermutationIndices = Kokkos::View; using ConstMortonCodes = - Kokkos::View; + Kokkos::View; using MemorySpace = typename decltype(internal_nodes)::memory_space; diff --git a/test/tstDetailsTreeConstruction.cpp b/test/tstDetailsTreeConstruction.cpp index 728c00e79..15569125a 100644 --- a/test/tstDetailsTreeConstruction.cpp +++ b/test/tstDetailsTreeConstruction.cpp @@ -30,24 +30,35 @@ namespace tt = boost::test_tools; BOOST_AUTO_TEST_CASE_TEMPLATE(assign_morton_codes, DeviceType, ARBORX_DEVICE_TYPES) { - std::vector points = { - {{0.0, 0.0, 0.0}}, {{0.25, 0.75, 0.25}}, {{0.75, 0.25, 0.25}}, - {{0.75, 0.75, 0.25}}, {{1.33, 2.33, 3.33}}, {{1.66, 2.66, 3.66}}, - {{1024.0, 1024.0, 1024.0}}, - }; + // N is the number of Morton grid cells in each dimension for 64-bit Morton + // codes. + constexpr unsigned long long N = 1 << 21; + std::vector points = {{{0.0, 0.0, 0.0}}, + {{0.25, 0.75, 0.25}}, + {{0.75, 0.25, 0.25}}, + {{0.75, 0.75, 0.25}}, + {{1.33, 2.33, 3.33}}, + {{1.66, 2.66, 3.66}}, + {{(float)N, (float)N, (float)N}}}; int const n = points.size(); // lower left front corner corner of the octant the points fall in - std::vector> anchors = { - {{0, 0, 0}}, {{0, 0, 0}}, {{0, 0, 0}}, {{0, 0, 0}}, - {{1, 2, 3}}, {{1, 2, 3}}, {{1023, 1023, 1023}}}; - auto fun = [](std::array const &anchor) { + std::vector> anchors = { + {{0, 0, 0}}, + {{0, 0, 0}}, + {{0, 0, 0}}, + {{0, 0, 0}}, + {{1, 2, 3}}, + {{1, 2, 3}}, + {{N - 1, N - 1, N - 1}}}; + auto fun = [](std::array const &anchor) { using ArborX::Details::expandBitsBy2; - unsigned int i = std::get<0>(anchor); - unsigned int j = std::get<1>(anchor); - unsigned int k = std::get<2>(anchor); + auto i = std::get<0>(anchor); + auto j = std::get<1>(anchor); + auto k = std::get<2>(anchor); return 4 * expandBitsBy2(i) + 2 * expandBitsBy2(j) + expandBitsBy2(k); }; - std::vector ref(n, std::numeric_limits::max()); + std::vector ref( + n, std::numeric_limits::max()); for (int i = 0; i < n; ++i) ref[i] = 
fun(anchors[i]); // using points rather than boxes for convenience here but still have to @@ -64,9 +75,10 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(assign_morton_codes, DeviceType, space, boxes, scene_host); BOOST_TEST(ArborX::Details::equals( - scene_host, {{{0., 0., 0.}}, {{1024., 1024., 1024.}}})); + scene_host, {{{0.0, 0.0, 0.0}}, {{(float)N, (float)N, (float)N}}})); - Kokkos::View morton_codes("morton_codes", n); + Kokkos::View morton_codes("morton_codes", + n); ArborX::Details::TreeConstruction::assignMortonCodes( space, boxes, morton_codes, scene_host); auto morton_codes_host = Kokkos::create_mirror_view(morton_codes); @@ -231,7 +243,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(example_tree_construction, DeviceType, // See // https://devblogs.nvidia.com/parallelforall/thinking-parallel-part-iii-tree-construction-gpu/ int const n = 8; - Kokkos::View sorted_morton_codes( + Kokkos::View sorted_morton_codes( "sorted_morton_codes", n); std::vector s{ "00001", "00010", "00100", "00101", "10011", "11000", "11001", "11110", @@ -239,8 +251,8 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(example_tree_construction, DeviceType, for (int i = 0; i < n; ++i) { std::bitset<6> b(s[i]); - BOOST_TEST_MESSAGE(b << " " << b.to_ulong()); - sorted_morton_codes(i) = b.to_ulong(); + BOOST_TEST_MESSAGE(b << " " << b.to_ullong()); + sorted_morton_codes(i) = b.to_ullong(); } Kokkos::View primitives(