Skip to content

Commit

Permalink
adds catch2 tests for device-scope merge sort
Browse files Browse the repository at this point in the history
  • Loading branch information
elstehle committed Jan 24, 2024
1 parent 54fd4c0 commit 572938e
Show file tree
Hide file tree
Showing 6 changed files with 634 additions and 425 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/******************************************************************************
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand All @@ -25,53 +25,34 @@
*
******************************************************************************/

/******************************************************************************
* Test of DeviceMergeSort utilities using large user types (i.e., with vsmem utilities)
******************************************************************************/

// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR

#include <cstdio>
#include <new> // for std::bad_alloc

#include "test_device_merge_sort.cuh"
#include "test_util.h"

int main(int argc, char** argv)
struct custom_less_op_t
{
CommandLineArgs args(argc, argv);

// Initialize device
CubDebugExit(args.DeviceInit());

using DataType = int64_t;
template <typename T>
__host__ __device__ bool operator()(const T& lhs, const T& rhs)
{
return lhs < rhs;
}
};

thrust::default_random_engine rng;
for (unsigned int pow2 = 9; pow2 < 22; pow2 += 2)
struct compare_first_lt_op_t
{
/**
* We need to be able to have two different types for lhs and rhs, as the call to std::stable_sort with a
* zip-iterator, will pass a thrust::tuple for lhs and a tuple_of_iterator_references for rhs.
*/
template <typename LhsT, typename RhsT>
__host__ __device__ bool operator()(const LhsT& lhs, const RhsT& rhs) const
{
try
{
const unsigned int num_items = 1 << pow2;
// Testing vsmem facility with a fallback policy
TestHelper<true>::AllocateAndTest<HugeDataType<128>, DataType>(rng, num_items);
// Testing vsmem facility with virtual shared memory
TestHelper<true>::AllocateAndTest<HugeDataType<256>, DataType>(rng, num_items);
}
catch (std::bad_alloc& e)
{
if (pow2 > 20)
{ // Some cards don't have enough memory for large allocations, these
// can be skipped.
printf("Skipping large memory test. (num_items=2^%u): %s\n", pow2, e.what());
}
else
{ // For smaller problem sizes, treat as an error:
printf("Error (num_items=2^%u): %s", pow2, e.what());
throw;
}
}
return thrust::get<0>(lhs) < thrust::get<0>(rhs);
}
};

return 0;
}
template <typename T>
struct mod_op_t
{
T mod;
__host__ __device__ T operator()(T val) const
{
return val % mod;
}
};
279 changes: 279 additions & 0 deletions cub/test/catch2_test_device_merge_sort.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
/******************************************************************************
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include <cub/device/device_merge_sort.cuh>

#include <thrust/copy.h>
#include <thrust/detail/raw_pointer_cast.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/random.h>
#include <thrust/sequence.h>
#include <thrust/shuffle.h>

#include <algorithm>

#include "catch2_device_merge_sort_common.cuh"
#include "catch2_test_helper.h"
#include "catch2_test_launch_helper.h"

// %PARAM% TEST_LAUNCH lid 0:1:2

DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortPairs, sort_pairs);
DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortPairsCopy, sort_pairs_copy);
DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortPairs, stable_sort_pairs);

DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortKeys, sort_keys);
DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortKeysCopy, sort_keys_copy);
DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeys, stable_sort_keys);
DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeysCopy, stable_sort_keys_copy);

using key_types = c2h::type_list<std::uint8_t, std::uint32_t, double>;
using wide_key_types = c2h::type_list<std::uint32_t, double>;

template <typename OffsetT, typename KeyT>
struct rank_to_key_op_t
{
__device__ __host__ KeyT operator()(const OffsetT& val)
{
return static_cast<KeyT>(val);
}
};

template <typename OffsetT>
void get_key_ranks(thrust::device_vector<OffsetT>& key_ranks)
{
thrust::sequence(key_ranks.begin(), key_ranks.end());
thrust::shuffle(key_ranks.begin(), key_ranks.end(), thrust::default_random_engine{});
}

CUB_TEST("DeviceMergeSort::SortKeysCopy works", "[merge][sort][device]", wide_key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<key_t> keys_in(num_items);
thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in.begin(), rank_to_key_op_t<offset_t, key_t>{});

// Perform sort
thrust::device_vector<key_t> keys_out(num_items);
sort_keys_copy(
thrust::raw_pointer_cast(keys_in.data()), thrust::raw_pointer_cast(keys_out.data()), num_items, custom_less_op_t{});

// Verify results
thrust::host_vector<key_t> keys_expected(num_items);
auto key_ranks_it = thrust::make_counting_iterator(offset_t{});
thrust::transform(key_ranks_it, key_ranks_it + num_items, keys_expected.begin(), rank_to_key_op_t<offset_t, key_t>{});

REQUIRE(keys_expected == keys_out);
}

CUB_TEST("DeviceMergeSort::SortKeys works", "[merge][sort][device]", wide_key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<key_t> keys_in_out(num_items);
thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in_out.begin(), rank_to_key_op_t<offset_t, key_t>{});

// Perform sort
sort_keys(thrust::raw_pointer_cast(keys_in_out.data()), num_items, custom_less_op_t{});

// Verify results
thrust::host_vector<key_t> keys_expected(num_items);
auto key_ranks_it = thrust::make_counting_iterator(offset_t{});
thrust::transform(key_ranks_it, key_ranks_it + num_items, keys_expected.begin(), rank_to_key_op_t<offset_t, key_t>{});

REQUIRE(keys_expected == keys_in_out);
}

CUB_TEST("DeviceMergeSort::StableSortKeysCopy works and performs a stable sort", "[merge][sort][device]", key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<thrust::tuple<key_t, offset_t>> keys_in(num_items);
// We make sure to generate a few items that compare equally to check for stability
auto key_rank_mod_it = thrust::make_transform_iterator(key_ranks.begin(), mod_op_t<offset_t>{128});
auto sort_key_it = thrust::make_transform_iterator(key_rank_mod_it, rank_to_key_op_t<offset_t, key_t>{});
auto keys_in_it = thrust::make_zip_iterator(sort_key_it, key_ranks.begin());
thrust::copy(keys_in_it, keys_in_it + num_items, keys_in.begin());

// Perform sort
thrust::device_vector<thrust::tuple<key_t, offset_t>> keys_out(num_items);
stable_sort_keys_copy(
thrust::raw_pointer_cast(keys_in.data()),
thrust::raw_pointer_cast(keys_out.data()),
num_items,
compare_first_lt_op_t{});

// Verify results
thrust::host_vector<thrust::tuple<key_t, offset_t>> keys_expected(keys_in);
std::stable_sort(keys_expected.begin(), keys_expected.end(), compare_first_lt_op_t{});

REQUIRE(keys_expected == keys_out);
}

CUB_TEST("DeviceMergeSort::StableSortKeys works and performs a stable sort", "[merge][sort][device]", key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<thrust::tuple<key_t, offset_t>> keys_in_out(num_items);
// We make sure to generate a few items that compare equally to check for stability
auto key_rank_mod_it = thrust::make_transform_iterator(key_ranks.begin(), mod_op_t<offset_t>{128});
auto sort_key_it = thrust::make_transform_iterator(key_rank_mod_it, rank_to_key_op_t<offset_t, key_t>{});
auto keys_in_it = thrust::make_zip_iterator(sort_key_it, key_ranks.begin());
thrust::copy(keys_in_it, keys_in_it + num_items, keys_in_out.begin());

// Perform sort
stable_sort_keys(thrust::raw_pointer_cast(keys_in_out.data()), num_items, compare_first_lt_op_t{});

// Verify results
thrust::host_vector<thrust::tuple<key_t, offset_t>> keys_expected(keys_in_out);
std::stable_sort(keys_expected.begin(), keys_expected.end(), compare_first_lt_op_t{});

REQUIRE(keys_expected == keys_in_out);
}

CUB_TEST("DeviceMergeSort::SortPairsCopy works", "[merge][sort][device]", wide_key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<key_t> keys_in(num_items);
thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in.begin(), rank_to_key_op_t<offset_t, key_t>{});

// Perform sort
thrust::device_vector<key_t> keys_out(num_items);
thrust::device_vector<offset_t> values_out(num_items);
sort_pairs_copy(
thrust::raw_pointer_cast(keys_in.data()),
thrust::raw_pointer_cast(key_ranks.data()),
thrust::raw_pointer_cast(keys_out.data()),
thrust::raw_pointer_cast(values_out.data()),
num_items,
custom_less_op_t{});

// Verify results
thrust::host_vector<key_t> keys_expected(num_items);
thrust::host_vector<offset_t> values_expected(num_items);
auto key_ranks_it = thrust::make_counting_iterator(offset_t{});
thrust::transform(key_ranks_it, key_ranks_it + num_items, keys_expected.begin(), rank_to_key_op_t<offset_t, key_t>{});
thrust::sequence(values_expected.begin(), values_expected.end());

REQUIRE(keys_expected == keys_out);
REQUIRE(values_expected == values_out);
}

CUB_TEST("DeviceMergeSort::SortPairs works", "[merge][sort][device]", wide_key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<key_t> keys_in_out(num_items);
thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in_out.begin(), rank_to_key_op_t<offset_t, key_t>{});

// Perform sort
sort_pairs(thrust::raw_pointer_cast(keys_in_out.data()),
thrust::raw_pointer_cast(key_ranks.data()),
num_items,
custom_less_op_t{});

// Verify results
thrust::host_vector<key_t> keys_expected(num_items);
thrust::host_vector<offset_t> values_expected(num_items);
auto key_ranks_it = thrust::make_counting_iterator(offset_t{});
thrust::transform(key_ranks_it, key_ranks_it + num_items, keys_expected.begin(), rank_to_key_op_t<offset_t, key_t>{});
thrust::sequence(values_expected.begin(), values_expected.end());

REQUIRE(keys_expected == keys_in_out);
REQUIRE(values_expected == key_ranks);
}

CUB_TEST("DeviceMergeSort::StableSortPairs works and performs a stable sort", "[merge][sort][device]", key_types)
{
using key_t = typename c2h::get<0, TestType>;
using offset_t = std::int32_t;

// Prepare input
const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000}));
thrust::device_vector<offset_t> key_ranks(num_items);
get_key_ranks(key_ranks);
thrust::device_vector<key_t> keys_in_out(num_items);
// We make sure to generate a few items that compare equally to check for stability
auto key_rank_mod_it = thrust::make_transform_iterator(key_ranks.begin(), mod_op_t<offset_t>{128});
thrust::transform(
key_rank_mod_it, key_rank_mod_it + num_items, keys_in_out.begin(), rank_to_key_op_t<offset_t, key_t>{});

// Prepare host data for verification
thrust::host_vector<key_t> keys_expected(num_items);
thrust::host_vector<offset_t> values_expected(key_ranks);
auto expected_value_mod_it = thrust::make_transform_iterator(values_expected.begin(), mod_op_t<offset_t>{128});
thrust::transform(expected_value_mod_it,
expected_value_mod_it + num_items,
keys_expected.begin(),
rank_to_key_op_t<offset_t, key_t>{});
auto zipped_expected_it = thrust::make_zip_iterator(keys_expected.begin(), values_expected.begin());
std::stable_sort(zipped_expected_it, zipped_expected_it + num_items, compare_first_lt_op_t{});

// Perform sort
stable_sort_pairs(thrust::raw_pointer_cast(keys_in_out.data()),
thrust::raw_pointer_cast(key_ranks.data()),
num_items,
custom_less_op_t{});

REQUIRE(keys_expected == keys_in_out);
REQUIRE(values_expected == key_ranks);
}
Loading

0 comments on commit 572938e

Please sign in to comment.