forked from rapidsai/cudf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpartition.cpp
73 lines (60 loc) · 2.49 KB
/
partition.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
#include <cudf/partitioning.hpp>
#include <algorithm>
#include <numeric>
// Benchmark fixture (derived from cudf::benchmark) under which the
// hash_partition benchmark in this file is defined and registered.
class Hashing : public cudf::benchmark {};
/**
 * @brief Benchmark body that times `cudf::hash_partition` on a generated table.
 *
 * Benchmark arguments are read from `state`:
 *  - `range(0)`: number of rows in the input table
 *  - `range(1)`: number of columns; every column index is used as a hash key
 *  - `range(2)`: number of partitions requested
 *
 * After the timed loop the processed-bytes counter is set to
 * `iterations * (2 * rows * cols * sizeof(T) + partitions * sizeof(cudf::size_type))`.
 *
 * @tparam T Element type; selects the column dtype via `cudf::type_to_id<T>()` and
 *           sizes the bytes-processed estimate
 * @param state Google Benchmark state providing the arguments and receiving the counter
 */
template <class T>
void BM_hash_partition(benchmark::State& state)
{
  auto const row_total       = state.range(0);
  auto const column_total    = state.range(1);
  auto const partition_total = state.range(2);

  // Hash on every column: key indices are 0, 1, ..., column_total - 1.
  std::vector<cudf::size_type> key_columns(column_total);
  std::iota(key_columns.begin(), key_columns.end(), 0);

  // Build the owning table first; the non-owning view over it is what gets partitioned.
  row_count const table_rows{static_cast<cudf::size_type>(row_total)};
  auto source_table =
    create_sequence_table(cycle_dtypes({cudf::type_to_id<T>()}, column_total), table_rows);
  auto const source_view = cudf::table_view(*source_table);

  for (auto _ : state) {
    // The timer is declared before the result so that the result is destroyed first.
    // NOTE(review): the `true` argument presumably requests a cache flush before timing —
    // confirm against the cuda_event_timer declaration.
    cuda_event_timer scoped_timer(state, true);
    auto partitioned = cudf::hash_partition(source_view, key_columns, partition_total);
  }

  // The table payload is counted twice: once as read and once as written.
  auto const table_bytes = row_total * column_total * sizeof(T);
  // One cudf::size_type per partition (presumably the returned partition offsets).
  auto const per_partition_bytes = partition_total * sizeof(cudf::size_type);

  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
                          (table_bytes + table_bytes + per_partition_bytes));
}
// Defines the `hash_partition` benchmark on the `Hashing` fixture; its body
// runs BM_hash_partition with T = double.
BENCHMARK_DEFINE_F(Hashing, hash_partition)
(::benchmark::State& state) { BM_hash_partition<double>(state); }
/**
 * @brief Adds the benchmark's argument sets as `{rows, columns, partitions}` triples.
 *
 * The sweep covers, from outermost to innermost loop:
 *  - columns:    1, 16, 256              (2^0, 2^4, 2^8)
 *  - partitions: 64, 128, ..., 1024      (2^6 .. 2^10)
 *  - rows:       2^17, 2^18, ..., 2^21
 *
 * @param b Benchmark object on which `Args` is called once per combination
 */
static void CustomRanges(benchmark::internal::Benchmark* b)
{
  for (int column_shift = 0; column_shift <= 8; column_shift += 4) {
    int const columns = 1 << column_shift;
    for (int partition_shift = 6; partition_shift <= 10; ++partition_shift) {
      int const partitions = 1 << partition_shift;
      for (int row_shift = 17; row_shift <= 21; ++row_shift) {
        int const rows = 1 << row_shift;
        b->Args({rows, columns, partitions});
      }
    }
  }
}
// Registers Hashing/hash_partition with the argument sets added by
// CustomRanges, reports in milliseconds, and uses manually reported time
// (presumably supplied by the cuda_event_timer in the benchmark body).
BENCHMARK_REGISTER_F(Hashing, hash_partition)
->Apply(CustomRanges)
->Unit(benchmark::kMillisecond)
->UseManualTime();