From 6a3bd1d11297794790d79bd40700a695af8eb6db Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 26 Feb 2025 20:03:48 +0000 Subject: [PATCH] added benchmark --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/join/cardinality_join.cu | 81 +++++++++++++++++++++++++ cpp/benchmarks/join/join_common.hpp | 26 +++++++- 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 cpp/benchmarks/join/cardinality_join.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 03f11cc957b..f60f2b8898d 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -202,7 +202,7 @@ ConfigureNVBench( # * join benchmark -------------------------------------------------------------------------------- ConfigureNVBench( JOIN_NVBENCH join/left_join.cu join/conditional_join.cu join/join.cu join/mixed_join.cu - join/distinct_join.cu + join/distinct_join.cu join/cardinality_join.cu ) # ################################################################################################## diff --git a/cpp/benchmarks/join/cardinality_join.cu b/cpp/benchmarks/join/cardinality_join.cu new file mode 100644 index 00000000000..4b1f4dac4f0 --- /dev/null +++ b/cpp/benchmarks/join/cardinality_join.cu @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +template +void nvbench_inner_join(nvbench::state& state, + nvbench::type_list>) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const selectivity = static_cast(state.get_float64("selectivity")); + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, + cudf::null_equality compare_nulls) { + return cudf::inner_join(left_input, right_input, compare_nulls); + }; + BM_join(state, join, selectivity, cardinality); +} + +template +void nvbench_left_join(nvbench::state& state, nvbench::type_list>) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const selectivity = static_cast(state.get_float64("selectivity")); + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, + cudf::null_equality compare_nulls) { + return cudf::left_join(left_input, right_input, compare_nulls); + }; + BM_join(state, join, selectivity, cardinality); +} + +template +void nvbench_full_join(nvbench::state& state, nvbench::type_list>) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const selectivity = static_cast(state.get_float64("selectivity")); + auto join = [](cudf::table_view const& left_input, + cudf::table_view const& right_input, + cudf::null_equality compare_nulls) { + return cudf::full_join(left_input, right_input, compare_nulls); + }; + BM_join(state, join, selectivity, cardinality); +} + +NVBENCH_BENCH_TYPES(nvbench_inner_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("low_cardinality_inner_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000}) + .add_float64_axis("selectivity", {0.3, 0.6, 0.9}); + +NVBENCH_BENCH_TYPES(nvbench_left_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, 
JOIN_NULLABLE_RANGE)) + .set_name("low_cardinality_left_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000}) + .add_float64_axis("selectivity", {0.3, 0.6, 0.9}); + +NVBENCH_BENCH_TYPES(nvbench_full_join, NVBENCH_TYPE_AXES(JOIN_KEY_TYPE_RANGE, JOIN_NULLABLE_RANGE)) + .set_name("low_cardinality_full_join") + .set_type_axes_names({"Key", "Nullable"}) + .add_int64_axis("left_size", JOIN_SIZE_RANGE) + .add_int64_axis("right_size", JOIN_SIZE_RANGE) + .add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000}) + .add_float64_axis("selectivity", {0.3, 0.6, 0.9}); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index adb7cd26754..41d6daa260e 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -60,12 +61,28 @@ struct null75_generator { enum class join_t { CONDITIONAL, MIXED, HASH }; +namespace { +/** Prints a table's row and column counts, then each column's cudf::distinct_count (null_policy::EXCLUDE, nan_policy::NAN_IS_NULL). */ +void print_statistics(cudf::table_view t) +{ + std::cout << "=====================================\n"; + std::cout << "Number of rows = " << t.num_rows() << ", number of columns = " << t.num_columns() + << "\n"; + for (cudf::size_type i = 0; i < t.num_columns(); i++) { + auto num_unique = + cudf::distinct_count(t.column(i), cudf::null_policy::EXCLUDE, cudf::nan_policy::NAN_IS_NULL); + std::cout << "Number of unique elements in column " << i << " = " << num_unique << "\n"; + } + std::cout << "=====================================\n"; +} +} // namespace + template -void BM_join(state_type& state, Join JoinFunc) +void BM_join(state_type& state, Join JoinFunc, double selectivity = 0.3, int multiplicity = 1) { auto const right_size = static_cast(state.get_int64("right_size")); auto const left_size = 
static_cast(state.get_int64("left_size")); @@ -75,8 +92,10 @@ void BM_join(state_type& state, Join JoinFunc) return; } + /* double const selectivity = 0.3; int const multiplicity = 1; + */ // Generate build and probe tables auto right_random_null_mask = [](int size) { @@ -147,6 +166,11 @@ void BM_join(state_type& state, Join JoinFunc) cudf::table_view left_table( {left_key_column0->view(), left_key_column1->view(), *left_payload_column}); + std::cout << "Probe table stats\n"; + print_statistics(left_table); + std::cout << "Build table stats\n"; + print_statistics(right_table); + // Setup join parameters and result table [[maybe_unused]] std::vector columns_to_join = {0}; state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));