diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 21cc9b8c9e2..cc674732ba4 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cachetools - clang-tools=16.0.6 - clang==16.0.6 -- cmake>=3.26.4,!=3.30.0 +- cmake>=3.30.4 - cramjam - cubinlinker - cuda-nvtx=11.8 diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index 939d6ff9eb9..7593a72cc68 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cachetools - clang-tools=16.0.6 - clang==16.0.6 -- cmake>=3.26.4,!=3.30.0 +- cmake>=3.30.4 - cramjam - cuda-cudart-dev - cuda-nvcc diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index a4a6a0910ce..bab277b8f60 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -13,7 +13,7 @@ c_stdlib_version: - "2.28" cmake_version: - - ">=3.26.4,!=3.30.0" + - ">=3.30.4" cuda_compiler: - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index a4a6a0910ce..bab277b8f60 100644 --- a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -13,7 +13,7 @@ c_stdlib_version: - "2.28" cmake_version: - - ">=3.26.4,!=3.30.0" + - ">=3.30.4" cuda_compiler: - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 4d75646da78..48b2acf3a02 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -17,7 +17,7 @@ c_stdlib_version: - "2.28" cmake_version: - - ">=3.26.4,!=3.30.0" + - ">=3.30.4" dlpack_version: - ">=0.8,<1.0" diff --git a/conda/recipes/pylibcudf/conda_build_config.yaml b/conda/recipes/pylibcudf/conda_build_config.yaml index a4a6a0910ce..bab277b8f60 100644 --- a/conda/recipes/pylibcudf/conda_build_config.yaml +++ b/conda/recipes/pylibcudf/conda_build_config.yaml @@ -13,7 +13,7 @@ c_stdlib_version: - "2.28" cmake_version: - - ">=3.26.4,!=3.30.0" + - ">=3.30.4" cuda_compiler: - cuda-nvcc # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")] diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2e4dd21667e..bb4d20f837c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../rapids_config.cmake) include(rapids-cmake) diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt index 8e89b461e30..455494a40eb 100644 --- a/cpp/examples/basic/CMakeLists.txt +++ b/cpp/examples/basic/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2020-2025, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../set_cuda_architecture.cmake) diff --git a/cpp/examples/billion_rows/CMakeLists.txt b/cpp/examples/billion_rows/CMakeLists.txt index 603c8d0b457..f7dbd3e79b1 100644 --- a/cpp/examples/billion_rows/CMakeLists.txt +++ b/cpp/examples/billion_rows/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../set_cuda_architecture.cmake) diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt index 6f1249beaaa..37a55b98093 100644 --- a/cpp/examples/interop/CMakeLists.txt +++ b/cpp/examples/interop/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../set_cuda_architecture.cmake) diff --git a/cpp/examples/nested_types/CMakeLists.txt b/cpp/examples/nested_types/CMakeLists.txt index e7972d1531b..4df41f2acd6 100644 --- a/cpp/examples/nested_types/CMakeLists.txt +++ b/cpp/examples/nested_types/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2023-2025, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../set_cuda_architecture.cmake) diff --git a/cpp/examples/parquet_io/CMakeLists.txt b/cpp/examples/parquet_io/CMakeLists.txt index 17f86fdf5e0..da12b7056fb 100644 --- a/cpp/examples/parquet_io/CMakeLists.txt +++ b/cpp/examples/parquet_io/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2024-2025, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../set_cuda_architecture.cmake) diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt index 9010d495715..a0831488d60 100644 --- a/cpp/examples/strings/CMakeLists.txt +++ b/cpp/examples/strings/CMakeLists.txt @@ -1,6 +1,6 @@ # Copyright (c) 2022-2025, NVIDIA CORPORATION. -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../set_cuda_architecture.cmake) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 9760ecfe067..26c81e7fd2f 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -11,7 +11,7 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../rapids_config.cmake) include(rapids-cmake) diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 6fc49afd7ac..4237e3f0954 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -308,11 +308,11 @@ std::unique_ptr for_each_concatenate(host_span views, auto count = 0; for (auto& v : views) { - cudaMemcpyAsync(m_view.begin() + count, - v.begin(), - v.size() * sizeof(T), - cudaMemcpyDeviceToDevice, - stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync(m_view.begin() + count, + v.begin(), + v.size() * sizeof(T), + cudaMemcpyDefault, + stream.value())); count += v.size(); } diff --git a/cpp/src/io/comp/comp.cpp b/cpp/src/io/comp/comp.cpp index 3800835eaf1..280c07a4ff1 100644 --- a/cpp/src/io/comp/comp.cpp +++ b/cpp/src/io/comp/comp.cpp @@ -18,7 +18,6 @@ #include "gpuinflate.hpp" #include "io/utilities/getenv_or.hpp" -#include "io/utilities/hostdevice_vector.hpp" #include "nvcomp_adapter.hpp" #include @@ -32,14 +31,17 @@ #include #include // GZIP compression +#include + namespace cudf::io::detail { namespace { auto& h_comp_pool() { - static std::size_t pool_size = - getenv_or("LIBCUDF_HOST_COMPRESSION_NUM_THREADS", std::thread::hardware_concurrency()); + static const std::size_t default_pool_size = std::min(32u, std::thread::hardware_concurrency()); + static const std::size_t pool_size = + getenv_or("LIBCUDF_HOST_COMPRESSION_NUM_THREADS", default_pool_size); static BS::thread_pool pool(pool_size); return pool; } @@ -92,35 +94,199 @@ std::vector compress_gzip(host_span src) return dst; } -/** - * @brief SNAPPY device compressor - */ -std::vector compress_snappy(host_span src, - rmm::cuda_stream_view stream) +namespace snappy { + +template +[[nodiscard]] T load(uint8_t const* ptr) +{ + T value; + std::memcpy(&value, ptr, sizeof(T)); + return value; +} + +class hash_table { + std::vector tbl; + static constexpr int hash_table_bits = 15; + + public: + hash_table() : tbl(1 << hash_table_bits, 0) {} + + void clear() { std::fill(tbl.begin(), tbl.end(), 0); } + + [[nodiscard]] uint16_t* entry(uint32_t bytes) + { + constexpr uint32_t multiplier = 0x1e35a7bd; + auto const hash = (bytes * multiplier) >> (31 - hash_table_bits); + return tbl.data() + hash / sizeof(uint16_t); + } +}; + +uint8_t* emit_literal(uint8_t* out_begin, uint8_t const* literal_begin, uint8_t const* literal_end) +{ + auto const literal_size = literal_end - literal_begin; + if (literal_size == 0) { return out_begin; } + auto const n = literal_size - 1; + + auto out_it = out_begin; + if (n < 60) { + // Fits into a single tag byte + *out_it++ = n << 2; + } else { + auto const log2_n = 31 - __builtin_clz(n); + auto const count = (log2_n >> 3) + 1; + *out_it++ = (59 + count) << 2; + std::memcpy(out_it, &n, count); + out_it += count; + } + std::memcpy(out_it, literal_begin, literal_size); + return out_it + literal_size; +} + +uint8_t* emit_copy(uint8_t* out_begin, size_t offset, size_t len) +{ + while (len > 0) { + auto const copy_len = std::min(len, 64ul); + auto const out_val = 2 + ((copy_len - 1) << 2) + (offset << 8); + std::memcpy(out_begin, &out_val, 3); + + out_begin += 3; + len -= copy_len; + } + return out_begin; +} + +size_t compress_block(host_span input, hash_table& table, host_span output) +{ + auto const [in_remain, out_remain] = [&]() -> std::pair { + auto in_it = input.begin(); + auto out_it = output.begin(); + + // The algorithm reads 8 bytes at a time, so we need to ensure there are at least 8 bytes + auto const input_max = input.end() - sizeof(uint64_t); + while (in_it < input_max) { + auto const next_emit = in_it++; + auto data = load(in_it); + uint32_t stride = 1; + uint8_t const* candidate = nullptr; + + auto word_match_found = [&]() { + if (input_max - in_it < 16) { return false; } + for (size_t word_idx = 0; word_idx < 4; ++word_idx) { + for (size_t byte_idx = 0; byte_idx < sizeof(uint32_t); ++byte_idx) { + auto const offset = sizeof(uint32_t) * word_idx + byte_idx; + auto* const entry = table.entry(static_cast(data)); + candidate = input.begin() + *entry; + *entry = in_it - input.data() + offset; + + if (load(candidate) == static_cast(data)) { + *(out_it++) = offset * sizeof(uint32_t); + std::memcpy(out_it, next_emit, offset + 1); + in_it += offset; + out_it += offset + 1; + stride = 1; + return true; + } + data >>= 8; + } + // Fetch the next eight bytes + data = load(in_it + sizeof(uint32_t) * (word_idx + 1)); + } + in_it += 16; + return false; + }(); + + if (not word_match_found) { + // keep looking for a match with increasing stride + while (true) { + auto* const entry = table.entry(static_cast(data)); + candidate = input.begin() + *entry; + *entry = in_it - input.begin(); + if (static_cast(data) == load(candidate)) { + stride = 1; + break; + } + + auto const next_input = in_it + stride; + if (next_input > input_max) { + // Reached the end of the input without finding a match + return {next_emit, out_it}; + } + + data = load(next_input); + in_it = next_input; + stride += 1; + } + + // Emit data prior to the match as literal + out_it = emit_literal(out_it, next_emit, in_it); + } + + // Emit match(es) + do { + auto const match_len = std::mismatch(in_it, input.end(), candidate).first - in_it; + out_it = emit_copy(out_it, in_it - candidate, match_len); + + in_it += match_len; + if (in_it >= input_max) { + // Reached the end of the input, no more matches to look for + return {in_it, out_it}; + } + data = load(in_it); + *table.entry(load(in_it - 1)) = in_it - input.begin() - 1; + auto* const entry = table.entry(data); + candidate = input.begin() + *entry; + *entry = in_it - input.begin(); + + } while (static_cast(data) == load(candidate)); + } + + return {in_it, out_it}; + }(); + + // Emit the remaining data as a literal + return emit_literal(out_remain, in_remain, input.end()) - output.begin(); +} + +void append_varint(std::vector& output, size_t v) +{ + while (v > 127) { + output.push_back((v & 0x7F) | 0x80); + v >>= 7; + } + output.push_back(v); +} + +[[nodiscard]] std::vector compress(host_span src) { - auto const d_src = - cudf::detail::make_device_uvector_async(src, stream, cudf::get_current_device_resource_ref()); - cudf::detail::hostdevice_vector> inputs(1, stream); - inputs[0] = d_src; - inputs.host_to_device_async(stream); - - auto dst_size = compress_max_output_chunk_size(nvcomp::compression_type::SNAPPY, src.size()); - rmm::device_uvector d_dst(dst_size, stream); - cudf::detail::hostdevice_vector> outputs(1, stream); - outputs[0] = d_dst; - outputs.host_to_device_async(stream); - - cudf::detail::hostdevice_vector hd_status(1, stream); - hd_status[0] = {}; - hd_status.host_to_device_async(stream); - - nvcomp::batched_compress(nvcomp::compression_type::SNAPPY, inputs, outputs, hd_status, stream); - - hd_status.device_to_host_sync(stream); - CUDF_EXPECTS(hd_status[0].status == compression_status::SUCCESS, "snappy compression failed"); - return cudf::detail::make_std_vector_sync(d_dst, stream); + std::vector dst; + append_varint(dst, src.size()); + dst.reserve(dst.size() + max_compressed_size(compression_type::SNAPPY, src.size())); + + hash_table table; // reuse hash table across blocks + constexpr size_t block_size = 1 << 16; + auto const block_max_compressed_size = max_compressed_size(compression_type::SNAPPY, block_size); + for (std::size_t src_offset = 0; src_offset < src.size(); src_offset += block_size) { + // Compress data in blocks of limited size + auto const block = src.subspan(src_offset, std::min(src.size() - src_offset, block_size)); + + auto const previous_size = dst.size(); + auto const curr_block_max_comp_size = + (block.size() == block_size) ? block_max_compressed_size + : max_compressed_size(compression_type::SNAPPY, block.size()); + dst.resize(previous_size + curr_block_max_comp_size); + auto const block_dst = + host_span{dst.data() + previous_size, dst.size() - previous_size}; + + table.clear(); + auto const comp_block_size = compress_block(block, table, block_dst); + dst.resize(previous_size + comp_block_size); + } + + return dst; } +} // namespace snappy + void device_compress(compression_type compression, device_span const> inputs, device_span const> outputs, @@ -156,6 +322,13 @@ void host_compress(compression_type compression, auto const h_outputs = cudf::detail::make_host_vector_async(outputs, stream); stream.synchronize(); + // Generate order vector to submit largest tasks first + std::vector task_order(num_chunks); + std::iota(task_order.begin(), task_order.end(), 0); + std::sort(task_order.begin(), task_order.end(), [&](size_t a, size_t b) { + return h_inputs[a].size() > h_inputs[b].size(); + }); + std::vector> tasks; auto const num_streams = std::min({num_chunks, @@ -163,9 +336,12 @@ void host_compress(compression_type compression, h_comp_pool().get_thread_count()}); auto const streams = cudf::detail::fork_streams(stream, num_streams); for (size_t i = 0; i < num_chunks; ++i) { + auto const idx = task_order[i]; auto const cur_stream = streams[i % streams.size()]; - auto task = [d_in = h_inputs[i], d_out = h_outputs[i], cur_stream, compression]() -> size_t { - auto const h_in = cudf::detail::make_host_vector_sync(d_in, cur_stream); + auto task = + [d_in = h_inputs[idx], d_out = h_outputs[idx], cur_stream, compression]() -> size_t { + auto h_in = cudf::detail::make_pinned_vector_async(d_in.size(), cur_stream); + cudf::detail::cuda_memcpy(h_in, d_in, cur_stream); auto const h_out = compress(compression, h_in, cur_stream); cudf::detail::cuda_memcpy(d_out.subspan(0, h_out.size()), h_out, cur_stream); return h_out.size(); @@ -174,7 +350,7 @@ void host_compress(compression_type compression, } for (auto i = 0ul; i < num_chunks; ++i) { - h_results[i] = {tasks[i].get(), compression_status::SUCCESS}; + h_results[task_order[i]] = {tasks[i].get(), compression_status::SUCCESS}; } cudf::detail::cuda_memcpy_async(results, h_results, stream); } @@ -183,6 +359,7 @@ void host_compress(compression_type compression, { switch (compression) { case compression_type::GZIP: + case compression_type::SNAPPY: case compression_type::NONE: return true; default: return false; } @@ -212,7 +389,7 @@ void host_compress(compression_type compression, if (not host_compression_supported(compression)) { return false; } if (not device_compression_supported(compression)) { return true; } // If both host and device compression are supported, use the host if the env var is set - return getenv_or("LIBCUDF_USE_HOST_COMPRESSION", 0); + return getenv_or("LIBCUDF_HOST_COMPRESSION", std::string{"OFF"}) == "ON"; } } // namespace @@ -249,12 +426,12 @@ std::optional compress_max_allowed_chunk_size(compression_type compressi std::vector compress(compression_type compression, host_span src, - rmm::cuda_stream_view stream) + rmm::cuda_stream_view) { CUDF_FUNC_RANGE(); switch (compression) { case compression_type::GZIP: return compress_gzip(src); - case compression_type::SNAPPY: return compress_snappy(src, stream); + case compression_type::SNAPPY: return snappy::compress(src); default: CUDF_FAIL("Unsupported compression type"); } } diff --git a/cpp/src/io/utilities/getenv_or.hpp b/cpp/src/io/utilities/getenv_or.hpp index acfd2221797..4d5c3ec6d22 100644 --- a/cpp/src/io/utilities/getenv_or.hpp +++ b/cpp/src/io/utilities/getenv_or.hpp @@ -45,7 +45,7 @@ T getenv_or(std::string_view env_var_name, T default_val) ss.str()); } - if (env_val == nullptr) { return default_val; } + if (env_val == nullptr) { return std::move(default_val); } std::stringstream sstream(env_val); T converted_val; diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index fd8cb3f22f2..cfc6a0dc425 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -298,7 +298,7 @@ ConfigureTest( # ################################################################################################## # * io tests -------------------------------------------------------------------------------------- -ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) +ConfigureTest(COMPRESSION_TEST io/comp/comp_test.cpp) ConfigureTest(ROW_SELECTION_TEST io/row_selection_test.cpp) ConfigureTest( diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/comp_test.cpp similarity index 86% rename from cpp/tests/io/comp/decomp_test.cpp rename to cpp/tests/io/comp/comp_test.cpp index 5bbe8b63c47..e3bee708485 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/comp_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,9 @@ * limitations under the License. */ +#include "io/comp/comp.hpp" #include "io/comp/gpuinflate.hpp" +#include "io/comp/io_uncomp.hpp" #include "io/utilities/hostdevice_vector.hpp" #include @@ -34,6 +36,12 @@ using cudf::io::detail::compression_result; using cudf::io::detail::compression_status; namespace nvcomp = cudf::io::detail::nvcomp; +[[nodiscard]] std::vector vector_from_string(std::string const& str) +{ + return {reinterpret_cast(str.data()), + reinterpret_cast(str.data() + str.size())}; +} + /** * @brief Base test fixture for decompression * @@ -42,12 +50,6 @@ namespace nvcomp = cudf::io::detail::nvcomp; */ template struct DecompressTest : public cudf::test::BaseFixture { - [[nodiscard]] std::vector vector_from_string(std::string const str) const - { - return {reinterpret_cast(str.c_str()), - reinterpret_cast(str.c_str()) + strlen(str.c_str())}; - } - void Decompress(std::vector& decompressed, uint8_t const* compressed, size_t compressed_size) @@ -76,6 +78,11 @@ struct DecompressTest : public cudf::test::BaseFixture { } }; +struct HostCompressTest : public cudf::test::BaseFixture { + HostCompressTest() { setenv("LIBCUDF_HOST_COMPRESSION", "ON", 1); } + ~HostCompressTest() override { unsetenv("LIBCUDF_HOST_COMPRESSION"); } +}; + /** * @brief Derived fixture for GZIP decompression */ @@ -222,4 +229,23 @@ TEST_F(NvcompConfigTest, Decompression) EXPECT_TRUE(decomp_disabled(compression_type::SNAPPY, {false, false})); } +TEST_F(HostCompressTest, SnappyCompression) +{ + std::vector expected; + expected.reserve(8 * (32 << 20)); + for (size_t size = 1; size < 32 << 20; size *= 2) { + // Using number strings to generate data that is compressible, but not trivially so + for (size_t i = size / 2; i < size; ++i) { + auto const num_string = std::to_string(i); + // Keep adding to the test data + expected.insert(expected.end(), num_string.begin(), num_string.end()); + } + auto const compressed = cudf::io::detail::compress( + cudf::io::compression_type::SNAPPY, expected, cudf::get_default_stream()); + auto const decompressed = + cudf::io::detail::decompress(cudf::io::compression_type::SNAPPY, compressed); + EXPECT_EQ(expected, decompressed); + } +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/dependencies.yaml b/dependencies.yaml index 83f4e96c748..e7840d56880 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -400,7 +400,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - &cmake_ver cmake>=3.26.4,!=3.30.0 + - &cmake_ver cmake>=3.30.4 - &ninja ninja build_all: common: diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky index 9f3305278cb..277e33bb8eb 100644 --- a/java/ci/Dockerfile.rocky +++ b/java/ci/Dockerfile.rocky @@ -33,7 +33,7 @@ RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERS RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids # 3.22.3+: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache -ARG CMAKE_VERSION=3.28.6 +ARG CMAKE_VERSION=3.30.7 # default x86_64 from x86 build, aarch64 cmake for arm build ARG CMAKE_ARCH=x86_64 RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ diff --git a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java index 372f919532e..009f5e12815 100644 --- a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,17 +23,34 @@ * that will be used by the ORC writer to write the file. */ public class ORCWriterOptions extends CompressionMetadataWriterOptions { + private int stripeSizeRows; private ORCWriterOptions(Builder builder) { super(builder); + this.stripeSizeRows = builder.stripeSizeRows; } public static Builder builder() { return new Builder(); } + public int getStripeSizeRows() { + return stripeSizeRows; + } + public static class Builder extends CompressionMetadataWriterOptions.Builder { + // < 1M rows default orc stripe rows, defined in cudf/cpp/include/cudf/io/orc.hpp + private int stripeSizeRows = 1000000; + + public Builder withStripeSizeRows(int stripeSizeRows) { + // maximum stripe size cannot be smaller than 512 + if (stripeSizeRows < 512) { + throw new IllegalArgumentException("Maximum stripe size cannot be smaller than 512"); + } + this.stripeSizeRows = stripeSizeRows; + return this; + } public ORCWriterOptions build() { return new ORCWriterOptions(this); diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 298f2cff6f3..422989143c7 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -475,6 +475,7 @@ private static native long writeORCFileBegin(String[] columnNames, int compression, int[] precisions, boolean[] isMapValues, + int stripeSizeRows, String filename) throws CudfException; /** @@ -501,6 +502,7 @@ private static native long writeORCBufferBegin(String[] columnNames, int compression, int[] precisions, boolean[] isMapValues, + int stripeSizeRows, HostBufferConsumer consumer, HostMemoryAllocator hostMemoryAllocator ) throws CudfException; @@ -1823,6 +1825,7 @@ private ORCTableWriter(ORCWriterOptions options, File outputFile) { options.getCompressionType().nativeId, options.getFlatPrecision(), options.getFlatIsMap(), + options.getStripeSizeRows(), outputFile.getAbsolutePath())); this.consumer = null; } @@ -1838,6 +1841,7 @@ private ORCTableWriter(ORCWriterOptions options, HostBufferConsumer consumer, options.getCompressionType().nativeId, options.getFlatPrecision(), options.getFlatIsMap(), + options.getStripeSizeRows(), consumer, hostMemoryAllocator)); this.consumer = consumer; } diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 3923d8b45e3..1fa6f6d561f 100644 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -11,7 +11,7 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../../../rapids_config.cmake) include(rapids-cmake) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 50c6ae842f4..e1b487b1f7c 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2480,6 +2480,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env, jint j_compression, jintArray j_precisions, jbooleanArray j_is_map, + jint j_stripe_size_rows, jobject consumer, jobject host_memory_allocator) { @@ -2535,6 +2536,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env, .enable_statistics(ORC_STATISTICS_ROW_GROUP) .key_value_metadata(kv_metadata) .compression_statistics(stats) + .stripe_size_rows(j_stripe_size_rows) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_orc_writer_handle* ret = new cudf::jni::native_orc_writer_handle( @@ -2555,6 +2557,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env, jint j_compression, jintArray j_precisions, jbooleanArray j_is_map, + jint j_stripe_size_rows, jstring j_output_path) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); @@ -2606,6 +2609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env, .enable_statistics(ORC_STATISTICS_ROW_GROUP) .key_value_metadata(kv_metadata) .compression_statistics(stats) + .stripe_size_rows(j_stripe_size_rows) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_orc_writer_handle* ret = diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt index 7193ada5b93..2a17bc5dbb7 100644 --- a/python/cudf/CMakeLists.txt +++ b/python/cudf/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../rapids_config.cmake) include(rapids-cuda) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index d716114cf7e..16cd97677ef 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -118,7 +118,7 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" requires = [ - "cmake>=3.26.4,!=3.30.0", + "cmake>=3.30.4", "cython>=3.0.3", "libcudf==25.4.*,>=0.0.0a0", "librmm==25.4.*,>=0.0.0a0", diff --git a/python/cudf/udf_cpp/CMakeLists.txt b/python/cudf/udf_cpp/CMakeLists.txt index fa7855cfc65..9f6b67d0cdc 100644 --- a/python/cudf/udf_cpp/CMakeLists.txt +++ b/python/cudf/udf_cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(rapids-cmake) include(rapids-cpm) diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt index fd835010c4e..3e12eb6aa41 100644 --- a/python/cudf_kafka/CMakeLists.txt +++ b/python/cudf_kafka/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../rapids_config.cmake) diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 4a7143e1134..424010e632c 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -83,7 +83,7 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" requires = [ - "cmake>=3.26.4,!=3.30.0", + "cmake>=3.30.4", "cython>=3.0.3", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt index 259492b98d1..d5450639471 100644 --- a/python/libcudf/CMakeLists.txt +++ b/python/libcudf/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../rapids_config.cmake) diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml index 18aa824c6df..01fe6097936 100644 --- a/python/libcudf/pyproject.toml +++ b/python/libcudf/pyproject.toml @@ -79,7 +79,7 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ - "cmake>=3.26.4,!=3.30.0", + "cmake>=3.30.4", "libkvikio==25.4.*,>=0.0.0a0", "librmm==25.4.*,>=0.0.0a0", "ninja", diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt index a4b831790fb..fe6e73a3f14 100644 --- a/python/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # ============================================================================= -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) +cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR) include(../../rapids_config.cmake) include(rapids-cuda) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index 2f846b5f0b9..939da65c1ec 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -109,7 +109,7 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" requires = [ - "cmake>=3.26.4,!=3.30.0", + "cmake>=3.30.4", "cython>=3.0.3", "libcudf==25.4.*,>=0.0.0a0", "librmm==25.4.*,>=0.0.0a0",