diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 21cc9b8c9e2..cc674732ba4 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -15,7 +15,7 @@ dependencies:
 - cachetools
 - clang-tools=16.0.6
 - clang==16.0.6
-- cmake>=3.26.4,!=3.30.0
+- cmake>=3.30.4
 - cramjam
 - cubinlinker
 - cuda-nvtx=11.8
diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
index 939d6ff9eb9..7593a72cc68 100644
--- a/conda/environments/all_cuda-128_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -15,7 +15,7 @@ dependencies:
 - cachetools
 - clang-tools=16.0.6
 - clang==16.0.6
-- cmake>=3.26.4,!=3.30.0
+- cmake>=3.30.4
 - cramjam
 - cuda-cudart-dev
 - cuda-nvcc
diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml
index a4a6a0910ce..bab277b8f60 100644
--- a/conda/recipes/cudf/conda_build_config.yaml
+++ b/conda/recipes/cudf/conda_build_config.yaml
@@ -13,7 +13,7 @@ c_stdlib_version:
   - "2.28"
 
 cmake_version:
-  - ">=3.26.4,!=3.30.0"
+  - ">=3.30.4"
 
 cuda_compiler:
   - cuda-nvcc  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml
index a4a6a0910ce..bab277b8f60 100644
--- a/conda/recipes/cudf_kafka/conda_build_config.yaml
+++ b/conda/recipes/cudf_kafka/conda_build_config.yaml
@@ -13,7 +13,7 @@ c_stdlib_version:
   - "2.28"
 
 cmake_version:
-  - ">=3.26.4,!=3.30.0"
+  - ">=3.30.4"
 
 cuda_compiler:
   - cuda-nvcc  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index 4d75646da78..48b2acf3a02 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -17,7 +17,7 @@ c_stdlib_version:
   - "2.28"
 
 cmake_version:
-  - ">=3.26.4,!=3.30.0"
+  - ">=3.30.4"
 
 dlpack_version:
   - ">=0.8,<1.0"
diff --git a/conda/recipes/pylibcudf/conda_build_config.yaml b/conda/recipes/pylibcudf/conda_build_config.yaml
index a4a6a0910ce..bab277b8f60 100644
--- a/conda/recipes/pylibcudf/conda_build_config.yaml
+++ b/conda/recipes/pylibcudf/conda_build_config.yaml
@@ -13,7 +13,7 @@ c_stdlib_version:
   - "2.28"
 
 cmake_version:
-  - ">=3.26.4,!=3.30.0"
+  - ">=3.30.4"
 
 cuda_compiler:
   - cuda-nvcc  # [not os.environ.get("RAPIDS_CUDA_VERSION", "").startswith("11")]
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2e4dd21667e..bb4d20f837c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../rapids_config.cmake)
 include(rapids-cmake)
diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
index 8e89b461e30..455494a40eb 100644
--- a/cpp/examples/basic/CMakeLists.txt
+++ b/cpp/examples/basic/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../set_cuda_architecture.cmake)
 
diff --git a/cpp/examples/billion_rows/CMakeLists.txt b/cpp/examples/billion_rows/CMakeLists.txt
index 603c8d0b457..f7dbd3e79b1 100644
--- a/cpp/examples/billion_rows/CMakeLists.txt
+++ b/cpp/examples/billion_rows/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../set_cuda_architecture.cmake)
 
diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt
index 6f1249beaaa..37a55b98093 100644
--- a/cpp/examples/interop/CMakeLists.txt
+++ b/cpp/examples/interop/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../set_cuda_architecture.cmake)
 
diff --git a/cpp/examples/nested_types/CMakeLists.txt b/cpp/examples/nested_types/CMakeLists.txt
index e7972d1531b..4df41f2acd6 100644
--- a/cpp/examples/nested_types/CMakeLists.txt
+++ b/cpp/examples/nested_types/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2023-2025, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../set_cuda_architecture.cmake)
 
diff --git a/cpp/examples/parquet_io/CMakeLists.txt b/cpp/examples/parquet_io/CMakeLists.txt
index 17f86fdf5e0..da12b7056fb 100644
--- a/cpp/examples/parquet_io/CMakeLists.txt
+++ b/cpp/examples/parquet_io/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../set_cuda_architecture.cmake)
 
diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt
index 9010d495715..a0831488d60 100644
--- a/cpp/examples/strings/CMakeLists.txt
+++ b/cpp/examples/strings/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../set_cuda_architecture.cmake)
 
diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt
index 9760ecfe067..26c81e7fd2f 100644
--- a/cpp/libcudf_kafka/CMakeLists.txt
+++ b/cpp/libcudf_kafka/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 # =============================================================================
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../../rapids_config.cmake)
 include(rapids-cmake)
diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index 6fc49afd7ac..4237e3f0954 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -308,11 +308,11 @@ std::unique_ptr<column> for_each_concatenate(host_span<column_view const> views,
 
   auto count = 0;
   for (auto& v : views) {
-    cudaMemcpyAsync(m_view.begin<T>() + count,
-                    v.begin<T>(),
-                    v.size() * sizeof(T),
-                    cudaMemcpyDeviceToDevice,
-                    stream.value());
+    CUDF_CUDA_TRY(cudaMemcpyAsync(m_view.begin<T>() + count,
+                                  v.begin<T>(),
+                                  v.size() * sizeof(T),
+                                  cudaMemcpyDefault,
+                                  stream.value()));
     count += v.size();
   }
 
diff --git a/cpp/src/io/comp/comp.cpp b/cpp/src/io/comp/comp.cpp
index 3800835eaf1..280c07a4ff1 100644
--- a/cpp/src/io/comp/comp.cpp
+++ b/cpp/src/io/comp/comp.cpp
@@ -18,7 +18,6 @@
 
 #include "gpuinflate.hpp"
 #include "io/utilities/getenv_or.hpp"
-#include "io/utilities/hostdevice_vector.hpp"
 #include "nvcomp_adapter.hpp"
 
 #include <cudf/detail/nvtx/ranges.hpp>
@@ -32,14 +31,17 @@
 #include <BS_thread_pool.hpp>
 #include <zlib.h>  // GZIP compression
 
+#include <numeric>
+
 namespace cudf::io::detail {
 
 namespace {
 
 auto& h_comp_pool()
 {
-  static std::size_t pool_size =
-    getenv_or("LIBCUDF_HOST_COMPRESSION_NUM_THREADS", std::thread::hardware_concurrency());
+  static const std::size_t default_pool_size = std::min(32u, std::thread::hardware_concurrency());
+  static const std::size_t pool_size =
+    getenv_or("LIBCUDF_HOST_COMPRESSION_NUM_THREADS", default_pool_size);
   static BS::thread_pool pool(pool_size);
   return pool;
 }
@@ -92,35 +94,199 @@ std::vector<std::uint8_t> compress_gzip(host_span<uint8_t const> src)
   return dst;
 }
 
-/**
- * @brief SNAPPY device compressor
- */
-std::vector<std::uint8_t> compress_snappy(host_span<uint8_t const> src,
-                                          rmm::cuda_stream_view stream)
+namespace snappy {
+
+template <typename T>
+[[nodiscard]] T load(uint8_t const* ptr)
+{
+  T value;
+  std::memcpy(&value, ptr, sizeof(T));
+  return value;
+}
+
+class hash_table {
+  std::vector<uint16_t> tbl;
+  static constexpr int hash_table_bits = 15;
+
+ public:
+  hash_table() : tbl(1 << hash_table_bits, 0) {}
+
+  void clear() { std::fill(tbl.begin(), tbl.end(), 0); }
+
+  [[nodiscard]] uint16_t* entry(uint32_t bytes)
+  {
+    constexpr uint32_t multiplier = 0x1e35a7bd;
+    auto const hash               = (bytes * multiplier) >> (31 - hash_table_bits);
+    return tbl.data() + hash / sizeof(uint16_t);
+  }
+};
+
+uint8_t* emit_literal(uint8_t* out_begin, uint8_t const* literal_begin, uint8_t const* literal_end)
+{
+  auto const literal_size = literal_end - literal_begin;
+  if (literal_size == 0) { return out_begin; }
+  auto const n = literal_size - 1;
+
+  auto out_it = out_begin;
+  if (n < 60) {
+    // Fits into a single tag byte
+    *out_it++ = n << 2;
+  } else {
+    auto const log2_n = 31 - __builtin_clz(n);
+    auto const count  = (log2_n >> 3) + 1;
+    *out_it++         = (59 + count) << 2;
+    std::memcpy(out_it, &n, count);
+    out_it += count;
+  }
+  std::memcpy(out_it, literal_begin, literal_size);
+  return out_it + literal_size;
+}
+
+uint8_t* emit_copy(uint8_t* out_begin, size_t offset, size_t len)
+{
+  while (len > 0) {
+    auto const copy_len = std::min(len, 64ul);
+    auto const out_val  = 2 + ((copy_len - 1) << 2) + (offset << 8);
+    std::memcpy(out_begin, &out_val, 3);
+
+    out_begin += 3;
+    len -= copy_len;
+  }
+  return out_begin;
+}
+
+size_t compress_block(host_span<uint8_t const> input, hash_table& table, host_span<uint8_t> output)
+{
+  auto const [in_remain, out_remain] = [&]() -> std::pair<uint8_t const*, uint8_t*> {
+    auto in_it  = input.begin();
+    auto out_it = output.begin();
+
+    // The algorithm reads 8 bytes at a time, so we need to ensure there are at least 8 bytes
+    auto const input_max = input.end() - sizeof(uint64_t);
+    while (in_it < input_max) {
+      auto const next_emit     = in_it++;
+      auto data                = load<uint64_t>(in_it);
+      uint32_t stride          = 1;
+      uint8_t const* candidate = nullptr;
+
+      auto word_match_found = [&]() {
+        if (input_max - in_it < 16) { return false; }
+        for (size_t word_idx = 0; word_idx < 4; ++word_idx) {
+          for (size_t byte_idx = 0; byte_idx < sizeof(uint32_t); ++byte_idx) {
+            auto const offset = sizeof(uint32_t) * word_idx + byte_idx;
+            auto* const entry = table.entry(static_cast<uint32_t>(data));
+            candidate         = input.begin() + *entry;
+            *entry            = in_it - input.data() + offset;
+
+            if (load<uint32_t>(candidate) == static_cast<uint32_t>(data)) {
+              *(out_it++) = offset * sizeof(uint32_t);
+              std::memcpy(out_it, next_emit, offset + 1);
+              in_it += offset;
+              out_it += offset + 1;
+              stride = 1;
+              return true;
+            }
+            data >>= 8;
+          }
+          // Fetch the next eight bytes
+          data = load<uint64_t>(in_it + sizeof(uint32_t) * (word_idx + 1));
+        }
+        in_it += 16;
+        return false;
+      }();
+
+      if (not word_match_found) {
+        // keep looking for a match with increasing stride
+        while (true) {
+          auto* const entry = table.entry(static_cast<uint32_t>(data));
+          candidate         = input.begin() + *entry;
+          *entry            = in_it - input.begin();
+          if (static_cast<uint32_t>(data) == load<uint32_t>(candidate)) {
+            stride = 1;
+            break;
+          }
+
+          auto const next_input = in_it + stride;
+          if (next_input > input_max) {
+            // Reached the end of the input without finding a match
+            return {next_emit, out_it};
+          }
+
+          data  = load<uint32_t>(next_input);
+          in_it = next_input;
+          stride += 1;
+        }
+
+        // Emit data prior to the match as literal
+        out_it = emit_literal(out_it, next_emit, in_it);
+      }
+
+      // Emit match(es)
+      do {
+        auto const match_len = std::mismatch(in_it, input.end(), candidate).first - in_it;
+        out_it               = emit_copy(out_it, in_it - candidate, match_len);
+
+        in_it += match_len;
+        if (in_it >= input_max) {
+          // Reached the end of the input, no more matches to look for
+          return {in_it, out_it};
+        }
+        data                                    = load<uint64_t>(in_it);
+        *table.entry(load<uint32_t>(in_it - 1)) = in_it - input.begin() - 1;
+        auto* const entry                       = table.entry(data);
+        candidate                               = input.begin() + *entry;
+        *entry                                  = in_it - input.begin();
+
+      } while (static_cast<uint32_t>(data) == load<uint32_t>(candidate));
+    }
+
+    return {in_it, out_it};
+  }();
+
+  // Emit the remaining data as a literal
+  return emit_literal(out_remain, in_remain, input.end()) - output.begin();
+}
+
+void append_varint(std::vector<uint8_t>& output, size_t v)
+{
+  while (v > 127) {
+    output.push_back((v & 0x7F) | 0x80);
+    v >>= 7;
+  }
+  output.push_back(v);
+}
+
+[[nodiscard]] std::vector<std::uint8_t> compress(host_span<uint8_t const> src)
 {
-  auto const d_src =
-    cudf::detail::make_device_uvector_async(src, stream, cudf::get_current_device_resource_ref());
-  cudf::detail::hostdevice_vector<device_span<uint8_t const>> inputs(1, stream);
-  inputs[0] = d_src;
-  inputs.host_to_device_async(stream);
-
-  auto dst_size = compress_max_output_chunk_size(nvcomp::compression_type::SNAPPY, src.size());
-  rmm::device_uvector<uint8_t> d_dst(dst_size, stream);
-  cudf::detail::hostdevice_vector<device_span<uint8_t>> outputs(1, stream);
-  outputs[0] = d_dst;
-  outputs.host_to_device_async(stream);
-
-  cudf::detail::hostdevice_vector<compression_result> hd_status(1, stream);
-  hd_status[0] = {};
-  hd_status.host_to_device_async(stream);
-
-  nvcomp::batched_compress(nvcomp::compression_type::SNAPPY, inputs, outputs, hd_status, stream);
-
-  hd_status.device_to_host_sync(stream);
-  CUDF_EXPECTS(hd_status[0].status == compression_status::SUCCESS, "snappy compression failed");
-  return cudf::detail::make_std_vector_sync<uint8_t>(d_dst, stream);
+  std::vector<uint8_t> dst;
+  append_varint(dst, src.size());
+  dst.reserve(dst.size() + max_compressed_size(compression_type::SNAPPY, src.size()));
+
+  hash_table table;  // reuse hash table across blocks
+  constexpr size_t block_size          = 1 << 16;
+  auto const block_max_compressed_size = max_compressed_size(compression_type::SNAPPY, block_size);
+  for (std::size_t src_offset = 0; src_offset < src.size(); src_offset += block_size) {
+    // Compress data in blocks of limited size
+    auto const block = src.subspan(src_offset, std::min(src.size() - src_offset, block_size));
+
+    auto const previous_size = dst.size();
+    auto const curr_block_max_comp_size =
+      (block.size() == block_size) ? block_max_compressed_size
+                                   : max_compressed_size(compression_type::SNAPPY, block.size());
+    dst.resize(previous_size + curr_block_max_comp_size);
+    auto const block_dst =
+      host_span<uint8_t>{dst.data() + previous_size, dst.size() - previous_size};
+
+    table.clear();
+    auto const comp_block_size = compress_block(block, table, block_dst);
+    dst.resize(previous_size + comp_block_size);
+  }
+
+  return dst;
 }
 
+}  // namespace snappy
+
 void device_compress(compression_type compression,
                      device_span<device_span<uint8_t const> const> inputs,
                      device_span<device_span<uint8_t> const> outputs,
@@ -156,6 +322,13 @@ void host_compress(compression_type compression,
   auto const h_outputs  = cudf::detail::make_host_vector_async(outputs, stream);
   stream.synchronize();
 
+  // Generate order vector to submit largest tasks first
+  std::vector<size_t> task_order(num_chunks);
+  std::iota(task_order.begin(), task_order.end(), 0);
+  std::sort(task_order.begin(), task_order.end(), [&](size_t a, size_t b) {
+    return h_inputs[a].size() > h_inputs[b].size();
+  });
+
   std::vector<std::future<size_t>> tasks;
   auto const num_streams =
     std::min<std::size_t>({num_chunks,
@@ -163,9 +336,12 @@ void host_compress(compression_type compression,
                            h_comp_pool().get_thread_count()});
   auto const streams = cudf::detail::fork_streams(stream, num_streams);
   for (size_t i = 0; i < num_chunks; ++i) {
+    auto const idx        = task_order[i];
     auto const cur_stream = streams[i % streams.size()];
-    auto task = [d_in = h_inputs[i], d_out = h_outputs[i], cur_stream, compression]() -> size_t {
-      auto const h_in  = cudf::detail::make_host_vector_sync(d_in, cur_stream);
+    auto task =
+      [d_in = h_inputs[idx], d_out = h_outputs[idx], cur_stream, compression]() -> size_t {
+      auto h_in = cudf::detail::make_pinned_vector_async<uint8_t>(d_in.size(), cur_stream);
+      cudf::detail::cuda_memcpy<uint8_t>(h_in, d_in, cur_stream);
       auto const h_out = compress(compression, h_in, cur_stream);
       cudf::detail::cuda_memcpy<uint8_t>(d_out.subspan(0, h_out.size()), h_out, cur_stream);
       return h_out.size();
@@ -174,7 +350,7 @@ void host_compress(compression_type compression,
   }
 
   for (auto i = 0ul; i < num_chunks; ++i) {
-    h_results[i] = {tasks[i].get(), compression_status::SUCCESS};
+    h_results[task_order[i]] = {tasks[i].get(), compression_status::SUCCESS};
   }
   cudf::detail::cuda_memcpy_async<compression_result>(results, h_results, stream);
 }
@@ -183,6 +359,7 @@ void host_compress(compression_type compression,
 {
   switch (compression) {
     case compression_type::GZIP:
+    case compression_type::SNAPPY:
     case compression_type::NONE: return true;
     default: return false;
   }
@@ -212,7 +389,7 @@ void host_compress(compression_type compression,
   if (not host_compression_supported(compression)) { return false; }
   if (not device_compression_supported(compression)) { return true; }
   // If both host and device compression are supported, use the host if the env var is set
-  return getenv_or("LIBCUDF_USE_HOST_COMPRESSION", 0);
+  return getenv_or("LIBCUDF_HOST_COMPRESSION", std::string{"OFF"}) == "ON";
 }
 
 }  // namespace
@@ -249,12 +426,12 @@ std::optional<size_t> compress_max_allowed_chunk_size(compression_type compressi
 
 std::vector<std::uint8_t> compress(compression_type compression,
                                    host_span<uint8_t const> src,
-                                   rmm::cuda_stream_view stream)
+                                   rmm::cuda_stream_view)
 {
   CUDF_FUNC_RANGE();
   switch (compression) {
     case compression_type::GZIP: return compress_gzip(src);
-    case compression_type::SNAPPY: return compress_snappy(src, stream);
+    case compression_type::SNAPPY: return snappy::compress(src);
     default: CUDF_FAIL("Unsupported compression type");
   }
 }
diff --git a/cpp/src/io/utilities/getenv_or.hpp b/cpp/src/io/utilities/getenv_or.hpp
index acfd2221797..4d5c3ec6d22 100644
--- a/cpp/src/io/utilities/getenv_or.hpp
+++ b/cpp/src/io/utilities/getenv_or.hpp
@@ -45,7 +45,7 @@ T getenv_or(std::string_view env_var_name, T default_val)
                   ss.str());
   }
 
-  if (env_val == nullptr) { return default_val; }
+  if (env_val == nullptr) { return std::move(default_val); }
 
   std::stringstream sstream(env_val);
   T converted_val;
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index fd8cb3f22f2..cfc6a0dc425 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -298,7 +298,7 @@ ConfigureTest(
 
 # ##################################################################################################
 # * io tests --------------------------------------------------------------------------------------
-ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp)
+ConfigureTest(COMPRESSION_TEST io/comp/comp_test.cpp)
 ConfigureTest(ROW_SELECTION_TEST io/row_selection_test.cpp)
 
 ConfigureTest(
diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/comp_test.cpp
similarity index 86%
rename from cpp/tests/io/comp/decomp_test.cpp
rename to cpp/tests/io/comp/comp_test.cpp
index 5bbe8b63c47..e3bee708485 100644
--- a/cpp/tests/io/comp/decomp_test.cpp
+++ b/cpp/tests/io/comp/comp_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,9 @@
  * limitations under the License.
  */
 
+#include "io/comp/comp.hpp"
 #include "io/comp/gpuinflate.hpp"
+#include "io/comp/io_uncomp.hpp"
 #include "io/utilities/hostdevice_vector.hpp"
 
 #include <cudf_test/base_fixture.hpp>
@@ -34,6 +36,12 @@ using cudf::io::detail::compression_result;
 using cudf::io::detail::compression_status;
 namespace nvcomp = cudf::io::detail::nvcomp;
 
+[[nodiscard]] std::vector<uint8_t> vector_from_string(std::string const& str)
+{
+  return {reinterpret_cast<uint8_t const*>(str.data()),
+          reinterpret_cast<uint8_t const*>(str.data() + str.size())};
+}
+
 /**
  * @brief Base test fixture for decompression
  *
@@ -42,12 +50,6 @@ namespace nvcomp = cudf::io::detail::nvcomp;
  */
 template <typename Decompressor>
 struct DecompressTest : public cudf::test::BaseFixture {
-  [[nodiscard]] std::vector<uint8_t> vector_from_string(std::string const str) const
-  {
-    return {reinterpret_cast<uint8_t const*>(str.c_str()),
-            reinterpret_cast<uint8_t const*>(str.c_str()) + strlen(str.c_str())};
-  }
-
   void Decompress(std::vector<uint8_t>& decompressed,
                   uint8_t const* compressed,
                   size_t compressed_size)
@@ -76,6 +78,11 @@ struct DecompressTest : public cudf::test::BaseFixture {
   }
 };
 
+struct HostCompressTest : public cudf::test::BaseFixture {
+  HostCompressTest() { setenv("LIBCUDF_HOST_COMPRESSION", "ON", 1); }
+  ~HostCompressTest() override { unsetenv("LIBCUDF_HOST_COMPRESSION"); }
+};
+
 /**
  * @brief Derived fixture for GZIP decompression
  */
@@ -222,4 +229,23 @@ TEST_F(NvcompConfigTest, Decompression)
   EXPECT_TRUE(decomp_disabled(compression_type::SNAPPY, {false, false}));
 }
 
+TEST_F(HostCompressTest, SnappyCompression)
+{
+  std::vector<uint8_t> expected;
+  expected.reserve(8 * (32 << 20));
+  for (size_t size = 1; size < 32 << 20; size *= 2) {
+    // Using number strings to generate data that is compressible, but not trivially so
+    for (size_t i = size / 2; i < size; ++i) {
+      auto const num_string = std::to_string(i);
+      // Keep adding to the test data
+      expected.insert(expected.end(), num_string.begin(), num_string.end());
+    }
+    auto const compressed = cudf::io::detail::compress(
+      cudf::io::compression_type::SNAPPY, expected, cudf::get_default_stream());
+    auto const decompressed =
+      cudf::io::detail::decompress(cudf::io::compression_type::SNAPPY, compressed);
+    EXPECT_EQ(expected, decompressed);
+  }
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/dependencies.yaml b/dependencies.yaml
index 83f4e96c748..e7840d56880 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -400,7 +400,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - &cmake_ver cmake>=3.26.4,!=3.30.0
+          - &cmake_ver cmake>=3.30.4
           - &ninja ninja
   build_all:
     common:
diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky
index 9f3305278cb..277e33bb8eb 100644
--- a/java/ci/Dockerfile.rocky
+++ b/java/ci/Dockerfile.rocky
@@ -33,7 +33,7 @@ RUN dnf --enablerepo=powertools install -y  scl-utils gcc-toolset-${TOOLSET_VERS
 RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids
 
 # 3.22.3+: CUDA architecture 'native' support + flexible CMAKE_<LANG>_*_LAUNCHER for ccache
-ARG CMAKE_VERSION=3.28.6
+ARG CMAKE_VERSION=3.30.7
 # default x86_64 from x86 build, aarch64 cmake for arm build
 ARG CMAKE_ARCH=x86_64
 RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \
diff --git a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
index 372f919532e..009f5e12815 100644
--- a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java
@@ -1,6 +1,6 @@
 /*
  *
- *  Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *  Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -23,17 +23,34 @@
  * that will be used by the ORC writer to write the file.
  */
 public class ORCWriterOptions extends CompressionMetadataWriterOptions {
+  private int stripeSizeRows;
 
   private ORCWriterOptions(Builder builder) {
     super(builder);
+    this.stripeSizeRows = builder.stripeSizeRows;
   }
 
   public static Builder builder() {
     return new Builder();
   }
 
+  public int getStripeSizeRows() {
+    return stripeSizeRows;
+  }
+
   public static class Builder extends CompressionMetadataWriterOptions.Builder
           <Builder, ORCWriterOptions> {
+    // < 1M rows default orc stripe rows, defined in cudf/cpp/include/cudf/io/orc.hpp
+    private int stripeSizeRows = 1000000;
+
+    public Builder withStripeSizeRows(int stripeSizeRows) {
+      // maximum stripe size cannot be smaller than 512
+      if (stripeSizeRows < 512) {
+        throw new IllegalArgumentException("Maximum stripe size cannot be smaller than 512");
+      }
+      this.stripeSizeRows = stripeSizeRows;
+      return this;
+    }
 
     public ORCWriterOptions build() {
       return new ORCWriterOptions(this);
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index 298f2cff6f3..422989143c7 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -475,6 +475,7 @@ private static native long writeORCFileBegin(String[] columnNames,
                                                int compression,
                                                int[] precisions,
                                                boolean[] isMapValues,
+                                               int stripeSizeRows,
                                                String filename) throws CudfException;
 
   /**
@@ -501,6 +502,7 @@ private static native long writeORCBufferBegin(String[] columnNames,
                                                  int compression,
                                                  int[] precisions,
                                                  boolean[] isMapValues,
+                                                 int stripeSizeRows,
                                                  HostBufferConsumer consumer,
                                                  HostMemoryAllocator hostMemoryAllocator
                                                  ) throws CudfException;
@@ -1823,6 +1825,7 @@ private ORCTableWriter(ORCWriterOptions options, File outputFile) {
           options.getCompressionType().nativeId,
           options.getFlatPrecision(),
           options.getFlatIsMap(),
+          options.getStripeSizeRows(),
           outputFile.getAbsolutePath()));
       this.consumer = null;
     }
@@ -1838,6 +1841,7 @@ private ORCTableWriter(ORCWriterOptions options, HostBufferConsumer consumer,
           options.getCompressionType().nativeId,
           options.getFlatPrecision(),
           options.getFlatIsMap(),
+          options.getStripeSizeRows(),
           consumer, hostMemoryAllocator));
       this.consumer = consumer;
     }
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 3923d8b45e3..1fa6f6d561f 100644
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -11,7 +11,7 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 # =============================================================================
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../../../../rapids_config.cmake)
 include(rapids-cmake)
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 50c6ae842f4..e1b487b1f7c 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -2480,6 +2480,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env,
                                               jint j_compression,
                                               jintArray j_precisions,
                                               jbooleanArray j_is_map,
+                                              jint j_stripe_size_rows,
                                               jobject consumer,
                                               jobject host_memory_allocator)
 {
@@ -2535,6 +2536,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env,
                                         .enable_statistics(ORC_STATISTICS_ROW_GROUP)
                                         .key_value_metadata(kv_metadata)
                                         .compression_statistics(stats)
+                                        .stripe_size_rows(j_stripe_size_rows)
                                         .build();
     auto writer_ptr                          = std::make_unique<cudf::io::orc_chunked_writer>(opts);
     cudf::jni::native_orc_writer_handle* ret = new cudf::jni::native_orc_writer_handle(
@@ -2555,6 +2557,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env,
                                                                    jint j_compression,
                                                                    jintArray j_precisions,
                                                                    jbooleanArray j_is_map,
+                                                                   jint j_stripe_size_rows,
                                                                    jstring j_output_path)
 {
   JNI_NULL_CHECK(env, j_col_names, "null columns", 0);
@@ -2606,6 +2609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env,
                                         .enable_statistics(ORC_STATISTICS_ROW_GROUP)
                                         .key_value_metadata(kv_metadata)
                                         .compression_statistics(stats)
+                                        .stripe_size_rows(j_stripe_size_rows)
                                         .build();
     auto writer_ptr = std::make_unique<cudf::io::orc_chunked_writer>(opts);
     cudf::jni::native_orc_writer_handle* ret =
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 7193ada5b93..2a17bc5dbb7 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../../rapids_config.cmake)
 include(rapids-cuda)
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index d716114cf7e..16cd97677ef 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -118,7 +118,7 @@ build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true"
 requires = [
-    "cmake>=3.26.4,!=3.30.0",
+    "cmake>=3.30.4",
     "cython>=3.0.3",
     "libcudf==25.4.*,>=0.0.0a0",
     "librmm==25.4.*,>=0.0.0a0",
diff --git a/python/cudf/udf_cpp/CMakeLists.txt b/python/cudf/udf_cpp/CMakeLists.txt
index fa7855cfc65..9f6b67d0cdc 100644
--- a/python/cudf/udf_cpp/CMakeLists.txt
+++ b/python/cudf/udf_cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.26.4)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(rapids-cmake)
 include(rapids-cpm)
diff --git a/python/cudf_kafka/CMakeLists.txt b/python/cudf_kafka/CMakeLists.txt
index fd835010c4e..3e12eb6aa41 100644
--- a/python/cudf_kafka/CMakeLists.txt
+++ b/python/cudf_kafka/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../../rapids_config.cmake)
 
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index 4a7143e1134..424010e632c 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -83,7 +83,7 @@ build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true"
 requires = [
-    "cmake>=3.26.4,!=3.30.0",
+    "cmake>=3.30.4",
     "cython>=3.0.3",
     "ninja",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt
index 259492b98d1..d5450639471 100644
--- a/python/libcudf/CMakeLists.txt
+++ b/python/libcudf/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../../rapids_config.cmake)
 
diff --git a/python/libcudf/pyproject.toml b/python/libcudf/pyproject.toml
index 18aa824c6df..01fe6097936 100644
--- a/python/libcudf/pyproject.toml
+++ b/python/libcudf/pyproject.toml
@@ -79,7 +79,7 @@ build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
 requires = [
-    "cmake>=3.26.4,!=3.30.0",
+    "cmake>=3.30.4",
     "libkvikio==25.4.*,>=0.0.0a0",
     "librmm==25.4.*,>=0.0.0a0",
     "ninja",
diff --git a/python/pylibcudf/CMakeLists.txt b/python/pylibcudf/CMakeLists.txt
index a4b831790fb..fe6e73a3f14 100644
--- a/python/pylibcudf/CMakeLists.txt
+++ b/python/pylibcudf/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.30.4 FATAL_ERROR)
 
 include(../../rapids_config.cmake)
 include(rapids-cuda)
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index 2f846b5f0b9..939da65c1ec 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -109,7 +109,7 @@ build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
 matrix-entry = "cuda_suffixed=true"
 requires = [
-    "cmake>=3.26.4,!=3.30.0",
+    "cmake>=3.30.4",
     "cython>=3.0.3",
     "libcudf==25.4.*,>=0.0.0a0",
     "librmm==25.4.*,>=0.0.0a0",