diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 11104037c5e..148861c0fa2 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -34,6 +34,7 @@ jobs:
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
+      node_type: "cpu16"
   python-build:
     needs: [cpp-build]
     secrets: inherit
@@ -77,6 +78,7 @@ jobs:
       branch: ${{ inputs.branch }}
       sha: ${{ inputs.sha }}
       date: ${{ inputs.date }}
+      node_type: "cpu16"
       script: ci/build_wheel_libcudf.sh
   wheel-publish-libcudf:
     needs: wheel-build-libcudf
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 7046fd0e5dc..8357a12e221 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -46,18 +46,6 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/test_cpp_memcheck.sh"
-  static-configure:
-    secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04
-    with:
-      build_type: ${{ inputs.build_type }}
-      branch: ${{ inputs.branch }}
-      date: ${{ inputs.date }}
-      sha: ${{ inputs.sha }}
-      # Use the wheel container so we can skip conda solves and since our
-      # primary static consumers (Spark) are not in conda anyway.
-      container_image: "rapidsai/ci-wheel:latest"
-      run_script: "ci/configure_cpp_static.sh"
   cpp-linters:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04
diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh
index 3466edacfc5..1df7bb61834 100755
--- a/ci/test_cudf_polars_polars_tests.sh
+++ b/ci/test_cudf_polars_polars_tests.sh
@@ -26,6 +26,8 @@ git clone https://github.com/pola-rs/polars.git --branch "${TAG}" --depth 1
 
 # Install requirements for running polars tests
 rapids-logger "Install polars test requirements"
+# TODO: Remove sed command when polars-cloud supports 1.23
+sed -i '/^polars-cloud$/d' polars/py-polars/requirements-dev.txt
 rapids-pip-retry install -r polars/py-polars/requirements-dev.txt -r polars/py-polars/requirements-ci.txt
 
 # shellcheck disable=SC2317
diff --git a/ci/test_narwhals.sh b/ci/test_narwhals.sh
index 4a32ff0b0fd..28eceff2f80 100755
--- a/ci/test_narwhals.sh
+++ b/ci/test_narwhals.sh
@@ -26,6 +26,7 @@ rapids-logger "Run narwhals tests for cuDF"
 python -m pytest \
     --cache-clear \
     --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-narwhals.xml" \
+    -p cudf.testing.narwhals_test_plugin \
     --numprocesses=8 \
     --dist=worksteal \
     --constructors=cudf
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index e7dbb765099..a23981b4e72 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -66,7 +66,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.4dev0
 - pandoc
-- polars>=1.20,<1.23
+- polars>=1.20,<1.24
 - pre-commit
 - ptxcompiler
 - pyarrow>=14.0.0,<20.0.0a0
diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml
index 342ec8d4b59..e2b9302dc36 100644
--- a/conda/environments/all_cuda-128_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-128_arch-x86_64.yaml
@@ -64,7 +64,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.4dev0
 - pandoc
-- polars>=1.20,<1.23
+- polars>=1.20,<1.24
 - pre-commit
 - pyarrow>=14.0.0,<20.0.0a0
 - pydata-sphinx-theme>=0.15.4
diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml
index 1d36ab2a3e4..64a147d3c63 100644
--- a/conda/recipes/cudf-polars/meta.yaml
+++ b/conda/recipes/cudf-polars/meta.yaml
@@ -43,7 +43,7 @@ requirements:
   run:
     - python
     - pylibcudf ={{ version }}
-    - polars >=1.20,<1.23
+    - polars >=1.20,<1.24
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
 
 test:
diff --git a/cpp/benchmarks/common/random_distribution_factory.cuh b/cpp/benchmarks/common/random_distribution_factory.cuh
index c27616132d0..32424fbaaa3 100644
--- a/cpp/benchmarks/common/random_distribution_factory.cuh
+++ b/cpp/benchmarks/common/random_distribution_factory.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
 #include <thrust/tabulate.h>
 
 #include <algorithm>
+#include <functional>
 #include <memory>
 #include <type_traits>
 
diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp
index 43f060fdafa..5f978a0d8ec 100644
--- a/cpp/include/nvtext/minhash.hpp
+++ b/cpp/include/nvtext/minhash.hpp
@@ -125,5 +125,99 @@ std::unique_ptr<cudf::column> minhash64(
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
+/**
+ * @brief Returns the minhash values for each input row
+ *
+ * This function uses MurmurHash3_x86_32 for the hash algorithm.
+ *
+ * The input row is first hashed using the given `seed` over a sliding window
+ * of `ngrams` of strings. These hash values are then combined with the `a`
+ * and `b` parameter values using the following formula:
+ * ```
+ *   max_hash = max of uint32
+ *   mp = (1 << 61) - 1
+ *   hv[i] = hash value of a ngrams at i
+ *   pv[i] = ((hv[i] * a[i] + b[i]) % mp) & max_hash
+ * ```
+ *
+ * This calculation is performed on each set of ngrams and the minimum value
+ * is computed as follows:
+ * ```
+ *   mh[j,i] = min(pv[i]) for all ngrams in row j
+ *                        and where i=[0,a.size())
+ * ```
+ *
+ * Any null row entries result in corresponding null output rows.
+ *
+ * @throw std::invalid_argument if the ngrams < 2
+ * @throw std::invalid_argument if parameter_a is empty
+ * @throw std::invalid_argument if `parameter_b.size() != parameter_a.size()`
+ * @throw std::overflow_error if `parameter_a.size() * input.size()` exceeds the column size limit
+ *
+ * @param input Strings column to compute minhash
+ * @param ngrams The number of strings to hash within each row
+ * @param seed Seed value used for the hash algorithm
+ * @param parameter_a Values used for the permuted calculation
+ * @param parameter_b Values used for the permuted calculation
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return List column of minhash values for each string per seed
+ */
+std::unique_ptr<cudf::column> minhash_ngrams(
+  cudf::lists_column_view const& input,
+  cudf::size_type ngrams,
+  uint32_t seed,
+  cudf::device_span<uint32_t const> parameter_a,
+  cudf::device_span<uint32_t const> parameter_b,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
+/**
+ * @brief Returns the minhash values for each input row
+ *
+ * This function uses MurmurHash3_x64_128 for the hash algorithm.
+ *
+ * The input row is first hashed using the given `seed` over a sliding window
+ * of `ngrams` of strings. These hash values are then combined with the `a`
+ * and `b` parameter values using the following formula:
+ * ```
+ *   max_hash = max of uint64
+ *   mp = (1 << 61) - 1
+ *   hv[i] = hash value of a ngrams at i
+ *   pv[i] = ((hv[i] * a[i] + b[i]) % mp) & max_hash
+ * ```
+ *
+ * This calculation is performed on each set of ngrams and the minimum value
+ * is computed as follows:
+ * ```
+ *   mh[j,i] = min(pv[i]) for all ngrams in row j
+ *                        and where i=[0,a.size())
+ * ```
+ *
+ * Any null row entries result in corresponding null output rows.
+ *
+ * @throw std::invalid_argument if the ngrams < 2
+ * @throw std::invalid_argument if parameter_a is empty
+ * @throw std::invalid_argument if `parameter_b.size() != parameter_a.size()`
+ * @throw std::overflow_error if `parameter_a.size() * input.size()` exceeds the column size limit
+ *
+ * @param input List strings column to compute minhash
+ * @param ngrams The number of strings to hash within each row
+ * @param seed Seed value used for the hash algorithm
+ * @param parameter_a Values used for the permuted calculation
+ * @param parameter_b Values used for the permuted calculation
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return List column of minhash values for each string per seed
+ */
+std::unique_ptr<cudf::column> minhash64_ngrams(
+  cudf::lists_column_view const& input,
+  cudf::size_type ngrams,
+  uint64_t seed,
+  cudf::device_span<uint64_t const> parameter_a,
+  cudf::device_span<uint64_t const> parameter_b,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
 /** @} */  // end of group
 }  // namespace CUDF_EXPORT nvtext
diff --git a/cpp/src/column/column_device_view.cu b/cpp/src/column/column_device_view.cu
index 9dc39f01ab3..c304d705f9b 100644
--- a/cpp/src/column/column_device_view.cu
+++ b/cpp/src/column/column_device_view.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 
+#include <functional>
 #include <numeric>
 
 namespace cudf {
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index 53c1d335a40..204aca8a69c 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -36,6 +36,7 @@
 #include <cudf/utilities/error.hpp>
 
 #include <algorithm>
+#include <functional>
 #include <utility>
 
 namespace cudf::io {
diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu
index 7b9fc25d1cc..e506d60a2be 100644
--- a/cpp/src/io/json/host_tree_algorithms.cu
+++ b/cpp/src/io/json/host_tree_algorithms.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,6 +46,7 @@
 
 #include <algorithm>
 #include <deque>
+#include <functional>
 
 namespace cudf::io::json::detail {
 
diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu
index 0c95c2b05e8..c265ac5e316 100644
--- a/cpp/src/io/json/read_json.cu
+++ b/cpp/src/io/json/read_json.cu
@@ -43,6 +43,7 @@
 #include <BS_thread_pool.hpp>
 #include <BS_thread_pool_utils.hpp>
 
+#include <functional>
 #include <numeric>
 
 namespace cudf::io::json::detail {
diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp
index 050bf692c14..77643d294e8 100644
--- a/cpp/src/io/orc/aggregate_orc_metadata.cpp
+++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp
@@ -19,6 +19,7 @@
 #include "io/utilities/row_selection.hpp"
 
 #include <algorithm>
+#include <functional>
 #include <numeric>
 
 namespace cudf::io::orc::detail {
diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu
index dbf5e293c4e..3a20ffbce19 100644
--- a/cpp/src/io/orc/writer_impl.cu
+++ b/cpp/src/io/orc/writer_impl.cu
@@ -64,6 +64,7 @@
 
 #include <algorithm>
 #include <cstring>
+#include <functional>
 #include <numeric>
 #include <tuple>
 #include <utility>
diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu
index 03a37327e9b..be1e7d38fff 100644
--- a/cpp/src/io/parquet/reader_impl_chunking.cu
+++ b/cpp/src/io/parquet/reader_impl_chunking.cu
@@ -40,6 +40,7 @@
 #include <thrust/transform_scan.h>
 #include <thrust/unique.h>
 
+#include <functional>
 #include <numeric>
 
 namespace cudf::io::parquet::detail {
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index 9e50fafa8a7..4a410cec558 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -53,6 +53,7 @@
 
 #include <algorithm>
 #include <cstring>
+#include <functional>
 #include <iterator>
 #include <numeric>
 #include <utility>
diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu
index 469442d46d4..d7b1bf360fe 100644
--- a/cpp/src/lists/dremel.cu
+++ b/cpp/src/lists/dremel.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,6 +36,8 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/discard_iterator.h>
 
+#include <functional>
+
 namespace cudf::detail {
 namespace {
 /**
diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh
index d22fb04696c..6071a9fdd2d 100644
--- a/cpp/src/strings/regex/regex.cuh
+++ b/cpp/src/strings/regex/regex.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,7 @@
 #include <cuda_runtime.h>
 #include <thrust/pair.h>
 
+#include <functional>
 #include <memory>
 
 namespace cudf {
diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu
index 0777253bb38..af8b53ccd8c 100644
--- a/cpp/src/strings/replace/multi_re.cu
+++ b/cpp/src/strings/replace/multi_re.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,6 +39,7 @@
 #include <thrust/pair.h>
 
 #include <algorithm>
+#include <functional>
 
 namespace cudf {
 namespace strings {
diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu
index 990c4855a14..d77cc0cf17a 100644
--- a/cpp/src/table/row_operators.cu
+++ b/cpp/src/table/row_operators.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,6 +33,8 @@
 
 #include <thrust/iterator/transform_iterator.h>
 
+#include <functional>
+
 namespace cudf {
 namespace experimental {
 
diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu
index a13a435a271..9118fe54ab2 100644
--- a/cpp/src/text/bpe/load_merge_pairs.cu
+++ b/cpp/src/text/bpe/load_merge_pairs.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,6 +33,7 @@
 #include <cuda/functional>
 
 #include <fstream>
+#include <functional>
 #include <iostream>
 #include <vector>
 
diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu
index 50c16c8ba6c..663595af5df 100644
--- a/cpp/src/text/minhash.cu
+++ b/cpp/src/text/minhash.cu
@@ -21,6 +21,7 @@
 #include <cudf/detail/device_scalar.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/offsets_iterator_factory.cuh>
 #include <cudf/detail/sequence.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/hashing/detail/hashing.hpp>
@@ -62,19 +63,20 @@ constexpr cudf::thread_index_type tile_size = block_size;
 constexpr cuda::std::size_t params_per_thread = 16;
 
 // Separate kernels are used to process strings above and below this value (in bytes).
-constexpr cudf::size_type wide_string_threshold = 1 << 18;  // 256K
+constexpr cudf::size_type wide_row_threshold = 1 << 18;  // 256K
 // The number of blocks per string for the above-threshold kernel processing.
-constexpr cudf::size_type blocks_per_string = 64;
+constexpr cudf::size_type blocks_per_row = 64;
 // The above values were determined using the redpajama and books_sample datasets
 
 /**
  * @brief Hashing kernel launched as a thread per tile-size (block or warp)
+ * for strings column
  *
  * This kernel computes the hashes for each string using the seed and the specified
  * hash function. The width is used to compute rolling substrings to hash over.
  * The hashes are stored in d_hashes to be used in the minhash_kernel.
  *
- * This kernel also counts the number of strings above the wide_string_threshold
+ * This kernel also counts the number of strings above the wide_row_threshold
  * and proactively initializes the output values for those strings.
  *
  * @tparam HashFunction The hash function to use for this kernel
@@ -84,7 +86,7 @@ constexpr cudf::size_type blocks_per_string = 64;
  * @param seed The seed used for the hash function
  * @param width Width in characters used for determining substrings to hash
  * @param d_hashes The resulting hash values are stored here
- * @param threshold_count Stores the number of strings above wide_string_threshold
+ * @param threshold_count Stores the number of strings above wide_row_threshold
  * @param param_count Number of parameters (used for the proactive initialize)
  * @param d_results Final results vector (used for the proactive initialize)
  */
@@ -146,7 +148,7 @@ CUDF_KERNEL void minhash_seed_kernel(cudf::column_device_view const d_strings,
   }
 
   // logic appended here so an extra kernel is not required
-  if (size_bytes >= wide_string_threshold) {
+  if (size_bytes >= wide_row_threshold) {
     if (lane_idx == 0) {
       // count the number of wide strings
       cuda::atomic_ref<cudf::size_type, cuda::thread_scope_device> ref{*threshold_count};
@@ -160,31 +162,130 @@ CUDF_KERNEL void minhash_seed_kernel(cudf::column_device_view const d_strings,
   }
 }
 
+/**
+ * @brief Hashing kernel launched as a thread per tile-size (block or warp)
+ * for a lists column
+ *
+ * This kernel computes the hashes for each row using the seed and the specified
+ * hash function. The ngrams identifies consecutive strings to hash over in
+ * sliding window formation. The hashes are stored in d_hashes and used as input
+ * to the minhash_kernel.
+ *
+ * This kernel also counts the number of rows above the wide_row_threshold
+ * and proactively initializes the output values for those rows.
+ *
+ * @tparam HashFunction The hash function to use for this kernel
+ * @tparam hash_value_type Derived from HashFunction result_type
+ *
+ * @param d_input The input column to hash
+ * @param seed The seed used for the hash function
+ * @param ngrams Number of strings in each row to hash
+ * @param d_hashes The resulting hash values are stored here
+ * @param threshold_count Stores the number of rows above wide_row_threshold
+ * @param param_count Number of parameters (used for the proactive initialize)
+ * @param d_results Final results vector (used for the proactive initialize)
+ */
+template <typename HashFunction, typename hash_value_type = typename HashFunction::result_type>
+CUDF_KERNEL void minhash_ngrams_kernel(cudf::detail::lists_column_device_view const d_input,
+                                       hash_value_type seed,
+                                       cudf::size_type ngrams,
+                                       hash_value_type* d_hashes,
+                                       cudf::size_type* threshold_count,
+                                       cudf::size_type param_count,
+                                       hash_value_type* d_results)
+{
+  auto const tid     = cudf::detail::grid_1d::global_thread_id();
+  auto const row_idx = tid / tile_size;
+  if (row_idx >= d_input.size()) { return; }
+  if (d_input.is_null(row_idx)) { return; }
+
+  // retrieve this row's offset to locate the output position in d_hashes
+  auto const offsets_itr = d_input.offsets().data<cudf::size_type>() + d_input.offset();
+  auto const offset      = offsets_itr[row_idx];
+  auto const size_row    = offsets_itr[row_idx + 1] - offset;
+  if (size_row == 0) { return; }
+
+  auto const d_row    = cudf::list_device_view(d_input, row_idx);
+  auto const lane_idx = static_cast<cudf::size_type>(tid % tile_size);
+
+  // hashes for this row/thread are stored here
+  auto seed_hashes  = d_hashes + offset - offsets_itr[0] + lane_idx;
+  auto const hasher = HashFunction(seed);
+
+  for (auto idx = lane_idx; idx < size_row; idx += tile_size, seed_hashes += tile_size) {
+    if (d_row.is_null(idx)) {
+      *seed_hashes = 0;
+      continue;
+    }
+
+    auto next_idx = cuda::std::min(idx + ngrams, size_row - 1);
+    if ((idx != 0) && ((next_idx - idx) < ngrams)) {
+      *seed_hashes = 0;
+      continue;
+    }
+
+    auto const first_str = d_row.element<cudf::string_view>(idx);
+    auto const last_str  = d_row.element<cudf::string_view>(next_idx);
+    // build super-string since adjacent strings are contiguous in memory
+    auto const size = static_cast<cudf::size_type>(
+      thrust::distance(first_str.data(), last_str.data()) + last_str.size_bytes());
+    auto const hash_str = cudf::string_view(first_str.data(), size);
+    hash_value_type hv;
+    if constexpr (std::is_same_v<hash_value_type, uint32_t>) {
+      hv = hasher(hash_str);
+    } else {
+      hv = cuda::std::get<0>(hasher(hash_str));
+    }
+    // disallowing hash to zero case
+    *seed_hashes = cuda::std::max(hv, hash_value_type{1});
+  }
+
+  // logic appended here to count long rows so an extra kernel is not required
+  if (size_row >= wide_row_threshold) {
+    if (lane_idx == 0) {
+      // count the number of wide rows
+      cuda::atomic_ref<cudf::size_type, cuda::thread_scope_device> ref{*threshold_count};
+      ref.fetch_add(1, cuda::std::memory_order_relaxed);
+    }
+    // initialize the output -- only needed for wider rows
+    auto d_output = d_results + (row_idx * param_count);
+    for (auto i = lane_idx; i < param_count; i += tile_size) {
+      d_output[i] = cuda::std::numeric_limits<hash_value_type>::max();
+    }
+  }
+}
+
 /**
  * @brief Permutation calculation kernel
  *
- * This kernel uses the hashes from the minhash_seed_kernel and the parameter_a and
- * parameter_b values to compute the final output results.
+ * This kernel uses the hashes from the minhash_seed_kernel or minhash_ngrams_kernel
+ * and the 'parameter_a' and 'parameter_b' values to compute the final output.
  * The output is the number of input rows (N) by the number of parameter values (M).
- * Each output[i] is the calculated result for parameter_a/b[0:M].
+ * Each row output[i] is the calculated result for parameter_a/b[0:M].
+ *
+ * This kernel is launched with either blocks per row of 1 for rows
+ * below the wide_row_threshold or blocks per row = blocks_per_rows
+ * for rows above wide_row_threshold.
  *
- * This kernel is launched with either blocks per strings of 1 for strings
- * below the wide_strings_threshold or blocks per string = blocks_per_strings
- * for strings above wide_strings_threshold.
+ * Note that this was refactored to accommodate lists of strings which is possible
+ * since there is no need here to access the characters, only the hash values.
+ * The offsets and width are used to locate and count the hash values produced by
+ * kernels above for each input row.
  *
+ * @tparam offsets_type Type for the offsets iterator for the input column
  * @tparam hash_value_type Derived from HashFunction result_type
- * @tparam blocks_per_string Number of blocks used to process each string
+ * @tparam blocks_per_row Number of blocks used to process each row
  *
- * @param d_strings The input strings to hash
- * @param indices The indices of the strings in d_strings to process
+ * @param offsets_itr The offsets are used to address the d_hashes
+ * @param indices The indices of the rows in the input column
  * @param parameter_a 1st set of parameters for the calculation result
  * @param parameter_b 2nd set of parameters for the calculation result
- * @param width Used for calculating the number of available hashes in each string
- * @param d_hashes The hash values computed in minhash_seed_kernel
+ * @param width Used for calculating the number of available hashes in each row
+ * @param d_hashes The hash values computed in one of the hash kernels
  * @param d_results Final results vector of calculate values
  */
-template <typename hash_value_type, int blocks_per_string>
-CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,
+template <typename offsets_type, typename hash_value_type, int blocks_per_row>
+CUDF_KERNEL void minhash_kernel(offsets_type offsets_itr,
                                 cudf::device_span<cudf::size_type const> indices,
                                 cudf::device_span<hash_value_type const> parameter_a,
                                 cudf::device_span<hash_value_type const> parameter_b,
@@ -193,41 +294,36 @@ CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,
                                 hash_value_type* d_results)
 {
   auto const tid = cudf::detail::grid_1d::global_thread_id();
-  auto const idx = (tid / blocks_per_string) / block_size;
+  auto const idx = (tid / blocks_per_row) / block_size;
   if (idx >= indices.size()) { return; }
-  auto const str_idx = indices[idx];
-  if (d_strings.is_null(str_idx)) { return; }
+  auto const row_idx = indices[idx];
 
   auto const block      = cooperative_groups::this_thread_block();
-  int const section_idx = block.group_index().x % blocks_per_string;
+  int const section_idx = block.group_index().x % blocks_per_row;
 
-  auto const offsets = d_strings.child(cudf::strings_column_view::offsets_column_index);
-  auto const offsets_itr =
-    cudf::detail::input_offsetalator(offsets.head(), offsets.type(), d_strings.offset());
-  auto const offset     = offsets_itr[str_idx];
-  auto const size_bytes = static_cast<cudf::size_type>(offsets_itr[str_idx + 1] - offset);
+  auto const offset   = offsets_itr[row_idx];
+  auto const row_size = static_cast<cudf::size_type>(offsets_itr[row_idx + 1] - offset);
 
   // number of items to process in this block;
-  // last block also includes any remainder values from the size_bytes/blocks_per_string truncation
+  // last block also includes any remainder values from the row_size/blocks_per_row truncation
   // example:
-  //  each section_size for string with size 588090 and blocks_per_string=64 is 9188
+  //  each section_size for string with size 588090 and blocks_per_row=64 is 9188
   //  except the last section which is 9188 + (588090 % 64) = 9246
-  auto const section_size =
-    (size_bytes / blocks_per_string) +
-    (section_idx < (blocks_per_string - 1) ? 0 : size_bytes % blocks_per_string);
-  auto const section_offset = section_idx * (size_bytes / blocks_per_string);
+  auto const section_size = (row_size / blocks_per_row) +
+                            (section_idx < (blocks_per_row - 1) ? 0 : row_size % blocks_per_row);
+  auto const section_offset = section_idx * (row_size / blocks_per_row);
 
   // hash values for this block/section
   auto const seed_hashes = d_hashes + offset - offsets_itr[0] + section_offset;
   // width used here as a max value since a string's char-count <= byte-count
   auto const hashes_size =
-    section_idx < (blocks_per_string - 1)
+    section_idx < (blocks_per_row - 1)
       ? section_size
-      : cuda::std::max(static_cast<cudf::size_type>(size_bytes > 0), section_size - width + 1);
+      : cuda::std::max(static_cast<cudf::size_type>(row_size > 0), section_size - width + 1);
 
-  auto const init     = size_bytes == 0 ? 0 : cuda::std::numeric_limits<hash_value_type>::max();
+  auto const init     = row_size == 0 ? 0 : cuda::std::numeric_limits<hash_value_type>::max();
   auto const lane_idx = block.thread_rank();
-  auto const d_output = d_results + (str_idx * parameter_a.size());
+  auto const d_output = d_results + (row_idx * parameter_a.size());
 
   auto const begin = seed_hashes + lane_idx;
   auto const end   = seed_hashes + hashes_size;
@@ -273,7 +369,7 @@ CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,
       // cooperative groups does not have a min function and cub::BlockReduce was slower
       auto const minv =
         thrust::reduce(thrust::seq, values, values + block_size, init, thrust::minimum{});
-      if constexpr (blocks_per_string > 1) {
+      if constexpr (blocks_per_row > 1) {
         // accumulates mins for each block into d_output
         cuda::atomic_ref<hash_value_type, cuda::thread_scope_block> ref{d_output[lane_idx + i]};
         ref.fetch_min(minv, cuda::std::memory_order_relaxed);
@@ -285,6 +381,46 @@ CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,
   }
 }
 
+/**
+ * @brief Partition input rows by row size
+ *
+ * The returned index is the first row above the wide_row_threshold size.
+ * The returned vector are the indices partitioned above and below the
+ * wide_row_threshold size.
+ *
+ * @param size Number of rows in the input column
+ * @param threshold_count Number of rows above wide_row_threshold
+ * @param tfn Transform function returns the size of each row
+ * @param stream Stream used for allocation and kernel launches
+ */
+template <typename transform_fn>
+std::pair<cudf::size_type, rmm::device_uvector<cudf::size_type>> partition_input(
+  cudf::size_type size,
+  cudf::size_type threshold_count,
+  transform_fn tfn,
+  rmm::cuda_stream_view stream)
+{
+  auto indices = rmm::device_uvector<cudf::size_type>(size, stream);
+  thrust::sequence(rmm::exec_policy(stream), indices.begin(), indices.end());
+  cudf::size_type threshold_index = threshold_count < size ? size : 0;
+
+  // if we counted a split of above/below threshold then
+  // compute partitions based on the size of each string
+  if ((threshold_count > 0) && (threshold_count < size)) {
+    auto sizes = rmm::device_uvector<cudf::size_type>(size, stream);
+    auto begin = thrust::counting_iterator<cudf::size_type>(0);
+    auto end   = begin + size;
+    thrust::transform(rmm::exec_policy_nosync(stream), begin, end, sizes.data(), tfn);
+    // these 2 are slightly faster than using partition()
+    thrust::sort_by_key(
+      rmm::exec_policy_nosync(stream), sizes.begin(), sizes.end(), indices.begin());
+    auto const lb = thrust::lower_bound(
+      rmm::exec_policy_nosync(stream), sizes.begin(), sizes.end(), wide_row_threshold);
+    threshold_index = static_cast<cudf::size_type>(thrust::distance(sizes.begin(), lb));
+  }
+  return {threshold_index, std::move(indices)};
+}
+
 template <typename HashFunction, typename hash_value_type = typename HashFunction::result_type>
 std::unique_ptr<cudf::column> minhash_fn(cudf::strings_column_view const& input,
                                          hash_value_type seed,
@@ -334,40 +470,112 @@ std::unique_ptr<cudf::column> minhash_fn(cudf::strings_column_view const& input,
                                                                          d_threshold_count.data(),
                                                                          parameter_a.size(),
                                                                          d_results);
-  auto const threshold_count = d_threshold_count.value(stream);
 
-  auto indices = rmm::device_uvector<cudf::size_type>(input.size(), stream);
-  thrust::sequence(rmm::exec_policy(stream), indices.begin(), indices.end());
-  cudf::size_type threshold_index = threshold_count < input.size() ? input.size() : 0;
+  auto transform_fn = [d_strings = *d_strings] __device__(auto idx) -> cudf::size_type {
+    if (d_strings.is_null(idx)) { return 0; }
+    return d_strings.element<cudf::string_view>(idx).size_bytes();
+  };
+  auto [threshold_index, indices] =
+    partition_input(input.size(), d_threshold_count.value(stream), transform_fn, stream);
 
-  // if we counted a split of above/below threshold then
-  // compute partitions based on the size of each string
-  if ((threshold_count > 0) && (threshold_count < input.size())) {
-    auto sizes = rmm::device_uvector<cudf::size_type>(input.size(), stream);
-    thrust::transform(rmm::exec_policy_nosync(stream),
-                      thrust::counting_iterator<cudf::size_type>(0),
-                      thrust::counting_iterator<cudf::size_type>(input.size()),
-                      sizes.data(),
-                      cuda::proclaim_return_type<cudf::size_type>(
-                        [d_strings = *d_strings] __device__(auto idx) -> cudf::size_type {
-                          if (d_strings.is_null(idx)) { return 0; }
-                          return d_strings.element<cudf::string_view>(idx).size_bytes();
-                        }));
-    thrust::sort_by_key(
-      rmm::exec_policy_nosync(stream), sizes.begin(), sizes.end(), indices.begin());
-    auto const lb = thrust::lower_bound(
-      rmm::exec_policy_nosync(stream), sizes.begin(), sizes.end(), wide_string_threshold);
-    threshold_index = static_cast<cudf::size_type>(thrust::distance(sizes.begin(), lb));
+  auto input_offsets =
+    cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset());
+  using offsets_type = decltype(input_offsets);
+
+  // handle the strings below the threshold width
+  if (threshold_index > 0) {
+    auto d_indices = cudf::device_span<cudf::size_type const>(indices.data(), threshold_index);
+    cudf::detail::grid_1d grid{static_cast<cudf::thread_index_type>(d_indices.size()) * block_size,
+                               block_size};
+    minhash_kernel<offsets_type, hash_value_type, 1>
+      <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+        input_offsets, d_indices, parameter_a, parameter_b, width, d_hashes.data(), d_results);
+  }
+
+  // handle the strings above the threshold width
+  if (threshold_index < input.size()) {
+    auto const count = static_cast<cudf::thread_index_type>(input.size() - threshold_index);
+    auto d_indices =
+      cudf::device_span<cudf::size_type const>(indices.data() + threshold_index, count);
+    cudf::detail::grid_1d grid{count * block_size * blocks_per_row, block_size};
+    minhash_kernel<offsets_type, hash_value_type, blocks_per_row>
+      <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+        input_offsets, d_indices, parameter_a, parameter_b, width, d_hashes.data(), d_results);
   }
 
+  return results;
+}
+
+template <typename HashFunction, typename hash_value_type = typename HashFunction::result_type>
+std::unique_ptr<cudf::column> minhash_ngrams_fn(
+  cudf::lists_column_view const& input,
+  cudf::size_type ngrams,
+  hash_value_type seed,
+  cudf::device_span<hash_value_type const> parameter_a,
+  cudf::device_span<hash_value_type const> parameter_b,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr)
+{
+  CUDF_EXPECTS(ngrams >= 2,
+               "Parameter ngrams should be an integer value of 2 or greater",
+               std::invalid_argument);
+  CUDF_EXPECTS(!parameter_a.empty(), "Parameters A and B cannot be empty", std::invalid_argument);
+  CUDF_EXPECTS(parameter_a.size() == parameter_b.size(),
+               "Parameters A and B should have the same number of elements",
+               std::invalid_argument);
+  CUDF_EXPECTS(
+    (static_cast<std::size_t>(input.size()) * parameter_a.size()) <
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
+    "The number of parameters times the number of input rows exceeds the column size limit",
+    std::overflow_error);
+
+  auto const output_type = cudf::data_type{cudf::type_to_id<hash_value_type>()};
+  if (input.is_empty()) { return cudf::make_empty_column(output_type); }
+
+  auto const d_input = cudf::column_device_view::create(input.parent(), stream);
+
+  auto results =
+    cudf::make_numeric_column(output_type,
+                              input.size() * static_cast<cudf::size_type>(parameter_a.size()),
+                              cudf::mask_state::UNALLOCATED,
+                              stream,
+                              mr);
+  auto d_results = results->mutable_view().data<hash_value_type>();
+
+  cudf::detail::grid_1d grid{static_cast<cudf::thread_index_type>(input.size()) * block_size,
+                             block_size};
+  auto const hashes_size = input.child().size();
+  auto d_hashes          = rmm::device_uvector<hash_value_type>(hashes_size, stream);
+  auto d_threshold_count = cudf::detail::device_scalar<cudf::size_type>(0, stream);
+
+  auto d_list = cudf::detail::lists_column_device_view(*d_input);
+  minhash_ngrams_kernel<HashFunction>
+    <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(d_list,
+                                                                         seed,
+                                                                         ngrams,
+                                                                         d_hashes.data(),
+                                                                         d_threshold_count.data(),
+                                                                         parameter_a.size(),
+                                                                         d_results);
+
+  auto sizes_fn = [d_list] __device__(auto idx) -> cudf::size_type {
+    if (d_list.is_null(idx)) { return 0; }
+    return cudf::list_device_view(d_list, idx).size();
+  };
+  auto [threshold_index, indices] =
+    partition_input(input.size(), d_threshold_count.value(stream), sizes_fn, stream);
+
+  auto input_offsets = input.offsets_begin();  // already includes input.offset()
+  using offset_type  = decltype(input_offsets);
+
   // handle the strings below the threshold width
   if (threshold_index > 0) {
     auto d_indices = cudf::device_span<cudf::size_type const>(indices.data(), threshold_index);
     cudf::detail::grid_1d grid{static_cast<cudf::thread_index_type>(d_indices.size()) * block_size,
                                block_size};
-    minhash_kernel<hash_value_type, 1>
+    minhash_kernel<offset_type, hash_value_type, 1>
       <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
-        *d_strings, d_indices, parameter_a, parameter_b, width, d_hashes.data(), d_results);
+        input_offsets, d_indices, parameter_a, parameter_b, ngrams, d_hashes.data(), d_results);
   }
 
   // handle the strings above the threshold width
@@ -375,10 +583,10 @@ std::unique_ptr<cudf::column> minhash_fn(cudf::strings_column_view const& input,
     auto const count = static_cast<cudf::thread_index_type>(input.size() - threshold_index);
     auto d_indices =
       cudf::device_span<cudf::size_type const>(indices.data() + threshold_index, count);
-    cudf::detail::grid_1d grid{count * block_size * blocks_per_string, block_size};
-    minhash_kernel<hash_value_type, blocks_per_string>
+    cudf::detail::grid_1d grid{count * block_size * blocks_per_row, block_size};
+    minhash_kernel<offset_type, hash_value_type, blocks_per_row>
       <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
-        *d_strings, d_indices, parameter_a, parameter_b, width, d_hashes.data(), d_results);
+        input_offsets, d_indices, parameter_a, parameter_b, ngrams, d_hashes.data(), d_results);
   }
 
   return results;
@@ -426,6 +634,20 @@ std::unique_ptr<cudf::column> minhash(cudf::strings_column_view const& input,
   return build_list_result(input.parent(), std::move(hashes), parameter_a.size(), stream, mr);
 }
 
+std::unique_ptr<cudf::column> minhash_ngrams(cudf::lists_column_view const& input,
+                                             cudf::size_type ngrams,
+                                             uint32_t seed,
+                                             cudf::device_span<uint32_t const> parameter_a,
+                                             cudf::device_span<uint32_t const> parameter_b,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::device_async_resource_ref mr)
+{
+  using HashFunction = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>;
+  auto hashes        = detail::minhash_ngrams_fn<HashFunction>(
+    input, ngrams, seed, parameter_a, parameter_b, stream, mr);
+  return build_list_result(input.parent(), std::move(hashes), parameter_a.size(), stream, mr);
+}
+
 std::unique_ptr<cudf::column> minhash64(cudf::strings_column_view const& input,
                                         uint64_t seed,
                                         cudf::device_span<uint64_t const> parameter_a,
@@ -440,6 +662,20 @@ std::unique_ptr<cudf::column> minhash64(cudf::strings_column_view const& input,
   return build_list_result(input.parent(), std::move(hashes), parameter_a.size(), stream, mr);
 }
 
+std::unique_ptr<cudf::column> minhash64_ngrams(cudf::lists_column_view const& input,
+                                               cudf::size_type ngrams,
+                                               uint64_t seed,
+                                               cudf::device_span<uint64_t const> parameter_a,
+                                               cudf::device_span<uint64_t const> parameter_b,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::device_async_resource_ref mr)
+{
+  using HashFunction = cudf::hashing::detail::MurmurHash3_x64_128<cudf::string_view>;
+  auto hashes        = detail::minhash_ngrams_fn<HashFunction>(
+    input, ngrams, seed, parameter_a, parameter_b, stream, mr);
+  return build_list_result(input.parent(), std::move(hashes), parameter_a.size(), stream, mr);
+}
+
 }  // namespace detail
 
 std::unique_ptr<cudf::column> minhash(cudf::strings_column_view const& input,
@@ -454,6 +690,19 @@ std::unique_ptr<cudf::column> minhash(cudf::strings_column_view const& input,
   return detail::minhash(input, seed, parameter_a, parameter_b, width, stream, mr);
 }
 
+std::unique_ptr<cudf::column> minhash_ngrams(cudf::lists_column_view const& input,
+                                             cudf::size_type ngrams,
+                                             uint32_t seed,
+                                             cudf::device_span<uint32_t const> parameter_a,
+                                             cudf::device_span<uint32_t const> parameter_b,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::device_async_resource_ref mr)
+
+{
+  CUDF_FUNC_RANGE();
+  return detail::minhash_ngrams(input, ngrams, seed, parameter_a, parameter_b, stream, mr);
+}
+
 std::unique_ptr<cudf::column> minhash64(cudf::strings_column_view const& input,
                                         uint64_t seed,
                                         cudf::device_span<uint64_t const> parameter_a,
@@ -466,4 +715,17 @@ std::unique_ptr<cudf::column> minhash64(cudf::strings_column_view const& input,
   return detail::minhash64(input, seed, parameter_a, parameter_b, width, stream, mr);
 }
 
+std::unique_ptr<cudf::column> minhash64_ngrams(cudf::lists_column_view const& input,
+                                               cudf::size_type ngrams,
+                                               uint64_t seed,
+                                               cudf::device_span<uint64_t const> parameter_a,
+                                               cudf::device_span<uint64_t const> parameter_b,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::device_async_resource_ref mr)
+
+{
+  CUDF_FUNC_RANGE();
+  return detail::minhash64_ngrams(input, ngrams, seed, parameter_a, parameter_b, stream, mr);
+}
+
 }  // namespace nvtext
diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu
index 883a5093bd1..ad92e322ee2 100644
--- a/cpp/tests/groupby/tdigest_tests.cu
+++ b/cpp/tests/groupby/tdigest_tests.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,8 @@
 #include <thrust/fill.h>
 #include <thrust/iterator/counting_iterator.h>
 
+#include <functional>
+
 namespace {
 /**
  * @brief Functor to generate a tdigest by key.
diff --git a/cpp/tests/io/metadata_utilities.cpp b/cpp/tests/io/metadata_utilities.cpp
index 380d66c53f9..980d8d8b3d1 100644
--- a/cpp/tests/io/metadata_utilities.cpp
+++ b/cpp/tests/io/metadata_utilities.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 #include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/io_metadata_utilities.hpp>
 
+#include <functional>
+
 namespace cudf::test {
 
 void expect_metadata_equal(cudf::io::table_input_metadata in_meta,
diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp
index e201dc0565c..d99e19822c0 100644
--- a/cpp/tests/io/parquet_writer_test.cpp
+++ b/cpp/tests/io/parquet_writer_test.cpp
@@ -33,6 +33,7 @@
 
 #include <array>
 #include <fstream>
+#include <functional>
 
 using cudf::test::iterators::no_nulls;
 
diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp
index 5f911597b02..c6c419706e0 100644
--- a/cpp/tests/reductions/scan_tests.cpp
+++ b/cpp/tests/reductions/scan_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@
 #include <thrust/tuple.h>
 
 #include <algorithm>
+#include <functional>
 #include <numeric>
 
 using aggregation      = cudf::aggregation;
diff --git a/cpp/tests/text/minhash_tests.cpp b/cpp/tests/text/minhash_tests.cpp
index 8bfb17e0efd..db43484ab09 100644
--- a/cpp/tests/text/minhash_tests.cpp
+++ b/cpp/tests/text/minhash_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -187,6 +187,15 @@ TEST_F(MinHashTest, EmptyTest)
   auto params64 = cudf::test::fixed_width_column_wrapper<uint64_t>({1, 2, 3});
   results = nvtext::minhash64(view, 0, cudf::column_view(params64), cudf::column_view(params64), 4);
   EXPECT_EQ(results->size(), 0);
+
+  auto empty = cudf::test::lists_column_wrapper<cudf::string_view>();
+  auto lview = cudf::lists_column_view(empty);
+  results =
+    nvtext::minhash_ngrams(lview, 4, 0, cudf::column_view(params), cudf::column_view(params));
+  EXPECT_EQ(results->size(), 0);
+  results =
+    nvtext::minhash64_ngrams(lview, 4, 0, cudf::column_view(params64), cudf::column_view(params64));
+  EXPECT_EQ(results->size(), 0);
 }
 
 TEST_F(MinHashTest, ErrorsTest)
@@ -194,17 +203,20 @@ TEST_F(MinHashTest, ErrorsTest)
   auto input = cudf::test::strings_column_wrapper({"this string intentionally left blank"});
   auto view  = cudf::strings_column_view(input);
   auto empty = cudf::test::fixed_width_column_wrapper<uint32_t>();
-  EXPECT_THROW(nvtext::minhash(view, 0, cudf::column_view(empty), cudf::column_view(empty), 0),
-               std::invalid_argument);
+  auto eview = cudf::column_view(empty);
+  EXPECT_THROW(nvtext::minhash(view, 0, eview, eview, 0), std::invalid_argument);
   auto empty64 = cudf::test::fixed_width_column_wrapper<uint64_t>();
-  EXPECT_THROW(
-    nvtext::minhash64(view, 0, cudf::column_view(empty64), cudf::column_view(empty64), 0),
-    std::invalid_argument);
-  EXPECT_THROW(nvtext::minhash(view, 0, cudf::column_view(empty), cudf::column_view(empty), 4),
-               std::invalid_argument);
-  EXPECT_THROW(
-    nvtext::minhash64(view, 0, cudf::column_view(empty64), cudf::column_view(empty64), 4),
-    std::invalid_argument);
+  auto eview64 = cudf::column_view(empty64);
+  EXPECT_THROW(nvtext::minhash64(view, 0, eview64, eview64, 0), std::invalid_argument);
+  EXPECT_THROW(nvtext::minhash(view, 0, eview, eview, 4), std::invalid_argument);
+  EXPECT_THROW(nvtext::minhash64(view, 0, eview64, eview64, 4), std::invalid_argument);
+
+  auto empty_list = cudf::test::lists_column_wrapper<cudf::string_view>();
+  auto lview      = cudf::lists_column_view(empty_list);
+  EXPECT_THROW(nvtext::minhash_ngrams(lview, 0, 0, eview, eview), std::invalid_argument);
+  EXPECT_THROW(nvtext::minhash64_ngrams(lview, 0, 0, eview64, eview64), std::invalid_argument);
+  EXPECT_THROW(nvtext::minhash_ngrams(lview, 4, 0, eview, eview), std::invalid_argument);
+  EXPECT_THROW(nvtext::minhash64_ngrams(lview, 4, 0, eview64, eview64), std::invalid_argument);
 
   std::vector<std::string> h_input(50000, "");
   input = cudf::test::strings_column_wrapper(h_input.begin(), h_input.end());
@@ -212,16 +224,133 @@ TEST_F(MinHashTest, ErrorsTest)
 
   auto const zeroes = thrust::constant_iterator<uint32_t>(0);
   auto params       = cudf::test::fixed_width_column_wrapper<uint32_t>(zeroes, zeroes + 50000);
-  EXPECT_THROW(nvtext::minhash(view, 0, cudf::column_view(params), cudf::column_view(params), 4),
-               std::overflow_error);
+  auto pview        = cudf::column_view(params);
+  EXPECT_THROW(nvtext::minhash(view, 0, pview, pview, 4), std::overflow_error);
   auto params64 = cudf::test::fixed_width_column_wrapper<uint64_t>(zeroes, zeroes + 50000);
-  EXPECT_THROW(
-    nvtext::minhash64(view, 0, cudf::column_view(params64), cudf::column_view(params64), 4),
-    std::overflow_error);
-
-  EXPECT_THROW(nvtext::minhash(view, 0, cudf::column_view(params), cudf::column_view(empty), 4),
-               std::invalid_argument);
-  EXPECT_THROW(
-    nvtext::minhash64(view, 0, cudf::column_view(params64), cudf::column_view(empty64), 4),
-    std::invalid_argument);
+  auto pview64  = cudf::column_view(params64);
+  EXPECT_THROW(nvtext::minhash64(view, 0, pview64, pview64, 4), std::overflow_error);
+
+  auto offsets = cudf::test::fixed_width_column_wrapper<int32_t>(
+    thrust::counting_iterator<cudf::size_type>(0),
+    thrust::counting_iterator<cudf::size_type>(h_input.size() + 1));
+  auto input_ngrams =
+    cudf::make_lists_column(h_input.size(), offsets.release(), input.release(), 0, {});
+  lview = cudf::lists_column_view(input_ngrams->view());
+  EXPECT_THROW(nvtext::minhash_ngrams(lview, 4, 0, pview, pview), std::overflow_error);
+  EXPECT_THROW(nvtext::minhash64_ngrams(lview, 4, 0, pview64, pview64), std::overflow_error);
+}
+
+TEST_F(MinHashTest, Ngrams)
+{
+  using LCWS = cudf::test::lists_column_wrapper<cudf::string_view>;
+  auto input =
+    LCWS({LCWS{"The", "quick", "brown", "fox", "jumpéd", "over", "the", "lazy", "brown", "dog."},
+          LCWS{"The", "quick", "brown", "fox", "jumpéd", "over", "the", "lazy", "", "dog."},
+          LCWS{"short", "row"}});
+
+  auto view = cudf::lists_column_view(input);
+
+  auto first  = thrust::counting_iterator<uint32_t>(10);
+  auto params = cudf::test::fixed_width_column_wrapper<uint32_t>(first, first + 3);
+  auto results =
+    nvtext::minhash_ngrams(view, 4, 0, cudf::column_view(params), cudf::column_view(params));
+  using LCW32 = cudf::test::lists_column_wrapper<uint32_t>;
+  // clang-format off
+  LCW32 expected({
+    LCW32{ 230924604u,   55492793u, 963436400u},
+    LCW32{ 230924604u,  367515795u, 963436400u},
+    LCW32{2380648568u, 1330223236u, 279797904u}
+  });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  auto params64 = cudf::test::fixed_width_column_wrapper<uint64_t, uint32_t>(first, first + 3);
+  auto results64 =
+    nvtext::minhash64_ngrams(view, 4, 0, cudf::column_view(params64), cudf::column_view(params64));
+  using LCW64 = cudf::test::lists_column_wrapper<uint64_t>;
+  // clang-format off
+  LCW64 expected64({
+    LCW64{ 208926840193078200ul, 576399628675212695ul,  312927673584437419ul},
+    LCW64{ 677038498284219393ul, 326338087730412201ul,  298455901014050223ul},
+    LCW64{1493265692486268500ul, 720255058049417768ul, 2253087432826260995ul}
+  });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results64, expected64);
+}
+
+TEST_F(MinHashTest, NgramsWide)
+{
+  auto many     = std::vector<char const*>(1024, "hello");
+  auto str_data = cudf::test::strings_column_wrapper(many.begin(), many.end());
+  auto offsets =
+    cudf::test::fixed_width_column_wrapper<int32_t, uint64_t>({0ul, many.size() / 2, many.size()});
+  auto input = cudf::make_lists_column(2, offsets.release(), str_data.release(), 0, {});
+
+  auto view = cudf::lists_column_view(input->view());
+
+  auto first  = thrust::counting_iterator<uint32_t>(10);
+  auto params = cudf::test::fixed_width_column_wrapper<uint32_t>(first, first + 3);
+  auto results =
+    nvtext::minhash_ngrams(view, 4, 0, cudf::column_view(params), cudf::column_view(params));
+  using LCW32 = cudf::test::lists_column_wrapper<uint32_t>;
+  // clang-format off
+  LCW32 expected({
+    LCW32{ 571536396u, 2346676954u, 4121817512u},
+    LCW32{ 571536396u, 2346676954u, 4121817512u}
+  });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  auto params64 = cudf::test::fixed_width_column_wrapper<uint64_t, uint32_t>(first, first + 3);
+  auto results64 =
+    nvtext::minhash64_ngrams(view, 4, 0, cudf::column_view(params64), cudf::column_view(params64));
+  using LCW64 = cudf::test::lists_column_wrapper<uint64_t>;
+  // clang-format off
+  LCW64 expected64({
+    LCW64{ 1947142336021414174ul, 1219519365938078011ul, 491896395854741840ul},
+    LCW64{ 1947142336021414174ul, 1219519365938078011ul, 491896395854741840ul}
+  });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results64, expected64);
+}
+
+TEST_F(MinHashTest, NgramsSliced)
+{
+  using LCWS = cudf::test::lists_column_wrapper<cudf::string_view>;
+  auto input =
+    LCWS({LCWS{"ignored", "row"},
+          LCWS{"The", "quick", "brown", "fox", "jumpéd", "over", "the", "lazy", "brown", "dog."},
+          LCWS{"The", "quick", "brown", "fox", "jumpéd", "over", "the", "lazy", "", "dog."},
+          LCWS{"short", "row"},
+          LCWS{"ignored", "row"}});
+
+  auto view  = cudf::lists_column_view(cudf::slice(input, {1, 4}).front());
+  auto first = thrust::counting_iterator<uint32_t>(10);
+
+  auto params = cudf::test::fixed_width_column_wrapper<uint32_t>(first, first + 3);
+  auto results =
+    nvtext::minhash_ngrams(view, 4, 0, cudf::column_view(params), cudf::column_view(params));
+
+  using LCW32 = cudf::test::lists_column_wrapper<uint32_t>;
+  // clang-format off
+  LCW32 expected({
+    LCW32{ 230924604u,   55492793u, 963436400u},
+    LCW32{ 230924604u,  367515795u, 963436400u},
+    LCW32{2380648568u, 1330223236u, 279797904u}
+  });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  auto params64 = cudf::test::fixed_width_column_wrapper<uint64_t, uint32_t>(first, first + 3);
+  auto results64 =
+    nvtext::minhash64_ngrams(view, 4, 0, cudf::column_view(params64), cudf::column_view(params64));
+  using LCW64 = cudf::test::lists_column_wrapper<uint64_t>;
+  // clang-format off
+  LCW64 expected64({
+    LCW64{ 208926840193078200ul, 576399628675212695ul,  312927673584437419ul},
+    LCW64{ 677038498284219393ul, 326338087730412201ul,  298455901014050223ul},
+    LCW64{1493265692486268500ul, 720255058049417768ul, 2253087432826260995ul}
+  });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results64, expected64);
 }
diff --git a/dependencies.yaml b/dependencies.yaml
index c7869eee922..1578dadc793 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -813,7 +813,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - polars>=1.20,<1.23
+          - polars>=1.20,<1.24
   run_cudf_polars_experimental:
     common:
       - output_types: [conda, requirements, pyproject]
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index c74da8d0ca9..92b37c4b3f2 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -207,6 +207,7 @@ def clean_all_xml_files(path):
 exclude_patterns = [
     "venv",
     "**/includes/**",
+    "narwhals_test_plugin",
 ]
 
 # The name of the Pygments (syntax highlighting) style to use.
@@ -585,6 +586,7 @@ def on_missing_reference(app, env, node, contnode):
     ("py:class", "pd.DataFrame"),
     ("py:class", "pandas.core.indexes.frozen.FrozenList"),
     ("py:class", "pa.Array"),
+    ("py:class", "pa.Decimal128Type"),
     ("py:class", "ScalarLike"),
     ("py:class", "ParentType"),
     ("py:class", "pyarrow.lib.DataType"),
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index a57ff9a7817..d41e448254c 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -36,6 +36,7 @@
         ColumnBinaryOperand,
         ColumnLike,
         Dtype,
+        DtypeObj,
         ScalarLike,
         SeriesOrIndex,
         SeriesOrSingleColumnIndex,
@@ -1168,7 +1169,7 @@ def _mimic_inplace(
             self._codes = other_col.codes
         return out
 
-    def view(self, dtype: Dtype) -> ColumnBase:
+    def view(self, dtype: DtypeObj) -> ColumnBase:
         raise NotImplementedError(
             "Categorical column views are not currently supported"
         )
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 89ac39b2be5..61f4f7d52fb 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -950,7 +950,7 @@ def copy(self, deep: bool = True) -> Self:
                 ),
             )
 
-    def view(self, dtype: Dtype) -> ColumnBase:
+    def view(self, dtype: DtypeObj) -> ColumnBase:
         """
         View the data underlying a column as different dtype.
         The source column must divide evenly into the size of
@@ -959,13 +959,9 @@ def view(self, dtype: Dtype) -> ColumnBase:
 
         Parameters
         ----------
-        dtype : NumPy dtype, string
+        dtype : Dtype object
             The dtype to view the data as
-
         """
-
-        dtype = cudf.dtype(dtype)
-
         if dtype.kind in ("o", "u", "s"):
             raise TypeError(
                 "Bytes viewed as str without metadata is ambiguous"
@@ -1586,7 +1582,7 @@ def distinct_count(self, dropna: bool = True) -> int:
             self._distinct_count[dropna] = result
             return self._distinct_count[dropna]
 
-    def can_cast_safely(self, to_dtype: Dtype) -> bool:
+    def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
         raise NotImplementedError()
 
     @acquire_spill_lock()
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 92d5c39e69d..213e91d7b3f 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -47,6 +47,7 @@
         ColumnBinaryOperand,
         DatetimeLikeScalar,
         Dtype,
+        DtypeObj,
         ScalarLike,
     )
     from cudf.core.column.numerical import NumericalColumn
@@ -837,7 +838,7 @@ def is_unique(self) -> bool:
     def isin(self, values: Sequence) -> ColumnBase:
         return cudf.core.tools.datetimes._isin_datetimelike(self, values)
 
-    def can_cast_safely(self, to_dtype: Dtype) -> bool:
+    def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
         if to_dtype.kind == "M":  # type: ignore[union-attr]
             to_res, _ = np.datetime_data(to_dtype)
             self_res, _ = np.datetime_data(self.dtype)
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 3c603c8e6ef..8db6f805bce 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -13,7 +13,6 @@
 import pylibcudf as plc
 
 import cudf
-from cudf.api.types import is_scalar
 from cudf.core._internals import binaryop
 from cudf.core.buffer import acquire_spill_lock, as_buffer
 from cudf.core.column.column import ColumnBase
@@ -73,11 +72,8 @@ def __cuda_array_interface__(self):
     def as_decimal_column(
         self,
         dtype: Dtype,
-    ) -> "DecimalBaseColumn":
-        if (
-            isinstance(dtype, cudf.core.dtypes.DecimalDtype)
-            and dtype.scale < self.dtype.scale
-        ):
+    ) -> DecimalBaseColumn:
+        if isinstance(dtype, DecimalDtype) and dtype.scale < self.dtype.scale:
             warnings.warn(
                 "cuDF truncates when downcasting decimals to a lower scale. "
                 "To round, use Series.round() or DataFrame.round()."
@@ -204,22 +200,17 @@ def normalize_binop_value(self, other) -> Self | cudf.Scalar:
                     other = other.astype(self.dtype)
             return other
         if isinstance(other, cudf.Scalar) and isinstance(
-            # TODO: Should it be possible to cast scalars of other numerical
-            # types to decimal?
             other.dtype,
-            cudf.core.dtypes.DecimalDtype,
+            DecimalDtype,
         ):
+            # TODO: Should it be possible to cast scalars of other numerical
+            # types to decimal?
             if _same_precision_and_scale(self.dtype, other.dtype):
                 other = other.astype(self.dtype)
             return other
-        elif is_scalar(other) and isinstance(other, (int, Decimal)):
-            other = Decimal(other)
-            metadata = other.as_tuple()
-            precision = max(len(metadata.digits), metadata.exponent)
-            scale = -cast(int, metadata.exponent)
-            return cudf.Scalar(
-                other, dtype=self.dtype.__class__(precision, scale)
-            )
+        elif isinstance(other, (int, Decimal)):
+            dtype = self.dtype._from_decimal(Decimal(other))
+            return cudf.Scalar(other, dtype=dtype)
         return NotImplemented
 
     def as_numerical_column(
@@ -373,11 +364,6 @@ def __init__(
             children=children,
         )
 
-    def __setitem__(self, key, value):
-        if isinstance(value, np.integer):
-            value = int(value)
-        super().__setitem__(key, value)
-
     @classmethod
     def from_arrow(cls, data: pa.Array):
         dtype = Decimal64Dtype.from_arrow(data.type)
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 28e8b98edfe..b82ec1958fb 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -43,6 +43,7 @@
         ColumnBinaryOperand,
         ColumnLike,
         Dtype,
+        DtypeObj,
         ScalarLike,
         SeriesOrIndex,
     )
@@ -5532,6 +5533,120 @@ def minhash64(
             self._column.minhash64(seed, a_column, b_column, width)  # type: ignore[arg-type]
         )
 
+    def minhash_ngrams(
+        self, ngrams: int, seed: np.uint32, a: ColumnLike, b: ColumnLike
+    ) -> SeriesOrIndex:
+        """
+        Compute the minhash of a list column of strings.
+
+        This uses the MurmurHash3_x86_32 algorithm for the hash function.
+
+        Calculation uses the formula (hv * a + b) % mersenne_prime
+        where hv is the hash of a ngrams of strings within each row,
+        a and b are provided values and mersenne_prime is 2^61-1.
+
+        Parameters
+        ----------
+        ngrams : int
+            Number of strings to hash within each row.
+        seed : uint32
+            The seed used for the hash algorithm.
+        a : ColumnLike
+            Values for minhash calculation.
+            Must be of type uint32.
+        b : ColumnLike
+            Values for minhash calculation.
+            Must be of type uint32.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import numpy as np
+        >>> s = cudf.Series([['this', 'is', 'my'], ['favorite', 'book']])
+        >>> a = cudf.Series([1, 2, 3], dtype=np.uint32)
+        >>> b = cudf.Series([4, 5, 6], dtype=np.uint32)
+        >>> s.str.minhash_ngrams(ngrams=2, seed=0, a=a, b=b)
+        0      [416367551, 832735099, 1249102647]
+        1    [1906668704, 3813337405, 1425038810]
+        dtype: list
+        """
+        a_column = column.as_column(a)
+        if a_column.dtype != np.uint32:
+            raise ValueError(
+                f"Expecting a Series with dtype uint32, got {type(a)}"
+            )
+        b_column = column.as_column(b)
+        if b_column.dtype != np.uint32:
+            raise ValueError(
+                f"Expecting a Series with dtype uint32, got {type(b)}"
+            )
+        plc_column = plc.nvtext.minhash.minhash_ngrams(
+            self._column.to_pylibcudf(mode="read"),
+            ngrams,
+            seed,
+            a._column.to_pylibcudf(mode="read"),
+            b._column.to_pylibcudf(mode="read"),
+        )
+        result = ColumnBase.from_pylibcudf(plc_column)
+        return self._return_or_inplace(result)
+
+    def minhash64_ngrams(
+        self, ngrams: int, seed: np.uint64, a: ColumnLike, b: ColumnLike
+    ) -> SeriesOrIndex:
+        """
+        Compute the minhash of a list column of strings.
+
+        This uses the MurmurHash3_x64_128 algorithm for the hash function.
+
+        Calculation uses the formula (hv * a + b) % mersenne_prime
+        where hv is the hash of a ngrams of strings within each row,
+        a and b are provided values and mersenne_prime is 2^61-1.
+
+        Parameters
+        ----------
+        ngrams : int
+            Number of strings to hash within each row.
+        seed : uint64
+            The seed used for the hash algorithm.
+        a : ColumnLike
+            Values for minhash calculation.
+            Must be of type uint64.
+        b : ColumnLike
+            Values for minhash calculation.
+            Must be of type uint64.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import numpy as np
+        >>> s = cudf.Series([['this', 'is', 'my'], ['favorite', 'book']])
+        >>> a = cudf.Series([2, 3], dtype=np.uint64)
+        >>> b = cudf.Series([5, 6], dtype=np.uint64)
+        >>> s.str.minhash64_ngrams(ngrams=2, seed=0, a=a, b=b)
+        0    [1304293339825194559, 1956440009737791829]
+        1     [472203876238918632, 1861227318965224922]
+        dtype: list
+        """
+        a_column = column.as_column(a)
+        if a_column.dtype != np.uint64:
+            raise ValueError(
+                f"Expecting a Series with dtype uint64, got {type(a)}"
+            )
+        b_column = column.as_column(b)
+        if b_column.dtype != np.uint64:
+            raise ValueError(
+                f"Expecting a Series with dtype uint64, got {type(b)}"
+            )
+        plc_column = plc.nvtext.minhash.minhash64_ngrams(
+            self._column.to_pylibcudf(mode="read"),
+            ngrams,
+            seed,
+            a._column.to_pylibcudf(mode="read"),
+            b._column.to_pylibcudf(mode="read"),
+        )
+        result = ColumnBase.from_pylibcudf(plc_column)
+        return self._return_or_inplace(result)
+
     def jaccard_index(self, input: cudf.Series, width: int) -> SeriesOrIndex:
         """
         Compute the Jaccard index between this column and the given
@@ -5640,7 +5755,7 @@ def __init__(
         if not isinstance(data, Buffer):
             raise ValueError("data must be a Buffer")
         if dtype != CUDF_STRING_DTYPE:
-            raise ValueError(f"dtypy must be {CUDF_STRING_DTYPE}")
+            raise ValueError(f"dtype must be {CUDF_STRING_DTYPE}")
         if len(children) > 1:
             raise ValueError("StringColumn must have at most 1 offset column.")
 
@@ -5826,23 +5941,22 @@ def __contains__(self, item: ScalarLike) -> bool:
         other = [item] if is_scalar(item) else item
         return self.contains(column.as_column(other, dtype=self.dtype)).any()
 
-    def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
-        out_dtype = cudf.api.types.dtype(dtype)
-        if out_dtype.kind == "b":
+    def as_numerical_column(self, dtype: np.dtype) -> NumericalColumn:
+        if dtype.kind == "b":
             with acquire_spill_lock():
                 plc_column = plc.strings.attributes.count_characters(
                     self.to_pylibcudf(mode="read")
                 )
                 result = ColumnBase.from_pylibcudf(plc_column)
             return (result > np.int8(0)).fillna(False)
-        elif out_dtype.kind in {"i", "u"}:
+        elif dtype.kind in {"i", "u"}:
             if not self.is_integer().all():
                 raise ValueError(
                     "Could not convert strings to integer "
                     "type due to presence of non-integer values."
                 )
             cast_func = plc.strings.convert.convert_integers.to_integers
-        elif out_dtype.kind == "f":
+        elif dtype.kind == "f":
             if not self.is_float().all():
                 raise ValueError(
                     "Could not convert strings to float "
@@ -5850,10 +5964,8 @@ def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
                 )
             cast_func = plc.strings.convert.convert_floats.to_floats
         else:
-            raise ValueError(
-                f"dtype must be a numerical type, not {out_dtype}"
-            )
-        plc_dtype = dtype_to_pylibcudf_type(out_dtype)
+            raise ValueError(f"dtype must be a numerical type, not {dtype}")
+        plc_dtype = dtype_to_pylibcudf_type(dtype)
         with acquire_spill_lock():
             return type(self).from_pylibcudf(  # type: ignore[return-value]
                 cast_func(self.to_pylibcudf(mode="read"), plc_dtype)
@@ -5973,17 +6085,15 @@ def to_pandas(
         else:
             return super().to_pandas(nullable=nullable, arrow_type=arrow_type)
 
-    def can_cast_safely(self, to_dtype: Dtype) -> bool:
-        to_dtype = cudf.api.types.dtype(to_dtype)
-
+    def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
         if self.dtype == to_dtype:
             return True
-        elif to_dtype.kind in {"i", "u"} and not self.is_integer().all():
-            return False
-        elif to_dtype.kind == "f" and not self.is_float().all():
-            return False
-        else:
+        elif to_dtype.kind in {"i", "u"} and self.is_integer().all():
+            return True
+        elif to_dtype.kind == "f" and self.is_float().all():
             return True
+        else:
+            return False
 
     def find_and_replace(
         self,
@@ -6122,12 +6232,11 @@ def _binaryop(
         return NotImplemented
 
     @copy_docstring(ColumnBase.view)
-    def view(self, dtype) -> ColumnBase:
+    def view(self, dtype: DtypeObj) -> ColumnBase:
         if self.null_count > 0:
             raise ValueError(
                 "Can not produce a view of a string column with nulls"
             )
-        dtype = cudf.api.types.dtype(dtype)
         str_byte_offset = self.base_children[0].element_indexing(self.offset)
         str_end_byte_offset = self.base_children[0].element_indexing(
             self.offset + self.size
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 8b0ef9f0cc8..e4d47f492c2 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -28,7 +28,12 @@
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
-    from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype
+    from cudf._typing import (
+        ColumnBinaryOperand,
+        DatetimeLikeScalar,
+        Dtype,
+        DtypeObj,
+    )
 
 _unit_to_nanoseconds_conversion = {
     "ns": 1,
@@ -309,7 +314,9 @@ def total_seconds(self) -> ColumnBase:
         # https://github.com/rapidsai/cudf/issues/17664
         return (
             (self.astype(np.dtype(np.int64)) * conversion)
-            .astype(cudf.Decimal128Dtype(38, 9))
+            .astype(
+                cudf.Decimal128Dtype(cudf.Decimal128Dtype.MAX_PRECISION, 9)
+            )
             .round(decimals=abs(int(math.log10(conversion))))
             .astype(np.dtype(np.float64))
         )
@@ -378,10 +385,10 @@ def find_and_replace(
             ),
         )
 
-    def can_cast_safely(self, to_dtype: Dtype) -> bool:
-        if to_dtype.kind == "m":  # type: ignore[union-attr]
+    def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
+        if to_dtype.kind == "m":
             to_res, _ = np.datetime_data(to_dtype)
-            self_res, _ = np.datetime_data(self.dtype)
+            self_res = self.time_unit
 
             max_int = np.iinfo(np.int64).max
 
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 977208f5eb4..ac9c4d23cc2 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -776,35 +776,36 @@ def _recursively_replace_fields(self, result: dict) -> dict:
 class DecimalDtype(_BaseDtype):
     _metadata = ("precision", "scale")
 
-    def __init__(self, precision, scale=0):
+    def __init__(self, precision: int, scale: int = 0) -> None:
         self._validate(precision, scale)
-        self._typ = pa.decimal128(precision, scale)
+        self._precision = precision
+        self._scale = scale
 
     @property
-    def str(self):
+    def str(self) -> str:
         return f"{self.name!s}({self.precision}, {self.scale})"
 
     @property
-    def precision(self):
+    def precision(self) -> int:
         """
         The decimal precision, in number of decimal digits (an integer).
         """
-        return self._typ.precision
+        return self._precision
 
     @precision.setter
-    def precision(self, value):
+    def precision(self, value: int) -> None:
         self._validate(value, self.scale)
-        self._typ = pa.decimal128(precision=value, scale=self.scale)
+        self._precision = value
 
     @property
-    def scale(self):
+    def scale(self) -> int:
         """
         The decimal scale (an integer).
         """
-        return self._typ.scale
+        return self._scale
 
     @property
-    def itemsize(self):
+    def itemsize(self) -> int:
         """
         Length of one column element in bytes.
         """
@@ -815,14 +816,14 @@ def type(self):
         # might need to account for precision and scale here
         return decimal.Decimal
 
-    def to_arrow(self):
+    def to_arrow(self) -> pa.Decimal128Type:
         """
         Return the equivalent ``pyarrow`` dtype.
         """
-        return self._typ
+        return pa.decimal128(self.precision, self.scale)
 
     @classmethod
-    def from_arrow(cls, typ):
+    def from_arrow(cls, typ: pa.Decimal128Type) -> Self:
         """
         Construct a cudf decimal dtype from a ``pyarrow`` dtype
 
@@ -856,23 +857,23 @@ def __repr__(self):
         )
 
     @classmethod
-    def _validate(cls, precision, scale=0):
+    def _validate(cls, precision: int, scale: int) -> None:
         if precision > cls.MAX_PRECISION:
             raise ValueError(
                 f"Cannot construct a {cls.__name__}"
                 f" with precision > {cls.MAX_PRECISION}"
             )
         if abs(scale) > precision:
-            raise ValueError(f"scale={scale} exceeds precision={precision}")
+            raise ValueError(f"{scale=} cannot exceed {precision=}")
 
     @classmethod
-    def _from_decimal(cls, decimal):
+    def _from_decimal(cls, decimal: decimal.Decimal) -> Self:
         """
         Create a cudf.DecimalDtype from a decimal.Decimal object
         """
         metadata = decimal.as_tuple()
-        precision = max(len(metadata.digits), -metadata.exponent)
-        return cls(precision, -metadata.exponent)
+        precision = max(len(metadata.digits), -metadata.exponent)  # type: ignore[operator]
+        return cls(precision, -metadata.exponent)  # type: ignore[operator]
 
     def serialize(self) -> tuple[dict, list]:
         return (
@@ -885,7 +886,7 @@ def serialize(self) -> tuple[dict, list]:
         )
 
     @classmethod
-    def deserialize(cls, header: dict, frames: list):
+    def deserialize(cls, header: dict, frames: list) -> Self:
         _check_type(cls, header, frames, is_valid_class=issubclass)
         return cls(header["precision"], header["scale"])
 
@@ -896,8 +897,8 @@ def __eq__(self, other: Dtype) -> bool:
             return False
         return self.precision == other.precision and self.scale == other.scale
 
-    def __hash__(self):
-        return hash(self._typ)
+    def __hash__(self) -> int:
+        return hash(self.to_arrow())
 
 
 @doc_apply(
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index c5d2fd349e9..7d76907916f 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -1526,9 +1526,9 @@ def pivot_table(
     ----------
     data : DataFrame
     values : column name or list of column names to aggregate, optional
-    index : list of column names
+    index : scalar or list of column names
             Values to group by in the rows.
-    columns : list of column names
+    columns : scalar or list of column names
             Values to group by in the columns.
     aggfunc : str or dict, default "mean"
             If dict is passed, the key is column to aggregate
@@ -1562,6 +1562,11 @@ def pivot_table(
     if sort is not True:
         raise NotImplementedError("sort is not supported yet")
 
+    if is_scalar(index):
+        index = [index]
+    if is_scalar(columns):
+        columns = [columns]
+
     keys = index + columns
 
     values_passed = values is not None
@@ -1620,15 +1625,8 @@ def pivot_table(
         table = table.fillna(fill_value)
 
     # discard the top level
-    if values_passed and not values_multi and table._data.multiindex:
-        column_names = table._data.level_names[1:]
-        table_columns = tuple(
-            map(lambda column: column[1:], table._column_names)
-        )
-        table.columns = pd.MultiIndex.from_tuples(
-            tuples=table_columns, names=column_names
-        )
-
+    if values_passed and not values_multi and table._data.nlevels > 1:
+        table.columns = table._data.to_pandas_index.droplevel(0)
     if len(index) == 0 and len(columns) > 0:
         table = table.T
 
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index cf85282cccb..29139768a36 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -85,9 +85,9 @@ def _preprocess_host_value(value, dtype) -> tuple[ScalarLike, Dtype]:
         return value.as_py(), dtype
 
     if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
-        value = pa.scalar(
-            value, type=pa.decimal128(dtype.precision, dtype.scale)
-        ).as_py()
+        if isinstance(value, np.integer):
+            value = int(value)
+        value = pa.scalar(value, type=dtype.to_arrow()).as_py()
     if isinstance(value, decimal.Decimal) and dtype is None:
         dtype = cudf.Decimal128Dtype._from_decimal(value)
 
diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py
index 52fc945709e..742a6b57e59 100644
--- a/python/cudf/cudf/pandas/__init__.py
+++ b/python/cudf/cudf/pandas/__init__.py
@@ -8,12 +8,17 @@
 import pylibcudf
 import rmm.mr
 
-from .fast_slow_proxy import is_proxy_instance, is_proxy_object
+from .fast_slow_proxy import (
+    as_proxy_object,
+    is_proxy_instance,
+    is_proxy_object,
+)
 from .magics import load_ipython_extension
 from .profiler import Profiler
 
 __all__ = [
     "Profiler",
+    "as_proxy_object",
     "install",
     "is_proxy_instance",
     "is_proxy_object",
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 45944452c17..147971e8bee 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -151,7 +151,7 @@ def make_final_proxy_type(
     additional_attributes
         Mapping of additional attributes to add to the class
        (optional), these will override any defaulted attributes (e.g.
-       ``__init__`). If you want to remove a defaulted attribute
+       ``__init__``). If you want to remove a defaulted attribute
        completely, pass the special sentinel ``_DELETE`` as a value.
     postprocess
         Optional function called to allow the proxy to postprocess
@@ -1335,6 +1335,31 @@ def _get_proxy_base_class(cls):
     return object
 
 
+def as_proxy_object(obj: Any) -> Any:
+    """
+    Wraps a cudf or pandas object in a proxy object if applicable.
+
+    There will be no memory transfer, i.e., GPU objects stay on GPU and
+    CPU objects stay on CPU. The object will be wrapped in a
+    proxy object. This is useful for ensuring that the object is
+    compatible with the fast-slow proxy system.
+
+    Parameters
+    ----------
+    obj : Any
+        The object to wrap.
+
+    Returns
+    -------
+    Any
+        The wrapped proxy object if applicable, otherwise the original object.
+    """
+    if _is_final_type(obj):
+        typ = get_final_type_map()[type(obj)]
+        return typ._fsproxy_wrap(obj, None)
+    return obj
+
+
 def is_proxy_instance(obj, type):
     return is_proxy_object(obj) and obj.__class__.__name__ == type.__name__
 
diff --git a/python/cudf/cudf/testing/__init__.py b/python/cudf/cudf/testing/__init__.py
index 4e92b43b9f9..a4afa54f754 100644
--- a/python/cudf/cudf/testing/__init__.py
+++ b/python/cudf/cudf/testing/__init__.py
@@ -1,5 +1,6 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
+from cudf.testing import narwhals_test_plugin
 from cudf.testing.testing import (
     assert_eq,
     assert_frame_equal,
diff --git a/python/cudf/cudf/testing/narwhals_test_plugin.py b/python/cudf/cudf/testing/narwhals_test_plugin.py
new file mode 100644
index 00000000000..d794bd0120a
--- /dev/null
+++ b/python/cudf/cudf/testing/narwhals_test_plugin.py
@@ -0,0 +1,25 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Plugin for running narwhals test suite with cudf."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+EXPECTED_FAILURES: Mapping[str, str] = {
+    "tests/frame/select_test.py::test_select_duplicates[cudf]": "cuDF doesn't support having multiple columns with same names",
+}
+
+
+def pytest_collection_modifyitems(session, config, items) -> None:
+    """Mark known failing tests."""
+    import pytest
+
+    for item in items:
+        if item.nodeid in EXPECTED_FAILURES:
+            exp_val = EXPECTED_FAILURES[item.nodeid]
+            item.add_marker(pytest.mark.xfail(reason=exp_val))
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index 2996a88c171..b7cd2388f30 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -290,6 +290,8 @@ def test_column_chunked_array_creation():
     ],
 )
 def test_column_view_valid_numeric_to_numeric(data, from_dtype, to_dtype):
+    from_dtype = np.dtype(from_dtype)
+    to_dtype = np.dtype(to_dtype)
     cpu_data = np.asarray(data, dtype=from_dtype)
     gpu_data = as_column(data, dtype=from_dtype)
 
@@ -314,6 +316,8 @@ def test_column_view_valid_numeric_to_numeric(data, from_dtype, to_dtype):
     ],
 )
 def test_column_view_invalid_numeric_to_numeric(data, from_dtype, to_dtype):
+    from_dtype = np.dtype(from_dtype)
+    to_dtype = np.dtype(to_dtype)
     cpu_data = np.asarray(data, dtype=from_dtype)
     gpu_data = as_column(data, dtype=from_dtype)
 
@@ -337,6 +341,7 @@ def test_column_view_invalid_numeric_to_numeric(data, from_dtype, to_dtype):
     ],
 )
 def test_column_view_valid_string_to_numeric(data, to_dtype):
+    to_dtype = np.dtype(to_dtype)
     expect = cudf.Series._from_column(cudf.Series(data)._column.view(to_dtype))
     got = cudf.Series(str_host_view(data, to_dtype))
 
@@ -352,7 +357,7 @@ def test_column_view_nulls_widths_even():
 
     sr = cudf.Series(data, dtype="int32")
     expect = cudf.Series(expect_data, dtype="float32")
-    got = cudf.Series._from_column(sr._column.view("float32"))
+    got = cudf.Series._from_column(sr._column.view(np.dtype(np.float32)))
 
     assert_eq(expect, got)
 
@@ -364,7 +369,7 @@ def test_column_view_nulls_widths_even():
 
     sr = cudf.Series(data, dtype="float64")
     expect = cudf.Series(expect_data, dtype="int64")
-    got = cudf.Series._from_column(sr._column.view("int64"))
+    got = cudf.Series._from_column(sr._column.view(np.dtype(np.int64)))
 
     assert_eq(expect, got)
 
@@ -376,7 +381,7 @@ def test_column_view_numeric_slice(slc):
 
     expect = cudf.Series(data[slc].view("int64"))
     got = cudf.Series._from_column(
-        sr._column.slice(slc.start, slc.stop).view("int64")
+        sr._column.slice(slc.start, slc.stop).view(np.dtype(np.int64))
     )
 
     assert_eq(expect, got)
@@ -389,7 +394,9 @@ def test_column_view_string_slice(slc):
     data = ["a", "bcde", "cd", "efg", "h"]
 
     expect = cudf.Series._from_column(
-        cudf.Series(data)._column.slice(slc.start, slc.stop).view("int8")
+        cudf.Series(data)
+        ._column.slice(slc.start, slc.stop)
+        .view(np.dtype(np.int8))
     )
     got = cudf.Series(str_host_view(data[slc], "int8"))
 
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index 7fbe072dde7..eae73e47955 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -798,6 +798,25 @@ def test_dataframe_pivot_table_simple(aggfunc, fill_value):
     assert_eq(expected, actual, check_dtype=False)
 
 
+@pytest.mark.parametrize("index", ["A", ["A"]])
+@pytest.mark.parametrize("columns", ["C", ["C"]])
+def test_pivot_table_scalar_index_columns(index, columns):
+    data = {
+        "A": ["one", "one", "two", "three"] * 6,
+        "B": ["A", "B", "C"] * 8,
+        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+        "D": range(24),
+        "E": range(24),
+    }
+    result = cudf.DataFrame(data).pivot_table(
+        values="D", index=index, columns=columns, aggfunc="sum"
+    )
+    expected = pd.DataFrame(data).pivot_table(
+        values="D", index=index, columns=columns, aggfunc="sum"
+    )
+    assert_eq(result, expected)
+
+
 def test_crosstab_simple():
     a = np.array(
         [
diff --git a/python/cudf/cudf/tests/text/test_text_methods.py b/python/cudf/cudf/tests/text/test_text_methods.py
index dc45827d2e8..47b41bd1e39 100644
--- a/python/cudf/cudf/tests/text/test_text_methods.py
+++ b/python/cudf/cudf/tests/text/test_text_methods.py
@@ -930,6 +930,48 @@ def test_minhash():
         strings.str.minhash64(1, a=params, b=params, width=8)
 
 
+def test_minhash_ngrams():
+    strings = cudf.Series(
+        [["this", "is", "my"], ["favorite", "book", "today"]]
+    )
+
+    params = cudf.Series([1, 2, 3], dtype=np.uint32)
+    expected = cudf.Series(
+        [
+            cudf.Series([416367548, 832735096, 1249102644], dtype=np.uint32),
+            cudf.Series([1408797893, 2817595786, 4226393679], dtype=np.uint32),
+        ]
+    )
+    actual = strings.str.minhash_ngrams(ngrams=2, seed=0, a=params, b=params)
+    assert_eq(expected, actual)
+
+    params = cudf.Series([1, 2, 3], dtype=np.uint64)
+    expected = cudf.Series(
+        [
+            cudf.Series(
+                [652146669912597278, 1304293339825194556, 1956440009737791826],
+                dtype=np.uint64,
+            ),
+            cudf.Series(
+                [1776622609581023632, 1247402209948353305, 718181810315682986],
+                dtype=np.uint64,
+            ),
+        ]
+    )
+    actual = strings.str.minhash64_ngrams(ngrams=2, seed=0, a=params, b=params)
+    assert_eq(expected, actual)
+
+    # test wrong input types
+    with pytest.raises(ValueError):
+        strings.str.minhash_ngrams(ngrams=7, seed=1, a="a", b="b")
+    with pytest.raises(ValueError):
+        params = cudf.Series([0, 1, 2], dtype=np.int32)
+        strings.str.minhash_ngrams(ngrams=6, seed=1, a=params, b=params)
+    with pytest.raises(ValueError):
+        params = cudf.Series([0, 1, 2], dtype=np.uint32)
+        strings.str.minhash64_ngrams(ngrams=8, seed=1, a=params, b=params)
+
+
 def test_jaccard_index():
     str1 = cudf.Series(["the brown dog", "jumped about"])
     str2 = cudf.Series(["the black cat", "jumped around"])
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index fd946937945..2678a4f8116 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -18,9 +18,10 @@
 import cudf.api.types
 from cudf.core import column
 from cudf.core.buffer import as_buffer
+from cudf.utils.dtypes import SIZE_TYPE_DTYPE
 
 # The size of the mask in bytes
-mask_dtype = cudf.api.types.dtype(np.int32)
+mask_dtype = SIZE_TYPE_DTYPE
 mask_bitsize = mask_dtype.itemsize * 8
 
 # Mapping from ufuncs to the corresponding binary operators.
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 47de8fb1435..d3bfd9298c2 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -44,6 +44,7 @@
     OOMFallbackError,
     TypeFallbackError,
     _Unusable,
+    as_proxy_object,
     is_proxy_object,
 )
 from cudf.testing import assert_eq
@@ -1979,6 +1980,93 @@ def test_numpy_data_access():
     assert type(expected) is type(actual)
 
 
+@pytest.mark.parametrize(
+    "obj",
+    [
+        pd.DataFrame({"a": [1, 2, 3]}),
+        pd.Series([1, 2, 3]),
+        pd.Index([1, 2, 3]),
+        pd.Categorical([1, 2, 3]),
+        pd.to_datetime(["2021-01-01", "2021-01-02"]),
+        pd.to_timedelta(["1 days", "2 days"]),
+        xpd.DataFrame({"a": [1, 2, 3]}),
+        xpd.Series([1, 2, 3]),
+        xpd.Index([1, 2, 3]),
+        xpd.Categorical([1, 2, 3]),
+        xpd.to_datetime(["2021-01-01", "2021-01-02"]),
+        xpd.to_timedelta(["1 days", "2 days"]),
+        cudf.DataFrame({"a": [1, 2, 3]}),
+        cudf.Series([1, 2, 3]),
+        cudf.Index([1, 2, 3]),
+        cudf.Index([1, 2, 3], dtype="category"),
+        cudf.to_datetime(["2021-01-01", "2021-01-02"]),
+        cudf.Index([1, 2, 3], dtype="timedelta64[ns]"),
+        [1, 2, 3],
+        {"a": 1, "b": 2},
+        (1, 2, 3),
+    ],
+)
+def test_as_proxy_object(obj):
+    proxy_obj = as_proxy_object(obj)
+    if isinstance(
+        obj,
+        (
+            pd.DataFrame,
+            pd.Series,
+            pd.Index,
+            pd.Categorical,
+            xpd.DataFrame,
+            xpd.Series,
+            xpd.Index,
+            xpd.Categorical,
+            cudf.DataFrame,
+            cudf.Series,
+            cudf.Index,
+        ),
+    ):
+        assert is_proxy_object(proxy_obj)
+        if isinstance(proxy_obj, xpd.DataFrame):
+            tm.assert_frame_equal(proxy_obj, xpd.DataFrame(obj))
+        elif isinstance(proxy_obj, xpd.Series):
+            tm.assert_series_equal(proxy_obj, xpd.Series(obj))
+        elif isinstance(proxy_obj, xpd.Index):
+            tm.assert_index_equal(proxy_obj, xpd.Index(obj))
+        else:
+            tm.assert_equal(proxy_obj, obj)
+    else:
+        assert not is_proxy_object(proxy_obj)
+        assert proxy_obj == obj
+
+
+def test_as_proxy_object_doesnot_copy_series():
+    s = pd.Series([1, 2, 3])
+    proxy_obj = as_proxy_object(s)
+    s[0] = 10
+    assert proxy_obj[0] == 10
+    tm.assert_series_equal(s, proxy_obj)
+
+
+def test_as_proxy_object_doesnot_copy_dataframe():
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    proxy_obj = as_proxy_object(df)
+    df.iloc[0, 0] = 10
+    assert proxy_obj.iloc[0, 0] == 10
+    tm.assert_frame_equal(df, proxy_obj)
+
+
+def test_as_proxy_object_doesnot_copy_index():
+    idx = pd.Index([1, 2, 3])
+    proxy_obj = as_proxy_object(idx)
+    assert proxy_obj._fsproxy_wrapped is idx
+
+
+def test_as_proxy_object_no_op_for_intermediates():
+    s = pd.Series(["abc", "def", "ghi"])
+    str_attr = s.str
+    proxy_obj = as_proxy_object(str_attr)
+    assert proxy_obj is str_attr
+
+
 def test_pickle_round_trip_proxy_numpy_array(array):
     arr, proxy_arr = array
     pickled_arr = BytesIO()
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index a7b10a6e8fa..9b798688992 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -197,7 +197,6 @@ def pytest_configure(config: pytest.Config) -> None:
     "tests/unit/io/test_multiscan.py::test_include_file_paths[scan_csv-write_csv]": "Need to expose include_file_paths xref: cudf#18012",
     "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics[False]": "Debug output on stderr doesn't match",
     # Maybe flaky, order-dependent?
-    "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order",
     "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero",
 }
 
diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 6bb5d78c488..85a4f007cf0 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 
 """Datatype utilities."""
@@ -71,7 +71,9 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     -------
     True if casting is supported, False otherwise
     """
-    has_empty = from_.id() == plc.TypeId.EMPTY or to.id() == plc.TypeId.EMPTY
+    to_is_empty = to.id() == plc.TypeId.EMPTY
+    from_is_empty = from_.id() == plc.TypeId.EMPTY
+    has_empty = to_is_empty or from_is_empty
     return (
         (
             from_ == to
@@ -84,8 +86,16 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
                 )
             )
         )
-        or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
-        or (to.id() == plc.TypeId.STRING and is_numeric_not_bool(from_))
+        or (
+            from_.id() == plc.TypeId.STRING
+            and not to_is_empty
+            and is_numeric_not_bool(to)
+        )
+        or (
+            to.id() == plc.TypeId.STRING
+            and not from_is_empty
+            and is_numeric_not_bool(from_)
+        )
     )
 
 
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index 9026a0c29ca..e9fc054efc2 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -19,7 +19,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.10"
 dependencies = [
-    "polars>=1.20,<1.23",
+    "polars>=1.20,<1.24",
     "pylibcudf==25.4.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
index 9d1e8cba425..bfbb99e8eb0 100644
--- a/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/minhash.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.
 from libc.stdint cimport uint32_t, uint64_t
 from libcpp.memory cimport unique_ptr
 from pylibcudf.exception_handler cimport libcudf_exception_handler
@@ -25,3 +25,19 @@ cdef extern from "nvtext/minhash.hpp" namespace "nvtext" nogil:
         const column_view &b,
         const size_type width,
     ) except +
+
+    cdef unique_ptr[column] minhash_ngrams(
+        const column_view &strings,
+        const size_type ngrams,
+        const uint32_t seed,
+        const column_view &a,
+        const column_view &b,
+    ) except +
+
+    cdef unique_ptr[column] minhash64_ngrams(
+        const column_view &strings,
+        const size_type ngrams,
+        const uint64_t seed,
+        const column_view &a,
+        const column_view &b,
+    ) except +
diff --git a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
index 93f13a7e11f..33749141590 100644
--- a/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/utilities/traits.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 from libcpp cimport bool
 from libcpp.vector cimport vector
 from pylibcudf.exception_handler cimport libcudf_exception_handler
@@ -6,22 +6,22 @@ from pylibcudf.libcudf.types cimport data_type
 
 
 cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
-    cdef bool is_relationally_comparable(data_type)
-    cdef bool is_equality_comparable(data_type)
-    cdef bool is_numeric(data_type)
-    cdef bool is_numeric_not_bool(data_type)
-    cdef bool is_index_type(data_type)
-    cdef bool is_unsigned(data_type)
-    cdef bool is_integral(data_type)
-    cdef bool is_integral_not_bool(data_type)
-    cdef bool is_floating_point(data_type)
-    cdef bool is_boolean(data_type)
-    cdef bool is_timestamp(data_type)
-    cdef bool is_fixed_point(data_type)
-    cdef bool is_duration(data_type)
-    cdef bool is_chrono(data_type)
-    cdef bool is_dictionary(data_type)
-    cdef bool is_fixed_width(data_type)
-    cdef bool is_compound(data_type)
-    cdef bool is_nested(data_type)
-    cdef bool is_bit_castable(data_type, data_type)
+    cdef bool is_relationally_comparable(data_type) except +libcudf_exception_handler
+    cdef bool is_equality_comparable(data_type) except +libcudf_exception_handler
+    cdef bool is_numeric(data_type) except +libcudf_exception_handler
+    cdef bool is_numeric_not_bool(data_type) except +libcudf_exception_handler
+    cdef bool is_index_type(data_type) except +libcudf_exception_handler
+    cdef bool is_unsigned(data_type) except +libcudf_exception_handler
+    cdef bool is_integral(data_type) except +libcudf_exception_handler
+    cdef bool is_integral_not_bool(data_type) except +libcudf_exception_handler
+    cdef bool is_floating_point(data_type) except +libcudf_exception_handler
+    cdef bool is_boolean(data_type) except +libcudf_exception_handler
+    cdef bool is_timestamp(data_type) except +libcudf_exception_handler
+    cdef bool is_fixed_point(data_type) except +libcudf_exception_handler
+    cdef bool is_duration(data_type) except +libcudf_exception_handler
+    cdef bool is_chrono(data_type) except +libcudf_exception_handler
+    cdef bool is_dictionary(data_type) except +libcudf_exception_handler
+    cdef bool is_fixed_width(data_type) except +libcudf_exception_handler
+    cdef bool is_compound(data_type) except +libcudf_exception_handler
+    cdef bool is_nested(data_type) except +libcudf_exception_handler
+    cdef bool is_bit_castable(data_type, data_type) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd
index 0af53748cdc..f1e099ca7da 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint32_t, uint64_t
 from pylibcudf.column cimport Column
@@ -24,3 +24,19 @@ cpdef Column minhash64(
     Column b,
     size_type width
 )
+
+cpdef Column minhash_ngrams(
+    Column input,
+    size_type width,
+    uint32_t seed,
+    Column a,
+    Column b
+)
+
+cpdef Column minhash64_ngrams(
+    Column input,
+    size_type width,
+    uint64_t seed,
+    Column a,
+    Column b
+)
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
index 5d88cfbbea0..bb50a150798 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyi
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from pylibcudf.column import Column
 
@@ -8,3 +8,9 @@ def minhash(
 def minhash64(
     input: Column, seed: int, a: Column, b: Column, width: int
 ) -> Column: ...
+def minhash_ngrams(
+    input: Column, ngrams: int, seed: int, a: Column, b: Column
+) -> Column: ...
+def minhash64_ngrams(
+    input: Column, ngrams: int, seed: int, a: Column, b: Column
+) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
index 84811cda867..cdc4a4f3ac8 100644
--- a/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
+++ b/python/pylibcudf/pylibcudf/nvtext/minhash.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint32_t, uint64_t
 from libcpp.memory cimport unique_ptr
@@ -8,12 +8,16 @@ from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.nvtext.minhash cimport (
     minhash as cpp_minhash,
     minhash64 as cpp_minhash64,
+    minhash_ngrams as cpp_minhash_ngrams,
+    minhash64_ngrams as cpp_minhash64_ngrams,
 )
 from pylibcudf.libcudf.types cimport size_type
 
 __all__ = [
     "minhash",
     "minhash64",
+    "minhash_ngrams",
+    "minhash64_ngrams",
 ]
 
 cpdef Column minhash(
@@ -103,3 +107,93 @@ cpdef Column minhash64(
         )
 
     return Column.from_libcudf(move(c_result))
+
+cpdef Column minhash_ngrams(
+    Column input,
+    size_type ngrams,
+    uint32_t seed,
+    Column a,
+    Column b
+):
+    """
+    Returns the minhash values for each input row of strings.
+    This function uses MurmurHash3_x86_32 for the hash algorithm.
+
+    For details, see :cpp:func:`minhash_ngrams`.
+
+    Parameters
+    ----------
+    input : Column
+        List column of strings to compute minhash
+    ngrams : size_type
+        Number of consecutive strings to hash in each row
+    seed : uint32_t
+        Seed used for the hash function
+    a : Column
+        1st parameter value used for the minhash algorithm.
+    b : Column
+        2nd parameter value used for the minhash algorithm.
+
+    Returns
+    -------
+    Column
+        List column of minhash values for each row per
+        value in columns a and b.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = cpp_minhash_ngrams(
+            input.view(),
+            ngrams,
+            seed,
+            a.view(),
+            b.view()
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column minhash64_ngrams(
+    Column input,
+    size_type ngrams,
+    uint64_t seed,
+    Column a,
+    Column b
+):
+    """
+    Returns the minhash values for each input row of strings.
+    This function uses MurmurHash3_x64_128 for the hash algorithm.
+
+    For details, see :cpp:func:`minhash64_ngrams`.
+
+    Parameters
+    ----------
+    input : Column
+        Strings column to compute minhash
+    ngrams : size_type
+        Number of consecutive strings to hash in each row
+    seed : uint64_t
+        Seed used for the hash function
+    a : Column
+        1st parameter value used for the minhash algorithm.
+    b : Column
+        2nd parameter value used for the minhash algorithm.
+
+    Returns
+    -------
+    Column
+        List column of minhash values for each row per
+        value in columns a and b.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = cpp_minhash64_ngrams(
+            input.view(),
+            ngrams,
+            seed,
+            a.view(),
+            b.view()
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py
index ad7a6f7a762..ff8545f0617 100644
--- a/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py
+++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_minhash.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 import pyarrow as pa
 import pytest
@@ -33,3 +33,49 @@ def test_minhash(minhash_input_data, width):
     assert pa_result.type == pa.list_(
         pa.field("element", seed_type, nullable=False)
     )
+
+
+@pytest.fixture(scope="module", params=[pa.uint32(), pa.uint64()])
+def minhash_ngrams_input_data(request):
+    input_arr = pa.array(
+        [
+            ["foo", "bar", "foo foo", "bar bar", "foo bar", "bar foo"],
+            [
+                "one",
+                "two",
+                "three",
+                "four",
+                "five",
+                "six",
+                "seven",
+                "eight",
+                "nine",
+                "ten",
+                "eleven",
+            ],
+        ]
+    )
+    ab = pa.array([2, 3, 4, 5], request.param)
+    return input_arr, ab, request.param
+
+
+@pytest.mark.parametrize("ngrams", [5, 10])
+def test_minhash_ngrams(minhash_ngrams_input_data, ngrams):
+    input_arr, ab, seed_type = minhash_ngrams_input_data
+    minhash_func = (
+        plc.nvtext.minhash.minhash_ngrams
+        if seed_type == pa.uint32()
+        else plc.nvtext.minhash.minhash64_ngrams
+    )
+    result = minhash_func(
+        plc.interop.from_arrow(input_arr),
+        ngrams,
+        0,
+        plc.interop.from_arrow(ab),
+        plc.interop.from_arrow(ab),
+    )
+    pa_result = plc.interop.to_arrow(result)
+    assert all(len(got) == len(ab) for got, s in zip(pa_result, input_arr))
+    assert pa_result.type == pa.list_(
+        pa.field("element", seed_type, nullable=False)
+    )