diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 3018295c9eb..73c76cd5d3e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -740,6 +740,7 @@ add_library(
   src/table/table.cpp
   src/table/table_device_view.cu
   src/table/table_view.cpp
+  src/text/dedup.cu
   src/text/detokenize.cu
   src/text/edit_distance.cu
   src/text/generate_ngrams.cu
diff --git a/cpp/include/nvtext/dedup.hpp b/cpp/include/nvtext/dedup.hpp
new file mode 100644
index 00000000000..4595bcdef57
--- /dev/null
+++ b/cpp/include/nvtext/dedup.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/export.hpp>
+#include <cudf/utilities/memory_resource.hpp>
+
+//! NVText APIs
+namespace CUDF_EXPORT nvtext {
+/**
+ * @addtogroup nvtext_replace
+ * @{
+ * @file
+ */
+
+/**
+ * @brief Returns the duplicate strings found in the given input
+ *
+ * The internal implementation creates a suffix array of the input which
+ * requires ~10x the input size for temporary memory.
+ *
+ * The output includes any strings of at least `min_width` bytes that
+ * appear more than once in the entire input.
+ *
+ * @param input Strings column to search for duplicates
+ * @param min_width Minimum byte count (greater than 8) that must match to identify a duplicate
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New strings column containing the duplicate strings found
+ */
+std::unique_ptr<cudf::column> substring_deduplicate(
+  cudf::strings_column_view const& input,
+  cudf::size_type min_width,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
+/** @} */  // end of group
+}  // namespace CUDF_EXPORT nvtext
diff --git a/cpp/src/text/dedup.cu b/cpp/src/text/dedup.cu
new file mode 100644
index 00000000000..3c59d9121ea
--- /dev/null
+++ b/cpp/src/text/dedup.cu
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/indexalator.cuh>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/sorting.hpp>
+#include <cudf/strings/detail/strings_column_factories.cuh>
+#include <cudf/strings/detail/utilities.cuh>
+#include <cudf/strings/detail/utilities.hpp>
+#include <cudf/strings/string_view.cuh>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
+#include <cudf/utilities/error.hpp>
+#include <cudf/utilities/memory_resource.hpp>
+
+#include <nvtext/dedup.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <cuda/std/functional>
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/remove.h>
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
+#include <thrust/unique.h>
+
+namespace nvtext {
+namespace detail {
+namespace {
+
+using string_index = cudf::strings::detail::string_index_pair;
+
+struct sort_comparator_fn {
+  char const* d_chars;
+  int64_t chars_size;
+  // compares the suffixes of d_chars starting at lhs and rhs (clamped to max size_type bytes)
+  __device__ bool operator()(int64_t lhs, int64_t rhs) const
+  {
+    constexpr int64_t limit = cuda::std::numeric_limits<cudf::size_type>::max();
+    auto const suffix       = [this, limit](int64_t pos) {
+      auto const bytes = cuda::std::min(limit, chars_size - pos);
+      return cudf::string_view(d_chars + pos, static_cast<cudf::size_type>(bytes));
+    };
+    return suffix(lhs) < suffix(rhs);
+  }
+};
+
+// Returns the number of leading bytes that are identical in both strings
+__device__ cudf::size_type count_common_bytes(cudf::string_view lhs, cudf::string_view rhs)
+{
+  auto const* left  = lhs.data();
+  auto const* right = rhs.data();
+  // only the bytes present in both strings can match
+  auto const limit = cuda::std::min(lhs.size_bytes(), rhs.size_bytes());
+
+  cudf::size_type count = 0;
+  // the bounds check short-circuits before any byte past `limit` is read
+  while ((count < limit) && (left[count] == right[count])) {
+    ++count;
+  }
+  return count;
+}
+
+struct find_duplicates_fn {
+  char const* d_chars;
+  int64_t chars_size;
+  cudf::size_type width;
+  int64_t const* d_indices;
+  // returns the number of leading bytes shared by the suffixes at d_indices[idx-1] and
+  // d_indices[idx], capped at max<int16_t>, or 0 if that is fewer than `width` bytes
+  __device__ int16_t operator()(int64_t idx) const
+  {
+    // the first entry has no predecessor to compare against
+    if (idx == 0) { return 0; }
+
+    // a string_view size is a cudf::size_type so longer suffixes are clamped to that maximum
+    constexpr int64_t limit = cuda::std::numeric_limits<cudf::size_type>::max();
+    auto const suffix       = [this, limit](int64_t pos) {
+      auto const bytes = cuda::std::min(limit, chars_size - pos);
+      return cudf::string_view(d_chars + pos, static_cast<cudf::size_type>(bytes));
+    };
+    auto const matched = count_common_bytes(suffix(d_indices[idx - 1]), suffix(d_indices[idx]));
+
+    // the result type is int16_t so the match length is capped at that type's maximum
+    constexpr cudf::size_type cap = cuda::std::numeric_limits<int16_t>::max();
+    auto const length             = cuda::std::min(matched, cap);
+    return (length < width) ? 0 : static_cast<int16_t>(length);
+  }
+};
+
+struct collapse_overlaps_fn {
+  char const* d_chars;
+  int64_t const* d_offsets;
+  int16_t const* d_sizes;
+  // returns the candidate at idx, or {nullptr, 0} if the previous candidate already contains it
+  __device__ string_index operator()(int64_t idx) const
+  {
+    auto const length = d_sizes[idx];
+    auto const start  = d_offsets[idx];
+    // a candidate starting one byte after a longer predecessor lies entirely within it
+    auto const covered =
+      (idx > 0) && (d_offsets[idx - 1] == (start - 1)) && (d_sizes[idx - 1] > length);
+    // TODO: need to handle chains longer than max<int16_t>
+    // size == d_sizes[idx-1] == max<int16_t>
+
+    return covered ? string_index{nullptr, 0} : string_index(d_chars + start, length);
+  }
+};
+
+}  // namespace
+
+// Builds a sorted array of the input's suffixes, measures the bytes shared by adjacent
+// suffixes and returns the matches of at least min_width bytes, longest first.
+// CUDF_EXPECTS rejects a min_width that is not greater than 8 or not less than the input bytes.
+std::unique_ptr<cudf::column> substring_deduplicate(cudf::strings_column_view const& input,
+                                                    cudf::size_type min_width,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::device_async_resource_ref mr)
+{
+  CUDF_EXPECTS(min_width > 8, "min_width should be greater than 8");
+  auto [first_offset, last_offset] =
+    cudf::strings::detail::get_first_and_last_offset(input, stream);
+
+  auto d_input_chars = input.chars_begin(stream) + first_offset;
+  auto chars_size    = last_offset - first_offset;
+  CUDF_EXPECTS(min_width < chars_size, "min_width value must be less than the input size");
+
+  auto indices = rmm::device_uvector<int64_t>(chars_size - min_width + 1, stream);
+  auto sizes   = rmm::device_uvector<int16_t>(indices.size(), stream);
+
+  thrust::sequence(rmm::exec_policy_nosync(stream), indices.begin(), indices.end());
+  // note: thrust::sort may be limited to a 32-bit range
+  thrust::sort(rmm::exec_policy_nosync(stream),
+               indices.begin(),
+               indices.end(),
+               sort_comparator_fn{d_input_chars, chars_size});
+
+  // locate candidate duplicates within the suffixes produced by sort
+  thrust::transform(rmm::exec_policy_nosync(stream),
+                    thrust::counting_iterator<int64_t>(0),
+                    thrust::counting_iterator<int64_t>(indices.size()),
+                    sizes.begin(),
+                    find_duplicates_fn{d_input_chars, chars_size, min_width, indices.data()});
+
+  // remove the non-candidate entries from indices and sizes
+  thrust::remove_if(
+    rmm::exec_policy_nosync(stream),
+    indices.begin(),
+    indices.end(),
+    thrust::counting_iterator<int64_t>(0),
+    [d_sizes = sizes.data()] __device__(int64_t idx) -> bool { return d_sizes[idx] == 0; });
+  auto end = thrust::remove(rmm::exec_policy(stream), sizes.begin(), sizes.end(), 0);
+  sizes.resize(thrust::distance(sizes.begin(), end), stream);
+  indices.resize(sizes.size(), stream);
+
+  // sort the resulting indices/sizes for overlap filtering
+  thrust::sort_by_key(
+    rmm::exec_policy_nosync(stream), indices.begin(), indices.end(), sizes.begin());
+
+  // produce final duplicates for make_strings_column and collapse any overlapping candidates
+  auto duplicates = rmm::device_uvector<string_index>(indices.size(), stream);
+  thrust::transform(rmm::exec_policy_nosync(stream),
+                    thrust::counting_iterator<int64_t>(0),
+                    thrust::counting_iterator<int64_t>(indices.size()),
+                    duplicates.begin(),
+                    collapse_overlaps_fn{d_input_chars, indices.data(), sizes.data()});
+
+  // filter out the remaining non-viable candidates
+  duplicates.resize(
+    thrust::distance(
+      duplicates.begin(),
+      thrust::remove(
+        rmm::exec_policy(stream), duplicates.begin(), duplicates.end(), string_index{nullptr, 0})),
+    stream);
+
+  // sort result by size descending (should be very fast)
+  thrust::sort(rmm::exec_policy_nosync(stream),
+               duplicates.begin(),
+               duplicates.end(),
+               [] __device__(auto lhs, auto rhs) -> bool { return lhs.second > rhs.second; });
+
+  return cudf::strings::detail::make_strings_column(
+    duplicates.begin(), duplicates.end(), stream, mr);
+}
+}  // namespace detail
+
+std::unique_ptr<cudf::column> substring_deduplicate(cudf::strings_column_view const& input,
+                                                    cudf::size_type min_width,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();  // NOTE(review): presumably an NVTX profiling range -- confirm
+  return detail::substring_deduplicate(input, min_width, stream, mr);  // arguments unchanged
+}
+
+}  // namespace nvtext
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e3ca8b70b87..9fbe3d04d5d 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -607,6 +607,7 @@ ConfigureTest(STRUCTS_TEST structs/structs_column_tests.cpp structs/utilities_te
 ConfigureTest(
   TEXT_TEST
   text/bpe_tests.cpp
+  text/dedup_tests.cpp
   text/edit_distance_tests.cpp
   text/jaccard_tests.cpp
   text/minhash_tests.cpp
diff --git a/cpp/tests/text/dedup_tests.cpp b/cpp/tests/text/dedup_tests.cpp
new file mode 100644
index 00000000000..6429b70363a
--- /dev/null
+++ b/cpp/tests/text/dedup_tests.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/column/column.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <nvtext/dedup.hpp>
+
+#include <vector>
+
+struct TextDedupTest : public cudf::test::BaseFixture {};
+
+TEST_F(TextDedupTest, StringDedup)
+{
+  // https://loremipsum.io/generator?n=25&t=p
+  // clang-format off
+  auto input = cudf::test::strings_column_wrapper({
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ", //  90
+    "01234567890123456789 magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation     ", // 180
+    "laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit   ", // 270
+    "voluptate velit esse cillum dolore eu fugiat nulla pariatur. 01234567890123456789         ", // 360
+    "cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.    ", // 450
+    "Ea esse numquam et recusandae quia et voluptatem sint quo explicabo repudiandae. At nihil ", // 540
+    "sunt non architecto doloremque eos dolorem consequuntur. Vel adipisci quod et voluptatum  ", // 630
+    "quis est fuga tempore qui dignissimos aliquam et sint repellendus ut autem voluptas quo   ", // 720
+    "deleniti earum? Qui ipsam ipsum hic ratione mollitia aut nobis laboriosam. Eum aspernatur ", // 810
+    "dolorem sit voluptatum numquam in iure placeat vel laudantium molestiae? Ad reprehenderit ", // 900
+    "quia aut minima deleniti id consequatur sapiente est dolores cupiditate. 012345678901234  ", // 990
+  });
+  // clang-format on
+
+  auto sv = cudf::strings_column_view(input);
+  // the leading space of the match comes from the end of row 0: duplicates may span rows
+  auto results  = nvtext::substring_deduplicate(sv, 20);
+  auto expected = cudf::test::strings_column_wrapper({" 01234567890123456789 "});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  // a smaller width also reports the shorter repeats; results are ordered longest first
+  results  = nvtext::substring_deduplicate(sv, 15);
+  expected = cudf::test::strings_column_wrapper(
+    {" 01234567890123456789 ", ". 012345678901234", " reprehenderit "});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+
+  // Test with sliced input: rows 0 and 10 are excluded which alters the expected matches
+  auto const sliced_input = cudf::slice(input, {1, 10}).front();
+
+  sv       = cudf::strings_column_view(sliced_input);
+  results  = nvtext::substring_deduplicate(sv, 15);
+  expected = cudf::test::strings_column_wrapper({"01234567890123456789 ", " reprehenderit "});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+}
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 7adea963868..0f9b77aa874 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5342,6 +5342,27 @@ def is_vowel(self, position) -> SeriesOrIndex:
             self._column.is_letter(True, position)  # type: ignore[arg-type]
         )
 
+    def substring_deduplicate(self, min_width: int) -> SeriesOrIndex:
+        """
+        Returns the strings of at least ``min_width`` bytes that appear
+        more than once anywhere in the input.
+
+        Parameters
+        ----------
+        min_width : int
+            The minimum number of bytes to determine duplicates
+
+        Returns
+        -------
+        Series of duplicate strings found
+        """
+        return self._return_or_inplace(
+            self._column.substring_deduplicate(min_width),  # type: ignore[arg-type]
+            inplace=False,
+            expand=False,
+            retain_index=False,
+        )
+
     def edit_distance(self, targets) -> SeriesOrIndex:
         """
         The ``targets`` strings are measured against the strings in this
@@ -6322,6 +6343,13 @@ def hash_character_ngrams(
         )
         return type(self).from_pylibcudf(result)  # type: ignore[return-value]
 
+    @acquire_spill_lock()
+    def substring_deduplicate(self, min_width: int) -> Self:
+        result = plc.nvtext.dedup.substring_deduplicate(
+            self.to_pylibcudf(mode="read"), min_width
+        )
+        return type(self).from_pylibcudf(result)  # type: ignore[return-value]
+
     @acquire_spill_lock()
     def edit_distance(self, targets: Self) -> NumericalColumn:
         result = plc.nvtext.edit_distance.edit_distance(
diff --git a/python/cudf/cudf/tests/text/test_text_methods.py b/python/cudf/cudf/tests/text/test_text_methods.py
index 47b41bd1e39..1463f1d3351 100644
--- a/python/cudf/cudf/tests/text/test_text_methods.py
+++ b/python/cudf/cudf/tests/text/test_text_methods.py
@@ -1085,3 +1085,20 @@ def test_byte_pair_encoding(separator, input, results):
     actual = encoder(strings, separator)
     assert type(expected) is type(actual)
     assert_eq(expected, actual)
+
+
+def test_substring_deduplicate():
+    text = (
+        " 01234567890123456789 magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation    "
+        "laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit   "
+        "voluptate velit esse cillum dolore eu fugiat nulla pariatur. 01234567890123456789         "
+        "deleniti earum? Qui ipsam ipsum hic ratione mollitia aut nobis laboriosam. Eum aspernatur "
+        "dolorem sit voluptatum numquam in iure placeat vel laudantium molestiae? Ad reprehenderit "
+        "quia aut minima deleniti id consequatur sapiente est dolores cupiditate. 012345678901234  "
+    )
+    input = cudf.Series([text])
+    actual = input.str.substring_deduplicate(15)
+    expected = cudf.Series(
+        [" 01234567890123456789 ", ". 012345678901234", " reprehenderit "]
+    )
+    assert_eq(expected, actual)
diff --git a/python/pylibcudf/pylibcudf/libcudf/nvtext/dedup.pxd b/python/pylibcudf/pylibcudf/libcudf/nvtext/dedup.pxd
new file mode 100644
index 00000000000..9e38b9b4c51
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/libcudf/nvtext/dedup.pxd
@@ -0,0 +1,13 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+from libcpp.memory cimport unique_ptr
+from pylibcudf.exception_handler cimport libcudf_exception_handler
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.types cimport size_type
+
+
+cdef extern from "nvtext/dedup.hpp" namespace "nvtext" nogil:
+
+    cdef unique_ptr[column] substring_deduplicate(
+        column_view source_strings,
+        size_type min_width) except +libcudf_exception_handler
diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
index d8cabbf4d47..29f350c5611 100644
--- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
@@ -14,6 +14,7 @@
 
 set(cython_sources
     byte_pair_encode.pyx
+    dedup.pyx
     edit_distance.pyx
     generate_ngrams.pyx
     jaccard.pyx
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
index eb48ea84dee..56506f5f8b6 100644
--- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
@@ -2,6 +2,7 @@
 
 from . cimport (
     byte_pair_encode,
+    dedup,
     edit_distance,
     generate_ngrams,
     jaccard,
@@ -16,11 +17,12 @@ from . cimport (
 )
 
 __all__ = [
+    "byte_pair_encode",
+    "dedup",
     "edit_distance",
     "generate_ngrams",
     "jaccard",
     "minhash",
-    "byte_pair_encode"
     "ngrams_tokenize",
     "normalize",
     "replace",
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py
index 07e80abe3a8..eebab06de3c 100644
--- a/python/pylibcudf/pylibcudf/nvtext/__init__.py
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py
@@ -2,6 +2,7 @@
 
 from . import (
     byte_pair_encode,
+    dedup,
     edit_distance,
     generate_ngrams,
     jaccard,
@@ -17,6 +18,7 @@
 
 __all__ = [
     "byte_pair_encode",
+    "dedup",
     "edit_distance",
     "generate_ngrams",
     "jaccard",
diff --git a/python/pylibcudf/pylibcudf/nvtext/dedup.pxd b/python/pylibcudf/pylibcudf/nvtext/dedup.pxd
new file mode 100644
index 00000000000..5ff85f60b68
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/dedup.pxd
@@ -0,0 +1,6 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.types cimport size_type
+
+cpdef Column substring_deduplicate(Column input, size_type min_width)
diff --git a/python/pylibcudf/pylibcudf/nvtext/dedup.pyi b/python/pylibcudf/pylibcudf/nvtext/dedup.pyi
new file mode 100644
index 00000000000..c324cfbd9c2
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/dedup.pyi
@@ -0,0 +1,5 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+from pylibcudf.column import Column
+
+def substring_deduplicate(input: Column, min_width: int) -> Column: ...
diff --git a/python/pylibcudf/pylibcudf/nvtext/dedup.pyx b/python/pylibcudf/pylibcudf/nvtext/dedup.pyx
new file mode 100644
index 00000000000..5c65129f380
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/dedup.pyx
@@ -0,0 +1,39 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.nvtext.dedup cimport (
+    substring_deduplicate as cpp_substring_deduplicate,
+)
+from pylibcudf.libcudf.types cimport size_type
+
+__all__ = ["substring_deduplicate"]
+
+
+cpdef Column substring_deduplicate(Column input, size_type min_width):
+    """
+    Returns the strings of at least ``min_width`` bytes that appear
+    more than once anywhere in the input column.
+
+    For details, see :cpp:func:`substring_deduplicate`
+
+    Parameters
+    ----------
+    input : Column
+        Strings column of text
+    min_width : size_type
+        Minimum number of bytes that must match to report a duplicate
+
+    Returns
+    -------
+    Column
+        New column of duplicate strings
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:  # the libcudf call below runs inside this nogil block
+        c_result = cpp_substring_deduplicate(input.view(), min_width)
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_dedup.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_dedup.py
new file mode 100644
index 00000000000..7987045435c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_dedup.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pytest
+from utils import assert_column_eq
+
+import pylibcudf as plc
+
+
+@pytest.fixture(scope="module")
+def input_col():
+    arr = [
+        "01234567890123456789",
+        "01234567890123456789",
+        "01234567890123456789",
+    ]
+    return pa.array(arr)
+
+
+@pytest.mark.parametrize("min_width", [10, 20])
+def test_substring_deduplicate(input_col, min_width):
+    result = plc.nvtext.dedup.substring_deduplicate(
+        plc.interop.from_arrow(input_col),
+        min_width,
+    )
+    expected = pa.array(["01234567890123456789012345678901234567890123456789"])
+    assert_column_eq(result, expected)