diff --git a/cpp/include/cuml/cluster/hdbscan.hpp b/cpp/include/cuml/cluster/hdbscan.hpp
index eb1223fd88..0b98aeca86 100644
--- a/cpp/include/cuml/cluster/hdbscan.hpp
+++ b/cpp/include/cuml/cluster/hdbscan.hpp
@@ -18,6 +18,7 @@
 
 #include <raft/core/handle.hpp>
 #include <raft/distance/distance_types.hpp>
+#include <raft/neighbors/nn_descent_types.hpp>
 
 #include <rmm/device_uvector.hpp>
 
@@ -27,6 +28,8 @@ namespace ML {
 namespace HDBSCAN {
 namespace Common {
 
+using nn_index_params = raft::neighbors::experimental::nn_descent::index_params;
+
 /**
  * The Condensed hierarchicy is represented by an edge list with
  * parents as the source vertices, children as the destination,
@@ -134,6 +137,7 @@ class CondensedHierarchy {
 };
 
 enum CLUSTER_SELECTION_METHOD { EOM = 0, LEAF = 1 };
+enum GRAPH_BUILD_ALGO { BRUTE_FORCE_KNN = 0, NN_DESCENT = 1 };  // how the knn graph is built
 
 class RobustSingleLinkageParams {
  public:
@@ -151,6 +155,8 @@ class RobustSingleLinkageParams {
 class HDBSCANParams : public RobustSingleLinkageParams {
  public:
   CLUSTER_SELECTION_METHOD cluster_selection_method = CLUSTER_SELECTION_METHOD::EOM;
+  GRAPH_BUILD_ALGO build_algo                       = GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN;
+  nn_index_params nn_descent_params                 = {};  // consulted only for NN_DESCENT
 };
 
 /**
@@ -495,14 +501,19 @@ namespace HDBSCAN::HELPER {
  * @param n number of columns in X
  * @param metric distance metric to use
  * @param min_samples minimum number of samples to use for computing core distances
+ * @param build_algo algorithm used to build the knn graph (default: BRUTE_FORCE_KNN)
+ * @param build_params NN Descent index parameters; only used when build_algo is NN_DESCENT
  */
-void compute_core_dists(const raft::handle_t& handle,
-                        const float* X,
-                        float* core_dists,
-                        size_t m,
-                        size_t n,
-                        raft::distance::DistanceType metric,
-                        int min_samples);
+void compute_core_dists(
+  const raft::handle_t& handle,
+  const float* X,
+  float* core_dists,
+  size_t m,
+  size_t n,
+  raft::distance::DistanceType metric,
+  int min_samples,
+  HDBSCAN::Common::GRAPH_BUILD_ALGO build_algo = HDBSCAN::Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN,
+  HDBSCAN::Common::nn_index_params build_params = Common::nn_index_params{});
 
 /**
  * @brief Compute the map from final, normalize labels to the labels in the CondensedHierarchy
diff --git a/cpp/src/hdbscan/detail/reachability.cuh b/cpp/src/hdbscan/detail/reachability.cuh
index 03a7f7c0ad..9538536723 100644
--- a/cpp/src/hdbscan/detail/reachability.cuh
+++ b/cpp/src/hdbscan/detail/reachability.cuh
@@ -16,11 +16,16 @@
 
 #pragma once
 
+#include <cuml/cluster/hdbscan.hpp>
+#include <cuml/common/utils.hpp>
 #include <cuml/neighbors/knn.hpp>
 
+#include <raft/core/resource/cuda_stream.hpp>
 #include <raft/distance/distance.cuh>
 #include <raft/linalg/unary_op.cuh>
 #include <raft/neighbors/brute_force.cuh>
+#include <raft/neighbors/nn_descent.cuh>
+#include <raft/neighbors/nn_descent_types.hpp>
 #include <raft/sparse/convert/csr.cuh>
 #include <raft/sparse/linalg/symmetrize.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -34,6 +39,8 @@
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
+namespace NNDescent = raft::neighbors::experimental::nn_descent;
+
 namespace ML {
 namespace HDBSCAN {
 namespace detail {
@@ -68,6 +75,62 @@ void core_distances(
   });
 }
 
+// Distance epilogue for NN Descent: maps each distance `value` to sqrt(|value|).
+// NN Descent internally supports L2Expanded only; `row` and `col` are unused here.
+template <typename value_idx, typename value_t = float>
+struct DistancePostProcessSqrt : NNDescent::DistEpilogue<value_idx, value_t> {
+  DI value_t operator()(value_t value, value_idx row, value_idx col) const
+  {
+    return sqrtf(fabsf(value));  // dedicated sqrt instead of powf(x, 0.5)
+  }
+};
+
+template <typename T>  // out[row] = {row, in[row][0 .. out_k - 2]}
+CUML_KERNEL void copy_first_k_cols_shift_self(
+  T* out, const T* in, size_t out_k, size_t in_k, size_t nrows)
+{
+  // One thread per row; the index is widened before the multiply so it cannot wrap at 32 bits.
+  size_t row = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+  if (row >= nrows || out_k == 0) { return; }  // nothing to write for an empty output row
+  T* out_row      = out + row * out_k;
+  const T* in_row = in + row * in_k;
+  out_row[0]      = static_cast<T>(row);
+  for (size_t i = 1; i < out_k; i++) { out_row[i] = in_row[i - 1]; }
+}
+
+template <typename T>  // out[row] = {0, in[row][0 .. out_k - 2]}
+CUML_KERNEL void copy_first_k_cols_shift_zero(
+  T* out, const T* in, size_t out_k, size_t in_k, size_t nrows)
+{
+  // One thread per row; the index is widened before the multiply so it cannot wrap at 32 bits.
+  size_t row = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
+  if (row >= nrows || out_k == 0) { return; }  // nothing to write for an empty output row
+  T* out_row      = out + row * out_k;
+  const T* in_row = in + row * in_k;
+  out_row[0]      = static_cast<T>(0);
+  for (size_t i = 1; i < out_k; i++) { out_row[i] = in_row[i - 1]; }
+}
+
+template <typename value_idx, typename value_t, typename epilogue_op>
+auto get_graph_nnd(const raft::handle_t& handle,
+                   const value_t* X,
+                   size_t m,
+                   size_t n,
+                   epilogue_op distance_epilogue,
+                   Common::nn_index_params build_params)
+{
+  // Query the pointer attributes of X and dispatch on whether a device pointer is reported.
+  cudaPointerAttributes x_attr;
+  RAFT_CUDA_TRY(cudaPointerGetAttributes(&x_attr, X));
+  if (x_attr.devicePointer == nullptr) {
+    // No device pointer for X: wrap it as an m x n host matrix view.
+    auto host_ds = raft::make_host_matrix_view<const value_t, int64_t>(X, m, n);
+    return NNDescent::build<value_t, value_idx>(handle, build_params, host_ds, distance_epilogue);
+  }
+  auto dev_ds = raft::make_device_matrix_view<const value_t, int64_t>(X, m, n);
+  return NNDescent::build<value_t, value_idx>(handle, build_params, dev_ds, distance_epilogue);
+}
+
 /**
  * Wraps the brute force knn API, to be used for both training and prediction
  * @tparam value_idx data type for integrals
@@ -93,33 +156,76 @@ void compute_knn(const raft::handle_t& handle,
                  const value_t* search_items,
                  size_t n_search_items,
                  int k,
-                 raft::distance::DistanceType metric)
+                 raft::distance::DistanceType metric,
+                 Common::GRAPH_BUILD_ALGO build_algo  = Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN,
+                 Common::nn_index_params build_params = Common::nn_index_params{})
 {
   auto stream      = handle.get_stream();
   auto exec_policy = handle.get_thrust_policy();
-  std::vector<value_t*> inputs;
-  inputs.push_back(const_cast<value_t*>(X));
-
-  std::vector<int> sizes;
-  sizes.push_back(m);
-
   // This is temporary. Once faiss is updated, we should be able to
   // pass value_idx through to knn.
   rmm::device_uvector<int64_t> int64_indices(k * n_search_items, stream);
 
-  // perform knn
-  brute_force_knn(handle,
-                  inputs,
-                  sizes,
-                  n,
-                  const_cast<value_t*>(search_items),
-                  n_search_items,
-                  int64_indices.data(),
-                  dists,
-                  k,
-                  true,
-                  true,
-                  metric);
+  switch (build_algo) {
+    case Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN: {
+      std::vector<value_t*> inputs;
+      inputs.push_back(const_cast<value_t*>(X));
+
+      std::vector<int> sizes;
+      sizes.push_back(m);
+
+      // perform knn
+      brute_force_knn(handle,
+                      inputs,
+                      sizes,
+                      n,
+                      const_cast<value_t*>(search_items),
+                      n_search_items,
+                      int64_indices.data(),
+                      dists,
+                      k,
+                      true,
+                      true,
+                      metric);
+      break;
+    }
+
+    case Common::GRAPH_BUILD_ALGO::NN_DESCENT: {
+      RAFT_EXPECTS(static_cast<size_t>(k) <= build_params.graph_degree,
+                   "n_neighbors should be smaller than the graph degree computed by nn descent");
+
+      auto epilogue                 = DistancePostProcessSqrt<int64_t, float>{};
+      build_params.return_distances = true;
+      auto graph = get_graph_nnd<int64_t, float>(handle, X, m, n, epilogue, build_params);
+
+      size_t TPB        = 256;
+      size_t num_blocks = static_cast<size_t>((m + TPB) / TPB);
+
+      auto indices_d =
+        raft::make_device_matrix<int64_t, int64_t>(handle, m, build_params.graph_degree);
+
+      raft::copy(indices_d.data_handle(),
+                 graph.graph().data_handle(),
+                 m * build_params.graph_degree,
+                 stream);
+
+      RAFT_EXPECTS(graph.distances().has_value(),
+                   "return_distances for nn descent should be set to true to be used for HDBSCAN");
+      copy_first_k_cols_shift_zero<float>
+        <<<num_blocks, TPB, 0, stream>>>(dists,
+                                         graph.distances().value().data_handle(),
+                                         static_cast<size_t>(k),
+                                         build_params.graph_degree,
+                                         m);
+      copy_first_k_cols_shift_self<int64_t>
+        <<<num_blocks, TPB, 0, stream>>>(int64_indices.data(),
+                                         indices_d.data_handle(),
+                                         static_cast<size_t>(k),
+                                         build_params.graph_degree,
+                                         m);
+      break;
+    }
+  }
 
   // convert from current knn's 64-bit to 32-bit.
   thrust::transform(exec_policy,
@@ -134,13 +240,16 @@ void compute_knn(const raft::handle_t& handle,
          to compute core_dists
 */
 template <typename value_idx, typename value_t>
-void _compute_core_dists(const raft::handle_t& handle,
-                         const value_t* X,
-                         value_t* core_dists,
-                         size_t m,
-                         size_t n,
-                         raft::distance::DistanceType metric,
-                         int min_samples)
+void _compute_core_dists(
+  const raft::handle_t& handle,
+  const value_t* X,
+  value_t* core_dists,
+  size_t m,
+  size_t n,
+  raft::distance::DistanceType metric,
+  int min_samples,
+  Common::GRAPH_BUILD_ALGO build_algo  = Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN,
+  Common::nn_index_params build_params = Common::nn_index_params{})
 {
   RAFT_EXPECTS(metric == raft::distance::DistanceType::L2SqrtExpanded,
                "Currently only L2 expanded distance is supported");
@@ -151,7 +260,18 @@ void _compute_core_dists(const raft::handle_t& handle,
   rmm::device_uvector<value_t> dists(min_samples * m, stream);
 
   // perform knn
-  compute_knn(handle, X, inds.data(), dists.data(), m, n, X, m, min_samples, metric);
+  compute_knn(handle,
+              X,
+              inds.data(),
+              dists.data(),
+              m,
+              n,
+              X,
+              m,
+              min_samples,
+              metric,
+              build_algo,
+              build_params);
 
   // Slice core distances (distances to kth nearest neighbor)
   core_distances<value_idx>(dists.data(), min_samples, min_samples, m, core_dists, stream);
@@ -169,6 +289,22 @@ struct ReachabilityPostProcess {
   value_t alpha;
 };
 
+// Distance epilogue for NN Descent that projects a squared (L2Expanded) distance into mutual
+// reachability space: max(core_dists[row], core_dists[col], alpha * sqrt(|value|)).
+template <typename value_idx, typename value_t = float>
+struct ReachabilityPostProcessSqrt : NNDescent::DistEpilogue<value_idx, value_t> {
+  ReachabilityPostProcessSqrt(value_t* core_dists_, value_t alpha_)
+    : NNDescent::DistEpilogue<value_idx, value_t>(), core_dists(core_dists_), alpha(alpha_){};
+  // Take the root first, then scale: alpha multiplies the distance, not its square.
+  DI value_t operator()(value_t value, value_idx row, value_idx col) const
+  {
+    return max(core_dists[col], max(core_dists[row], alpha * sqrtf(fabsf(value))));
+  }
+
+  const value_t* core_dists;
+  value_t alpha;
+};
+
 /**
  * Given core distances, Fuses computations of L2 distances between all
  * points, projection into mutual reachability space, and k-selection.
@@ -184,38 +320,79 @@ struct ReachabilityPostProcess {
  * @param[in] core_dists array of core distances (size m)
  */
 template <typename value_idx, typename value_t>
-void mutual_reachability_knn_l2(const raft::handle_t& handle,
-                                value_idx* out_inds,
-                                value_t* out_dists,
-                                const value_t* X,
-                                size_t m,
-                                size_t n,
-                                int k,
-                                value_t* core_dists,
-                                value_t alpha)
+void mutual_reachability_knn_l2(
+  const raft::handle_t& handle,
+  value_idx* out_inds,
+  value_t* out_dists,
+  const value_t* X,
+  size_t m,
+  size_t n,
+  int k,
+  value_t* core_dists,
+  value_t alpha,
+  Common::GRAPH_BUILD_ALGO build_algo  = Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN,
+  Common::nn_index_params build_params = Common::nn_index_params{})
 {
   // Create a functor to postprocess distances into mutual reachability space
   // Note that we can't use a lambda for this here, since we get errors like:
   // `A type local to a function cannot be used in the template argument of the
   // enclosing parent function (and any parent classes) of an extended __device__
   // or __host__ __device__ lambda`
-  auto epilogue = ReachabilityPostProcess<value_idx, value_t>{core_dists, alpha};
-
-  auto X_view = raft::make_device_matrix_view(X, m, n);
-  std::vector<raft::device_matrix_view<const value_t, size_t>> index = {X_view};
-
-  raft::neighbors::brute_force::knn<value_idx, value_t>(
-    handle,
-    index,
-    X_view,
-    raft::make_device_matrix_view(out_inds, m, static_cast<size_t>(k)),
-    raft::make_device_matrix_view(out_dists, m, static_cast<size_t>(k)),
-    // TODO: expand distance metrics to support more than just L2 distance
-    // https://github.com/rapidsai/cuml/issues/5301
-    raft::distance::DistanceType::L2SqrtExpanded,
-    std::make_optional<float>(2.0f),
-    std::nullopt,
-    epilogue);
+  switch (build_algo) {  // no default case: any other value leaves the outputs untouched
+    case Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN: {
+      auto epilogue = ReachabilityPostProcess<value_idx, value_t>{core_dists, alpha};
+      auto X_view   = raft::make_device_matrix_view(X, m, n);
+      std::vector<raft::device_matrix_view<const value_t, size_t>> index = {X_view};
+
+      raft::neighbors::brute_force::knn<value_idx, value_t>(
+        handle,
+        index,
+        X_view,
+        raft::make_device_matrix_view(out_inds, m, static_cast<size_t>(k)),
+        raft::make_device_matrix_view(out_dists, m, static_cast<size_t>(k)),
+        // TODO: expand distance metrics to support more than just L2 distance
+        // https://github.com/rapidsai/cuml/issues/5301
+        raft::distance::DistanceType::L2SqrtExpanded,
+        std::make_optional<float>(2.0f),
+        std::nullopt,
+        epilogue);
+      break;
+    }
+
+    case Common::GRAPH_BUILD_ALGO::NN_DESCENT: {
+      RAFT_EXPECTS(static_cast<size_t>(k) <= build_params.graph_degree,
+                   "n_neighbors should be smaller than the graph degree computed by nn descent");
+
+      auto epilogue = ReachabilityPostProcessSqrt<value_idx, value_t>(core_dists, alpha);
+      build_params.return_distances = true;  // the distances are sliced into out_dists below
+      auto graph = get_graph_nnd<value_idx, value_t>(handle, X, m, n, epilogue, build_params);
+
+      auto indices_d =
+        raft::make_device_matrix<value_idx, value_idx>(handle, m, build_params.graph_degree);
+
+      raft::copy(indices_d.data_handle(),  // copy the graph's neighbor ids into indices_d
+                 graph.graph().data_handle(),
+                 m * build_params.graph_degree,
+                 handle.get_stream());
+
+      RAFT_EXPECTS(graph.distances().has_value(),
+                   "return_distances for nn descent should be set to true to be used for HDBSCAN");
+
+      raft::matrix::slice_coordinates coords{static_cast<int64_t>(0),  // assumed: [0,m) x [0,k)
+                                             static_cast<int64_t>(0),
+                                             static_cast<int64_t>(m),
+                                             static_cast<int64_t>(k)};
+
+      auto out_knn_dists_view = raft::make_device_matrix_view(out_dists, m, (size_t)k);
+      raft::matrix::slice<float, int64_t, raft::row_major>(  // NOTE: float, not value_t
+        handle, raft::make_const_mdspan(graph.distances().value()), out_knn_dists_view, coords);
+      auto out_knn_indices_view =
+        raft::make_device_matrix_view<value_idx, int64_t>(out_inds, m, (size_t)k);
+      raft::matrix::slice<value_idx, int64_t, raft::row_major>(
+        handle, raft::make_const_mdspan(indices_d.view()), out_knn_indices_view, coords);
+      break;
+    }
+  }
 }
 
 /**
@@ -260,16 +437,19 @@ void mutual_reachability_knn_l2(const raft::handle_t& handle,
  *             neighbors.
  */
 template <typename value_idx, typename value_t>
-void mutual_reachability_graph(const raft::handle_t& handle,
-                               const value_t* X,
-                               size_t m,
-                               size_t n,
-                               raft::distance::DistanceType metric,
-                               int min_samples,
-                               value_t alpha,
-                               value_idx* indptr,
-                               value_t* core_dists,
-                               raft::sparse::COO<value_t, value_idx>& out)
+void mutual_reachability_graph(
+  const raft::handle_t& handle,
+  const value_t* X,
+  size_t m,
+  size_t n,
+  raft::distance::DistanceType metric,
+  int min_samples,
+  value_t alpha,
+  value_idx* indptr,
+  value_t* core_dists,
+  raft::sparse::COO<value_t, value_idx>& out,
+  Common::GRAPH_BUILD_ALGO build_algo  = Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN,
+  Common::nn_index_params build_params = Common::nn_index_params{})
 {
   RAFT_EXPECTS(metric == raft::distance::DistanceType::L2SqrtExpanded,
                "Currently only L2 expanded distance is supported");
@@ -282,7 +462,18 @@ void mutual_reachability_graph(const raft::handle_t& handle,
   rmm::device_uvector<value_t> dists(min_samples * m, stream);
 
   // perform knn
-  compute_knn(handle, X, inds.data(), dists.data(), m, n, X, m, min_samples, metric);
+  compute_knn(handle,
+              X,
+              inds.data(),
+              dists.data(),
+              m,
+              n,
+              X,
+              m,
+              min_samples,
+              metric,
+              build_algo,
+              build_params);
 
   // Slice core distances (distances to kth nearest neighbor)
   core_distances<value_idx>(dists.data(), min_samples, min_samples, m, core_dists, stream);
@@ -290,8 +481,17 @@ void mutual_reachability_graph(const raft::handle_t& handle,
   /**
    * Compute L2 norm
    */
-  mutual_reachability_knn_l2(
-    handle, inds.data(), dists.data(), X, m, n, min_samples, core_dists, (value_t)1.0 / alpha);
+  mutual_reachability_knn_l2(handle,
+                             inds.data(),
+                             dists.data(),
+                             X,
+                             m,
+                             n,
+                             min_samples,
+                             core_dists,
+                             (value_t)1.0 / alpha,
+                             build_algo,
+                             build_params);
 
   // self-loops get max distance
   auto coo_rows_counting_itr = thrust::make_counting_iterator<value_idx>(0);
diff --git a/cpp/src/hdbscan/hdbscan.cu b/cpp/src/hdbscan/hdbscan.cu
index ea64d20f6b..32ef78b470 100644
--- a/cpp/src/hdbscan/hdbscan.cu
+++ b/cpp/src/hdbscan/hdbscan.cu
@@ -158,10 +158,12 @@ void compute_core_dists(const raft::handle_t& handle,
                         size_t m,
                         size_t n,
                         raft::distance::DistanceType metric,
-                        int min_samples)
+                        int min_samples,
+                        HDBSCAN::Common::GRAPH_BUILD_ALGO build_algo,
+                        HDBSCAN::Common::nn_index_params build_params)
 {
   HDBSCAN::detail::Reachability::_compute_core_dists<int, float>(
-    handle, X, core_dists, m, n, metric, min_samples);
+    handle, X, core_dists, m, n, metric, min_samples, build_algo, build_params);
 }
 
 void compute_inverse_label_map(const raft::handle_t& handle,
diff --git a/cpp/src/hdbscan/runner.h b/cpp/src/hdbscan/runner.h
index c79148eed2..d9591bf0f1 100644
--- a/cpp/src/hdbscan/runner.h
+++ b/cpp/src/hdbscan/runner.h
@@ -183,7 +183,9 @@ void build_linkage(const raft::handle_t& handle,
                                                   params.alpha,
                                                   mutual_reachability_indptr.data(),
                                                   core_dists,
-                                                  mutual_reachability_coo);
+                                                  mutual_reachability_coo,
+                                                  params.build_algo,
+                                                  params.nn_descent_params);
 
   /**
    * Construct MST sorted by weights
diff --git a/python/cuml/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
index f7691c1684..37ef2ded3d 100644
--- a/python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
+++ b/python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@ from cuml.internals.safe_imports import gpu_only_import
 cp = gpu_only_import('cupy')
 from warnings import warn
 
+from cuml.internals import logger
 from cuml.internals.array import CumlArray
 from cuml.internals.base import UniversalBase
 from cuml.common.doc_utils import generate_docstring
@@ -46,12 +47,25 @@ IF GPUBUILD == 1:
     from pylibraft.common.handle import Handle
     from pylibraft.common.handle cimport handle_t
 
+    cdef extern from "raft/neighbors/nn_descent_types.hpp" namespace "raft::neighbors::experimental::nn_descent":
+        cdef struct index_params:  # NN Descent build parameters, passed by value to the C++ API
+            size_t graph_degree,
+            size_t intermediate_graph_degree,
+            size_t max_iterations,
+            float termination_threshold,
+            bool return_distances,
+            size_t n_clusters,  # declared, but not assigned in the code visible in this patch
+
     cdef extern from "cuml/cluster/hdbscan.hpp" namespace "ML::HDBSCAN::Common":
 
         ctypedef enum CLUSTER_SELECTION_METHOD:
             EOM "ML::HDBSCAN::Common::CLUSTER_SELECTION_METHOD::EOM"
             LEAF "ML::HDBSCAN::Common::CLUSTER_SELECTION_METHOD::LEAF"
 
+        ctypedef enum GRAPH_BUILD_ALGO:
+            BRUTE_FORCE_KNN "ML::HDBSCAN::Common::GRAPH_BUILD_ALGO::BRUTE_FORCE_KNN"
+            NN_DESCENT "ML::HDBSCAN::Common::GRAPH_BUILD_ALGO::NN_DESCENT"
+
         cdef cppclass CondensedHierarchy[value_idx, value_t]:
             CondensedHierarchy(
                 const handle_t &handle, size_t n_leaves)
@@ -98,6 +112,8 @@ IF GPUBUILD == 1:
 
             bool allow_single_cluster,
             CLUSTER_SELECTION_METHOD cluster_selection_method,
+            GRAPH_BUILD_ALGO build_algo,
+            index_params nn_descent_params,
 
         cdef cppclass PredictionData[int, float]:
             PredictionData(const handle_t &handle,
@@ -151,7 +167,9 @@ IF GPUBUILD == 1:
                                 size_t m,
                                 size_t n,
                                 DistanceType metric,
-                                int min_samples)
+                                int min_samples,
+                                GRAPH_BUILD_ALGO build_algo,
+                                index_params build_params)
 
         void compute_inverse_label_map(const handle_t& handle,
                                        CondensedHierarchy[int, float]&
@@ -501,7 +519,9 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
                  verbose=False,
                  connectivity='knn',
                  output_type=None,
-                 prediction_data=False):
+                 prediction_data=False,
+                 build_algo='auto',
+                 build_kwds=None):
 
         super().__init__(handle=handle,
                          verbose=verbose,
@@ -532,6 +552,9 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         self.fit_called_ = False
         self.prediction_data = prediction_data
 
+        self.build_algo = build_algo
+        self.build_kwds = build_kwds
+
         self.n_clusters_ = None
         self.n_leaves_ = None
 
@@ -547,6 +570,8 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
         self.prediction_data_ptr = None
         self._cpu_to_gpu_interop_prepped = False
 
+        logger.set_level(verbose)
+
     @property
     def condensed_tree_(self):
 
@@ -831,6 +856,35 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
                 raise ValueError("Cluster selection method not supported. "
                                  "Must one of {'eom', 'leaf'}")
 
+            if self.build_algo == "auto":
+                if self.n_rows <= 50000:
+                    # brute force is faster for small datasets
+                    logger.warn("Building knn graph using brute force")
+                    self.build_algo = "brute_force_knn"
+                else:
+                    logger.warn("Building knn graph using nn descent")
+                    self.build_algo = "nn_descent"
+
+            if self.build_algo == 'brute_force_knn':
+                params.build_algo = GRAPH_BUILD_ALGO.BRUTE_FORCE_KNN
+            elif self.build_algo == 'nn_descent':
+                params.build_algo = GRAPH_BUILD_ALGO.NN_DESCENT
+                if self.build_kwds is None:
+                    params.nn_descent_params.graph_degree = <size_t> 64
+                    params.nn_descent_params.intermediate_graph_degree = <size_t> 128
+                    params.nn_descent_params.max_iterations = <size_t> 20
+                    params.nn_descent_params.termination_threshold = <float> 0.0001
+                    params.nn_descent_params.return_distances = <bool> True
+                else:
+                    params.nn_descent_params.graph_degree = <size_t> self.build_kwds.get("nnd_graph_degree", 64)
+                    params.nn_descent_params.intermediate_graph_degree = <size_t> self.build_kwds.get("nnd_intermediate_graph_degree", 128)
+                    params.nn_descent_params.max_iterations = <size_t> self.build_kwds.get("nnd_max_iterations", 20)
+                    params.nn_descent_params.termination_threshold = <float> self.build_kwds.get("nnd_termination_threshold", 0.0001)
+                    params.nn_descent_params.return_distances = <bool> self.build_kwds.get("nnd_return_distances", True)
+            else:
+                raise ValueError("Build algo not supported. "
+                                 "Must one of {'brute_force_knn', 'nn_descent'}")
+
             cdef DistanceType metric
             if self.metric in _metrics_mapping:
                 metric = _metrics_mapping[self.metric]
@@ -1071,13 +1125,44 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
             cdef uintptr_t X_ptr = self.X_m.ptr
             cdef uintptr_t core_dists_ptr = self.core_dists.ptr
 
+            cdef GRAPH_BUILD_ALGO build_algo
+            cdef index_params build_params
+
+            if self.build_algo == "auto":
+                if self.n_rows <= 50000:
+                    # brute force is faster for small datasets
+                    logger.warn("Building knn graph using brute force")
+                    self.build_algo = "brute_force_knn"
+                else:
+                    logger.warn("Building knn graph using nn descent")
+                    self.build_algo = "nn_descent"
+
+            if self.build_algo == 'brute_force_knn':
+                build_algo = GRAPH_BUILD_ALGO.BRUTE_FORCE_KNN
+            elif self.build_algo == 'nn_descent':
+                build_algo = GRAPH_BUILD_ALGO.NN_DESCENT
+                if self.build_kwds is None:
+                    build_params.graph_degree = <size_t> 64
+                    build_params.intermediate_graph_degree = <size_t> 128
+                    build_params.max_iterations = <size_t> 20
+                    build_params.termination_threshold = <float> 0.0001
+                    build_params.return_distances = <bool> True
+                else:
+                    build_params.graph_degree = <size_t> self.build_kwds.get("nnd_graph_degree", 64)
+                    build_params.intermediate_graph_degree = <size_t> self.build_kwds.get("nnd_intermediate_graph_degree", 128)
+                    build_params.max_iterations = <size_t> self.build_kwds.get("nnd_max_iterations", 20)
+                    build_params.termination_threshold = <float> self.build_kwds.get("nnd_termination_threshold", 0.0001)
+                    build_params.return_distances = <bool> self.build_kwds.get("nnd_return_distances", True)
+
             compute_core_dists(handle_[0],
                                <float*> X_ptr,
                                <float*> core_dists_ptr,
                                <size_t> self.n_rows,
                                <size_t> self.n_cols,
                                <DistanceType> metric,
-                               <int> self.min_samples)
+                               <int> self.min_samples,
+                               <GRAPH_BUILD_ALGO> build_algo,
+                               build_params)
 
             cdef device_uvector[int] *inverse_label_map = \
                 new device_uvector[int](0, handle_[0].get_stream())
@@ -1125,7 +1210,9 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
             "connectivity",
             "alpha",
             "gen_min_span_tree",
-            "prediction_data"
+            "prediction_data",
+            "build_algo",
+            "build_kwds"
         ]
 
     def get_attr_names(self):
diff --git a/python/cuml/cuml/tests/test_hdbscan.py b/python/cuml/cuml/tests/test_hdbscan.py
index 0a9a3a6382..a061f71d56 100644
--- a/python/cuml/cuml/tests/test_hdbscan.py
+++ b/python/cuml/cuml/tests/test_hdbscan.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -44,6 +44,12 @@
 dataset_names = ["noisy_circles", "noisy_moons", "varied"]
 
 
+def get_graph_degree(n_samples):
+    # Round both degrees up to the next multiple of 32 (graph_degree is at least 64).
+    graph_degree = max(int((1 + ((n_samples * 1.5) // 32)) * 32), 64)
+    return graph_degree, int((1 + ((graph_degree * 1.3) // 32)) * 32)
+
+
 def assert_cluster_counts(sk_agg, cuml_agg, digits=25):
     sk_unique, sk_counts = np.unique(sk_agg.labels_, return_counts=True)
     sk_counts = np.sort(sk_counts)
@@ -142,14 +148,19 @@ def assert_membership_vectors(cu_vecs, sk_vecs):
         cu_labels_sorted = np.argsort(cu_vecs)[::-1]
         sk_labels_sorted = np.argsort(sk_vecs)[::-1]
 
-        k = min(sk_vecs.shape[1], 10)
-        for i in range(k):
+        if len(sk_vecs.shape) == 1:
             assert (
-                adjusted_rand_score(
-                    cu_labels_sorted[:, i], sk_labels_sorted[:, i]
-                )
-                >= 0.90
+                adjusted_rand_score(cu_labels_sorted, sk_labels_sorted) >= 0.9
             )
+        else:
+            k = min(sk_vecs.shape[1], 10)
+            for i in range(k):
+                assert (
+                    adjusted_rand_score(
+                        cu_labels_sorted[:, i], sk_labels_sorted[:, i]
+                    )
+                    >= 0.9
+                )
 
 
 @pytest.mark.parametrize("nrows", [500])
@@ -308,6 +319,7 @@ def test_hdbscan_sklearn_extract_clusters(
     allow_single_cluster,
 ):
     X = test_datasets.data
+
     cuml_agg = HDBSCAN(
         verbose=logger.level_info,
         allow_single_cluster=allow_single_cluster,
@@ -349,6 +361,7 @@ def test_hdbscan_sklearn_extract_clusters(
 @pytest.mark.parametrize("max_cluster_size", [0])
 @pytest.mark.parametrize("cluster_selection_method", ["eom"])
 @pytest.mark.parametrize("connectivity", ["knn"])
+@pytest.mark.parametrize("build_algo", ["brute_force_knn", "nn_descent"])
 def test_hdbscan_cluster_patterns(
     dataset,
     nrows,
@@ -359,11 +372,11 @@ def test_hdbscan_cluster_patterns(
     allow_single_cluster,
     max_cluster_size,
     min_samples,
+    build_algo,
 ):
 
     # This also tests duplicate data points
     X, y = get_pattern(dataset, nrows)[0]
-
     cuml_agg = HDBSCAN(
         verbose=logger.level_info,
         allow_single_cluster=allow_single_cluster,
@@ -372,6 +385,7 @@ def test_hdbscan_cluster_patterns(
         min_cluster_size=min_cluster_size,
         cluster_selection_epsilon=cluster_selection_epsilon,
         cluster_selection_method=cluster_selection_method,
+        build_algo=build_algo,
     )
 
     cuml_agg.fit(X)
@@ -412,6 +426,7 @@ def test_hdbscan_cluster_patterns(
 @pytest.mark.parametrize("max_cluster_size", [0])
 @pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
 @pytest.mark.parametrize("connectivity", ["knn"])
+@pytest.mark.parametrize("build_algo", ["brute_force_knn", "nn_descent"])
 def test_hdbscan_cluster_patterns_extract_clusters(
     dataset,
     nrows,
@@ -422,11 +437,12 @@ def test_hdbscan_cluster_patterns_extract_clusters(
     allow_single_cluster,
     max_cluster_size,
     min_samples,
+    build_algo,
 ):
 
     # This also tests duplicate data points
     X, y = get_pattern(dataset, nrows)[0]
-
+    graph_degree, intermediate_graph_degree = get_graph_degree(min_samples)
     cuml_agg = HDBSCAN(
         verbose=logger.level_info,
         allow_single_cluster=allow_single_cluster,
@@ -435,6 +451,11 @@ def test_hdbscan_cluster_patterns_extract_clusters(
         min_cluster_size=min_cluster_size,
         cluster_selection_epsilon=cluster_selection_epsilon,
         cluster_selection_method=cluster_selection_method,
+        build_algo=build_algo,
+        build_kwds={
+            "nnd_graph_degree": graph_degree,
+            "nnd_intermediate_graph_degree": intermediate_graph_degree,
+        },
     )
 
     sk_agg = hdbscan.HDBSCAN(
@@ -494,7 +515,8 @@ def test_hdbscan_metric_parameter_input(metric, supported):
             clf.fit(X)
 
 
-def test_hdbscan_empty_cluster_tree():
+@pytest.mark.parametrize("build_algo", ["brute_force_knn", "nn_descent"])
+def test_hdbscan_empty_cluster_tree(build_algo):
 
     raw_tree = np.recarray(
         shape=(5,),
@@ -510,7 +532,9 @@ def test_hdbscan_empty_cluster_tree():
     condensed_tree = CondensedTree(raw_tree, 0.0, True)
 
     cuml_agg = HDBSCAN(
-        allow_single_cluster=True, cluster_selection_method="eom"
+        allow_single_cluster=True,
+        cluster_selection_method="eom",
+        build_algo=build_algo,
     )
     cuml_agg._extract_clusters(condensed_tree)
 
@@ -570,7 +594,6 @@ def test_all_points_membership_vectors_blobs(
         shuffle=True,
         random_state=42,
     )
-
     cuml_agg = HDBSCAN(
         verbose=logger.level_info,
         allow_single_cluster=allow_single_cluster,
@@ -613,6 +636,7 @@ def test_all_points_membership_vectors_blobs(
 @pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
 @pytest.mark.parametrize("connectivity", ["knn"])
 @pytest.mark.parametrize("batch_size", [128, 1000])
+@pytest.mark.parametrize("build_algo", ["brute_force_knn", "nn_descent"])
 def test_all_points_membership_vectors_moons(
     nrows,
     min_samples,
@@ -623,6 +647,7 @@ def test_all_points_membership_vectors_moons(
     max_cluster_size,
     connectivity,
     batch_size,
+    build_algo,
 ):
 
     X, y = datasets.make_moons(n_samples=nrows, noise=0.05, random_state=42)
@@ -636,6 +661,7 @@ def test_all_points_membership_vectors_moons(
         cluster_selection_epsilon=cluster_selection_epsilon,
         cluster_selection_method=cluster_selection_method,
         prediction_data=True,
+        build_algo=build_algo,
     )
     cuml_agg.fit(X)
 
@@ -934,6 +960,7 @@ def test_approximate_predict_circles(
 @pytest.mark.parametrize("max_cluster_size", [0])
 @pytest.mark.parametrize("cluster_selection_method", ["eom"])
 @pytest.mark.parametrize("connectivity", ["knn"])
+@pytest.mark.parametrize("build_algo", ["brute_force_knn", "nn_descent"])
 def test_approximate_predict_digits(
     n_points_to_predict,
     min_samples,
@@ -943,6 +970,7 @@ def test_approximate_predict_digits(
     max_cluster_size,
     cluster_selection_method,
     connectivity,
+    build_algo,
 ):
     digits = datasets.load_digits()
     X, y = digits.data, digits.target
@@ -966,6 +994,7 @@ def test_approximate_predict_digits(
         cluster_selection_epsilon=cluster_selection_epsilon,
         cluster_selection_method=cluster_selection_method,
         prediction_data=True,
+        build_algo=build_algo,
     )
 
     cuml_agg.fit(X_train)
@@ -1077,6 +1106,7 @@ def test_membership_vector_blobs(
 @pytest.mark.parametrize("cluster_selection_method", ["eom", "leaf"])
 @pytest.mark.parametrize("connectivity", ["knn"])
 @pytest.mark.parametrize("batch_size", [16])
+@pytest.mark.parametrize("build_algo", ["brute_force_knn", "nn_descent"])
 def test_membership_vector_moons(
     nrows,
     n_points_to_predict,
@@ -1088,6 +1118,7 @@ def test_membership_vector_moons(
     max_cluster_size,
     connectivity,
     batch_size,
+    build_algo,
 ):
 
     X, y = datasets.make_moons(
@@ -1106,6 +1137,7 @@ def test_membership_vector_moons(
         cluster_selection_epsilon=cluster_selection_epsilon,
         cluster_selection_method=cluster_selection_method,
         prediction_data=True,
+        build_algo=build_algo,
     )
     cuml_agg.fit(X_train)
 
@@ -1193,5 +1225,4 @@ def test_membership_vector_circles(
     sk_membership_vectors = hdbscan.membership_vector(sk_agg, X_test).astype(
         "float32"
     )
-
     assert_membership_vectors(cu_membership_vectors, sk_membership_vectors)