Skip to content

Commit

Permalink
Add optional GPU HDBSCAN clustering (cuML)
Browse files: browse the repository at this point in the history
  • Loading branch information
ddangelov committed Nov 2, 2023
1 parent 42a764b commit de06e56
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions top2vec/Top2Vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
except ImportError:
_HAVE_CUMAP = False

try:
from cuml.cluster import HDBSCAN as cuHDBSCAN

_HAVE_CUHDBSCAN = True
except ImportError:
_HAVE_CUHDBSCAN = False

try:
import hnswlib

Expand Down Expand Up @@ -1369,13 +1376,19 @@ def compute_topics(self,
'metric': 'euclidean',
'cluster_selection_method': 'eom'}

cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_embedding)
if gpu_hdbscan and _HAVE_CUHDBSCAN:
cluster = cuHDBSCAN(**hdbscan_args)
labels = cluster.fit_predict(umap_embedding)

else:
cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_embedding)
labels = cluster.labels_

# calculate topic vectors from dense areas of documents
logger.info('Finding topics')

# create topic vectors
self._create_topic_vectors(cluster.labels_)
self._create_topic_vectors(labels)

# deduplicate topics
self._deduplicate_topics(topic_merge_delta)
Expand Down

0 comments on commit de06e56

Please sign in to comment.