Change default nn_method to 'kdtree'

dpeerlab · Oct 13, 2016 · 7f4bc18 · 7f4bc18
1 parent b290adf
commit 7f4bc18
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -55,13 +55,13 @@ Release Notes
 
 ### Version 1.5.2
 
- * Include simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This is now the default for nearest neighbors computations. It is typically more efficient than `kdtree`
- and avoids memory issues that arise in `sklearn`'s implementation with large data sets
+ * Include a simple parallel implementation of brute-force nearest neighbor search using SciPy's `cdist` and `multiprocessing`. This may be more efficient than `kdtree` on very large, high-dimensional data sets
+ and avoids memory issues that arise in `sklearn`'s implementation.
  * Refactor `parallel_jaccard_kernel` to remove unnecessary use of `ctypes` and `multiprocessing.Array`.
 
 ### Version 1.5.1
 
- * Make `louvain_time_limit` a parameter to `phenograph.cluster`
+ * Make `louvain_time_limit` a parameter to `phenograph.cluster`.
 
 ### Version 1.5
 

diff --git a/phenograph/cluster.py b/phenograph/cluster.py
@@ -30,7 +30,7 @@ def sort_by_size(clusters, min_size):
 
 def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccard=True,
             primary_metric='euclidean', n_jobs=-1, q_tol=1e-3, louvain_time_limit=2000,
-            nn_method='brute'):
+            nn_method='kdtree'):
     """
     PhenoGraph clustering
 
@@ -54,7 +54,7 @@ def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccar
     :param q_tol: Tolerance (i.e., precision) for monitoring modularity optimization
     :param louvain_time_limit: Maximum number of seconds to run modularity optimization. If exceeded
         the best result so far is returned
-    :param nn_method: Whether to use brute force or kdtree for nearest neighbor search. For most high-dimensional
+    :param nn_method: Nearest neighbor search method: 'brute' (brute force) or 'kdtree' (the default). For very large high-dimensional
         data sets, brute force (with parallel computation) performs faster than kdtree.
 
     :return communities: numpy integer array of community assignments for each row in data