From 7f4bc18699adb513a983ba183fdc7c989c219b85 Mon Sep 17 00:00:00 2001
From: Jacob Levine <jl3545@columbia.edu>
Date: Thu, 13 Oct 2016 14:08:20 -0400
Subject: [PATCH] Change default nn_method to 'kdtree'

---
 README.md             | 6 +++---
 phenograph/cluster.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index bdd1b35..181fb1d 100644
--- a/README.md
+++ b/README.md
@@ -55,13 +55,13 @@ Release Notes
 
 ### Version 1.5.2
 
- * Include simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This is now the default for nearest neighbors computations. It is typically more efficient than `kdtree`
- and avoids memory issues that arise in `sklearn`'s implementation with large data sets
+ * Include a simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This may be more efficient than `kdtree` on very large, high-dimensional data sets
+ and avoids memory issues that arise in `sklearn`'s implementation.
  * Refactor `parallel_jaccard_kernel` to remove unnecessary use of `ctypes` and `multiprocessing.Array`.
 
 ### Version 1.5.1
 
- * Make `louvain_time_limit` a parameter to `phenograph.cluster`
+ * Make `louvain_time_limit` a parameter to `phenograph.cluster`.
 
 ### Version 1.5
 
diff --git a/phenograph/cluster.py b/phenograph/cluster.py
index e9d04a5..2180051 100644
--- a/phenograph/cluster.py
+++ b/phenograph/cluster.py
@@ -30,7 +30,7 @@ def sort_by_size(clusters, min_size):
 
 def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccard=True,
             primary_metric='euclidean', n_jobs=-1, q_tol=1e-3, louvain_time_limit=2000,
-            nn_method='brute'):
+            nn_method='kdtree'):
     """
     PhenoGraph clustering
 
@@ -54,7 +54,7 @@ def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccar
     :param q_tol: Tolerance (i.e., precision) for monitoring modularity optimization
     :param louvain_time_limit: Maximum number of seconds to run modularity optimization. If exceeded
         the best result so far is returned
-    :param nn_method: Whether to use brute force or kdtree for nearest neighbor search. For most high-dimensional
+    :param nn_method: Nearest neighbor search method, either 'brute' (brute force) or 'kdtree' (the default). For very large high-dimensional
         data sets, brute force (with parallel computation) performs faster than kdtree.
 
     :return communities: numpy integer array of community assignments for each row in data