From 7f4bc18699adb513a983ba183fdc7c989c219b85 Mon Sep 17 00:00:00 2001 From: Jacob Levine Date: Thu, 13 Oct 2016 14:08:20 -0400 Subject: [PATCH] Change default nn_method to 'kdtree' --- README.md | 6 +++--- phenograph/cluster.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bdd1b35..181fb1d 100644 --- a/README.md +++ b/README.md @@ -55,13 +55,13 @@ Release Notes ### Version 1.5.2 - * Include simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This is now the default for nearest neighbors computations. It is typically more efficient than `kdtree` - and avoids memory issues that arise in `sklearn`'s implementation with large data sets + * Include simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This may be more efficient than `kdtree` on very large high-dimensional data sets + and avoids memory issues that arise in `sklearn`'s implementation. * Refactor `parallel_jaccard_kernel` to remove unnecessary use of `ctypes` and `multiprocessing.Array`. ### Version 1.5.1 - * Make `louvain_time_limit` a parameter to `phenograph.cluster` + * Make `louvain_time_limit` a parameter to `phenograph.cluster`. ### Version 1.5 diff --git a/phenograph/cluster.py b/phenograph/cluster.py index e9d04a5..2180051 100644 --- a/phenograph/cluster.py +++ b/phenograph/cluster.py @@ -30,7 +30,7 @@ def sort_by_size(clusters, min_size): def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccard=True, primary_metric='euclidean', n_jobs=-1, q_tol=1e-3, louvain_time_limit=2000, - nn_method='brute'): + nn_method='kdtree'): """ PhenoGraph clustering @@ -54,7 +54,7 @@ def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccar :param q_tol: Tolerance (i.e., precision) for monitoring modularity optimization :param louvain_time_limit: Maximum number of seconds to run modularity optimization. If exceeded the best result so far is returned - :param nn_method: Whether to use brute force or kdtree for nearest neighbor search. For most high-dimensional + :param nn_method: Whether to use brute force or kdtree for nearest neighbor search. For very large high-dimensional data sets, brute force (with parallel computation) performs faster than kdtree. :return communities: numpy integer array of community assignments for each row in data