diff --git a/README.md b/README.md index bdd1b35..181fb1d 100644 --- a/README.md +++ b/README.md @@ -55,13 +55,13 @@ Release Notes ### Version 1.5.2 - * Include simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This is now the default for nearest neighbors computations. It is typically more efficient than `kdtree` - and avoids memory issues that arise in `sklearn`'s implementation with large data sets + * Include simple parallel implementation of brute force nearest neighbors search using scipy's `cdist` and `multiprocessing`. This may be more efficient than `kdtree` on very large high-dimensional data sets + and avoids memory issues that arise in `sklearn`'s implementation. * Refactor `parallel_jaccard_kernel` to remove unnecessary use of `ctypes` and `multiprocessing.Array`. ### Version 1.5.1 - * Make `louvain_time_limit` a parameter to `phenograph.cluster` + * Make `louvain_time_limit` a parameter to `phenograph.cluster`. ### Version 1.5 diff --git a/phenograph/cluster.py b/phenograph/cluster.py index e9d04a5..2180051 100644 --- a/phenograph/cluster.py +++ b/phenograph/cluster.py @@ -30,7 +30,7 @@ def sort_by_size(clusters, min_size): def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccard=True, primary_metric='euclidean', n_jobs=-1, q_tol=1e-3, louvain_time_limit=2000, - nn_method='brute'): + nn_method='kdtree'): """ PhenoGraph clustering @@ -54,7 +54,7 @@ def cluster(data, k=30, directed=False, prune=False, min_cluster_size=10, jaccar :param q_tol: Tolerance (i.e., precision) for monitoring modularity optimization :param louvain_time_limit: Maximum number of seconds to run modularity optimization. If exceeded the best result so far is returned - :param nn_method: Whether to use brute force or kdtree for nearest neighbor search. For most high-dimensional + :param nn_method: Whether to use brute force or kdtree for nearest neighbor search. For very large high-dimensional data sets, brute force (with parallel computation) performs faster than kdtree. :return communities: numpy integer array of community assignments for each row in data