From 339b27343a4ee00a4cb13b0b1bd712c093d2f309 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:05:22 -0500
Subject: [PATCH 01/13] switch install references to pypi

---
 README.md               | 10 ++++++++--
 tutorial/10X_pbmc.ipynb |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a1140e8..53b03b1 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,8 @@ Multiscale PHATE
 ================
 
 [![Latest PyPi version](https://img.shields.io/pypi/v/multiscale_phate.svg)](https://pypi.org/project/multiscale_phate/)
-[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE)
-[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=master)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=master)
+[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=main)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE)
+[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=main)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=main)
 [![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab)
 [![GitHub stars](https://img.shields.io/github/stars/KrishnaswamyLab/Multiscale_PHATE.svg?style=social&label=Stars)](https://github.com/KrishnaswamyLab/Multiscale_PHATE/)
 [![Code style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -19,6 +19,12 @@ Installation
 
 Multiscale PHATE is available on `pip`. Install by running the following in a terminal:
 
+```
+pip install --user multiscale_phate
+```
+
+If you wish to install from source, you may do so as follows:
+
 ```
 pip install --user git+https://github.com/KrishnaswamyLab/Multiscale_PHATE
 ```
diff --git a/tutorial/10X_pbmc.ipynb b/tutorial/10X_pbmc.ipynb
index 236d892..2c897b1 100644
--- a/tutorial/10X_pbmc.ipynb
+++ b/tutorial/10X_pbmc.ipynb
@@ -41,7 +41,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install --user -q git+https://github.com/KrishnaswamyLab/Multiscale_PHATE"
+    "!pip install --user -q multiscale_phate"
    ]
   },
   {

From 918e8e84e711e3144dab0068753c3191d4b9d91a Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:06:19 -0500
Subject: [PATCH 02/13] bump version

---
 multiscale_phate/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/multiscale_phate/version.py b/multiscale_phate/version.py
index 76fe15d..8f02035 100644
--- a/multiscale_phate/version.py
+++ b/multiscale_phate/version.py
@@ -1 +1 @@
-__version__ = "0.0"
+__version__ = "0.1.0a0"

From dec7b2aa2ee1a00424b9dc5a388a9ae066520a97 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:10:47 -0500
Subject: [PATCH 03/13] fix docs

---
 multiscale_phate/multiscale_phate.py | 57 +++++++++-------------------
 1 file changed, 18 insertions(+), 39 deletions(-)

diff --git a/multiscale_phate/multiscale_phate.py b/multiscale_phate/multiscale_phate.py
index df0765d..15823e7 100644
--- a/multiscale_phate/multiscale_phate.py
+++ b/multiscale_phate/multiscale_phate.py
@@ -2,7 +2,9 @@
 
 
 class Multiscale_PHATE(object):
-    """Multscale PHATE operator which performs dimensionality reduction and clustering across granularities.
+    """Multiscale PHATE operator.
+
+    Performs dimensionality reduction and clustering across granularities.
 
     Parameters
     ----------
@@ -39,6 +41,13 @@ class Multiscale_PHATE(object):
         used at all, which is useful for debugging.
         For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for
         n_jobs = -2, all CPUs but one are used
+    random_state : integer or numpy.RandomState, optional, default: None
+        The generator used to initialize SMACOF (metric, nonmetric) MDS
+        If an integer is given, it fixes the seed
+        Defaults to the global `numpy` random number generator
+
+    Attributes
+    ----------
     NxTs : list of lists
         Cluster assignment for every point at all levels of Diffusion
         Condensation tree
@@ -70,37 +79,6 @@ class Multiscale_PHATE(object):
     levels : list
         List of salient resolutions for downstream analysis, computed via gradient
         analysis
-    random_state : integer or numpy.RandomState, optional, default: None
-        The generator used to initialize SMACOF (metric, nonmetric) MDS
-        If an integer is given, it fixes the seed
-        Defaults to the global `numpy` random number generator
-
-    Attributes
-    ----------
-    scale
-    landmarks
-    partitions
-    granularity
-    n_pca
-    decay
-    gamma
-    knn
-    n_jobs
-    NxTs
-    Xs
-    Ks
-    merges
-    Ps
-    diff_op
-    data_pca
-    pca_op
-    partition_clusters
-    dp_pca
-    epsilon
-    merge_threshold
-    gradient
-    levels
-
     """
 
     def __init__(
@@ -144,7 +122,7 @@ def __init__(
         super().__init__()
 
     def fit(self, X):
-        """Builds Diffusion Condensation tree and computes ideal resolutions.
+        """Build Diffusion Condensation tree and compute ideal resolutions.
 
         Parameters
         ----------
@@ -201,6 +179,7 @@ def transform(
         repulse=False,
     ):
         """Short summary.
+
         Parameters
         ----------
         visualization_level : int, default = levels[-2]
@@ -216,6 +195,7 @@ def transform(
             Cluster in 'coarse_cluster_level' to zoom in on.
         repulse  : bool, default = False
             Allows for repulsion between points in multiscale embedding.
+
         Returns
         -------
         embedding : array, shape=[number of points in visualization_level, 2]
@@ -228,7 +208,6 @@ def transform(
             Number of points aggregated into each point as visualized at
             the granularity of visualization_level
         """
-
         if visualization_level is None:
             visualization_level = self.levels[2]
         if cluster_level is None:
@@ -255,7 +234,7 @@ def transform(
             )
 
     def build_tree(self):
-        """Computes and returns a tree from the Diffusion Condensation process.
+        """Compute and return a tree from the Diffusion Condensation process.
 
         Returns
         -------
@@ -268,8 +247,9 @@ def build_tree(self):
         )
 
     def fit_transform(self, X):
-        """Builds Diffusion Condensation tree, identifies ideal resolutions and returns
-         Multiscale PHATE embedding and clusters.
+        """Build Diffusion Condensation tree and identify ideal resolutions.
+
+        Returns Multiscale PHATE embedding and clusters.
 
         Parameters
         ----------
@@ -294,7 +274,7 @@ def fit_transform(self, X):
         return self.transform()
 
     def get_tree_clusters(self, cluster_level):
-        """Colors Diffusion Condensation tree by a granularity of clusters.
+        """Color Diffusion Condensation tree by a granularity of clusters.
 
         Parameters
         ----------
@@ -306,6 +286,5 @@ def get_tree_clusters(self, cluster_level):
         clusters_tree : list, shape=[n_points_aggregated]
             Cluster labels of each point in computed diffusion condensation tree
             as dictated by a granularity of the tree
-
         """
         return visualize.map_clusters_to_tree(self.NxTs[cluster_level], self.NxTs)

From e31f61fbad0250395bb72fdf096f7d6d22f9d5fd Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:13:00 -0500
Subject: [PATCH 04/13] document hash

---
 multiscale_phate/utils.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/multiscale_phate/utils.py b/multiscale_phate/utils.py
index 63b013e..82c6125 100644
--- a/multiscale_phate/utils.py
+++ b/multiscale_phate/utils.py
@@ -2,17 +2,16 @@
 
 
 def hash_object(X):
-    """Short summary.
+    """Compute a unique hash of any Python object.
 
     Parameters
     ----------
-    X : type
-        Description of parameter `X`.
+    X : object
+        Object for which to compute unique hash
 
     Returns
     -------
-    type
-        Description of returned object.
-
+    hash : int
+        Integer hash of the pickle dump of X.
     """
     return hash(pickle.dumps(X))

From ca2ce05143531c9d1f3bb8b1434f01ed8caebc87 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:14:51 -0500
Subject: [PATCH 05/13] add TODOs

---
 multiscale_phate/compress.py  | 110 +++++++++++++++++-----------------
 multiscale_phate/condense.py  |  72 +++++++++++-----------
 multiscale_phate/diffuse.py   |  52 ++++++++--------
 multiscale_phate/embed.py     | 105 ++++++++++++++++----------------
 multiscale_phate/tree.py      | 108 ++++++++++++++++-----------------
 multiscale_phate/visualize.py |  34 +++++------
 6 files changed, 238 insertions(+), 243 deletions(-)

diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py
index 8f4d718..0d2800a 100644
--- a/multiscale_phate/compress.py
+++ b/multiscale_phate/compress.py
@@ -7,25 +7,25 @@
 
 
 def get_compression_features(N, features, n_pca, partitions, landmarks):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    N : type
-        Description of parameter `N`.
-    features : type
-        Description of parameter `features`.
-    n_pca : type
-        Description of parameter `n_pca`.
-    partitions : type
-        Description of parameter `partitions`.
-    landmarks : type
-        Description of parameter `landmarks`.
+    N : type TODO
+        Description of parameter `N`. TODO
+    features : type TODO
+        Description of parameter `features`. TODO
+    n_pca : type TODO
+        Description of parameter `n_pca`. TODO
+    partitions : type TODO
+        Description of parameter `partitions`. TODO
+    landmarks : type TODO
+        Description of parameter `landmarks`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     if n_pca == None:
@@ -49,16 +49,16 @@ def get_compression_features(N, features, n_pca, partitions, landmarks):
 
 
 def cluster_components(data_subset, num_cluster, size, random_state=None):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    data_subset : type
-        Description of parameter `data_subset`.
-    num_cluster : type
-        Description of parameter `num_cluster`.
-    size : type
-        Description of parameter `size`.
+    data_subset : type TODO
+        Description of parameter `data_subset`. TODO
+    num_cluster : type TODO
+        Description of parameter `num_cluster`. TODO
+    size : type TODO
+        Description of parameter `size`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize MiniBatchKMeans.
         If an integer is given, it fixes the seed.
@@ -66,8 +66,8 @@ def cluster_components(data_subset, num_cluster, size, random_state=None):
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     if data_subset.shape[0] == 1:
@@ -91,18 +91,18 @@ def cluster_components(data_subset, num_cluster, size, random_state=None):
 
 
 def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_state=None):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    data : type
-        Description of parameter `data`.
-    desired_num_clusters : type
-        Description of parameter `desired_num_clusters`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
-    num_cluster : type
-        Description of parameter `num_cluster`.
+    data : type TODO
+        Description of parameter `data`. TODO
+    desired_num_clusters : type TODO
+        Description of parameter `desired_num_clusters`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
+    num_cluster : type TODO
+        Description of parameter `num_cluster`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize MiniBatchKMeans.
         If an integer is given, it fixes the seed.
@@ -110,8 +110,8 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_stat
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     N = data.shape[0]
@@ -156,19 +156,19 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_stat
 
 
 def merge_clusters(diff_pot_unmerged, clusters):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    diff_pot_unmerged : type
-        Description of parameter `diff_pot_unmerged`.
-    clusters : type
-        Description of parameter `clusters`.
+    diff_pot_unmerged : type TODO
+        Description of parameter `diff_pot_unmerged`. TODO
+    clusters : type TODO
+        Description of parameter `clusters`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     clusters_uni = np.unique(clusters)
@@ -202,27 +202,27 @@ def get_distance_from_centroids(centroids, data, clusters):
 
 
 def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=10):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    centroids : type
-        Description of parameter `centroids`.
-    data : type
-        Description of parameter `data`.
-    new_data : type
-        Description of parameter `new_data`.
-    partition_clusters : type
-        Description of parameter `partition_clusters`.
-    nn : type
-        Description of parameter `nn`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
+    centroids : type TODO
+        Description of parameter `centroids`. TODO
+    data : type TODO
+        Description of parameter `data`. TODO
+    new_data : type TODO
+        Description of parameter `new_data`. TODO
+    partition_clusters : type TODO
+        Description of parameter `partition_clusters`. TODO
+    nn : type TODO
+        Description of parameter `nn`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     with tasklogger.log_task("map to computed partitions"):
diff --git a/multiscale_phate/condense.py b/multiscale_phate/condense.py
index 97fc560..e62b901 100644
--- a/multiscale_phate/condense.py
+++ b/multiscale_phate/condense.py
@@ -9,21 +9,21 @@
 
 
 def comp(node, neigh, visited):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    node : type
-        Description of parameter `node`.
-    neigh : type
-        Description of parameter `neigh`.
-    visited : type
-        Description of parameter `visited`.
+    node : type TODO
+        Description of parameter `node`. TODO
+    neigh : type TODO
+        Description of parameter `neigh`. TODO
+    visited : type TODO
+        Description of parameter `visited`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     vis = visited.add
@@ -37,17 +37,17 @@ def comp(node, neigh, visited):
 
 
 def merge_common(lists):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    lists : type
-        Description of parameter `lists`.
+    lists : type TODO
+        Description of parameter `lists`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     neigh = collections.defaultdict(set)
@@ -62,19 +62,19 @@ def merge_common(lists):
 
 
 def compute_condensation_param(X, granularity):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    X : type
-        Description of parameter `X`.
-    granularity : type
-        Description of parameter `granularity`.
+    X : type TODO
+        Description of parameter `X`. TODO
+    granularity : type TODO
+        Description of parameter `granularity`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     epsilon = granularity * (0.1 * np.mean(np.std(X))) / (X.shape[0] ** (-1 / 5))
@@ -86,22 +86,22 @@ def compute_condensation_param(X, granularity):
 
 
 def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state=None):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    X : type
-        Description of parameter `X`.
-    clusters : type
-        Description of parameter `clusters`.
-    scale : type
-        Description of parameter `scale`.
-    epsilon : type
-        Description of parameter `epsilon`.
-    merge_threshold : type
-        Description of parameter `merge_threshold`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
+    X : type TODO
+        Description of parameter `X`. TODO
+    clusters : type TODO
+        Description of parameter `clusters`. TODO
+    scale : type TODO
+        Description of parameter `scale`. TODO
+    epsilon : type TODO
+        Description of parameter `epsilon`. TODO
+    merge_threshold : type TODO
+        Description of parameter `merge_threshold`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize graphtools.
         If an integer is given, it fixes the seed.
@@ -109,8 +109,8 @@ def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state=
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     NxT = []
diff --git a/multiscale_phate/diffuse.py b/multiscale_phate/diffuse.py
index 7241cad..4b7bbac 100644
--- a/multiscale_phate/diffuse.py
+++ b/multiscale_phate/diffuse.py
@@ -9,24 +9,24 @@
 def compute_diffusion_potential(
     data, N, decay, gamma, knn, landmarks=2000, n_jobs=10, random_state=None
 ):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    data : type
-        Description of parameter `data`.
-    N : type
-        Description of parameter `N`.
-    decay : type
-        Description of parameter `decay`.
-    gamma : type
-        Description of parameter `gamma`.
-    knn : type
-        Description of parameter `knn`.
-    landmarks : type
-        Description of parameter `landmarks`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
+    data : type TODO
+        Description of parameter `data`. TODO
+    N : type TODO
+        Description of parameter `N`. TODO
+    decay : type TODO
+        Description of parameter `decay`. TODO
+    gamma : type TODO
+        Description of parameter `gamma`. TODO
+    knn : type TODO
+        Description of parameter `knn`. TODO
+    landmarks : type TODO
+        Description of parameter `landmarks`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize PHATE and PCA.
         If an integer is given, it fixes the seed.
@@ -34,8 +34,8 @@ def compute_diffusion_potential(
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     with tasklogger.log_task("diffusion potential"):
@@ -68,21 +68,21 @@ def compute_diffusion_potential(
 
 
 def online_update_diffusion_potential(unmapped_data, diff_op, dp_pca):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    unmapped_data : type
-        Description of parameter `unmapped_data`.
-    diff_op : type
-        Description of parameter `diff_op`.
-    dp_pca : type
-        Description of parameter `dp_pca`.
+    unmapped_data : type TODO
+        Description of parameter `unmapped_data`. TODO
+    diff_op : type TODO
+        Description of parameter `diff_op`. TODO
+    dp_pca : type TODO
+        Description of parameter `dp_pca`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     with tasklogger.log_task("extended diffusion potential"):
diff --git a/multiscale_phate/embed.py b/multiscale_phate/embed.py
index 07fec97..8099be7 100644
--- a/multiscale_phate/embed.py
+++ b/multiscale_phate/embed.py
@@ -4,17 +4,17 @@
 
 
 def repulsion(temp):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    temp : type
-        Description of parameter `temp`.
+    temp : type TODO
+        Description of parameter `temp`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     for r in range(temp.shape[0]):
@@ -29,19 +29,19 @@ def repulsion(temp):
 
 
 def condense_visualization(merge_pairs, phate):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    merge_pairs : type
-        Description of parameter `merge_pairs`.
-    phate : type
-        Description of parameter `phate`.
+    merge_pairs : type TODO
+        Description of parameter `merge_pairs`. TODO
+    phate : type TODO
+        Description of parameter `phate`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     to_delete = []
@@ -54,19 +54,19 @@ def condense_visualization(merge_pairs, phate):
 
 
 def compute_gradient(Xs, merges):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    Xs : type
-        Description of parameter `Xs`.
-    merges : type
-        Description of parameter `merges`.
+    Xs : type TODO
+        Description of parameter `Xs`. TODO
+    merges : type TODO
+        Description of parameter `merges`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     tasklogger.log_info("Computing gradient...")
@@ -89,19 +89,17 @@ def compute_gradient(Xs, merges):
 
 
 def get_levels(grad):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    grad : type
-        Description of parameter `Xs`.
+    grad : type TODO
+        Description of parameter `grad`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
-
-
+    type TODO
+        Description of returned object. TODO
     """
     tasklogger.log_info("Identifying salient levels of resolution...")
     minimum = np.max(grad)
@@ -125,11 +123,11 @@ def get_zoom_visualization(
     n_jobs,
     random_state=None,
 ):
-    """Short summary
+    """Short summary TODO
 
     Parameters
     ----------
-
+    TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize MDS.
         If an integer is given, it fixes the seed.
@@ -148,22 +146,21 @@ def get_zoom_visualization(
 
 
 def compute_ideal_visualization_layer(gradient, Xs, min_cells=100):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    gradient : type
-        Description of parameter `gradient`.
-    Xs : type
-        Description of parameter `Xs`.
-    min_cells : type
-        Description of parameter `min_cells`.
+    gradient : type TODO
+        Description of parameter `gradient`. TODO
+    Xs : type TODO
+        Description of parameter `Xs`. TODO
+    min_cells : type TODO
+        Description of parameter `min_cells`. TODO
 
     Returns
     -------
-    type
-        Description of returned object.
-
+    type TODO
+        Description of returned object. TODO
     """
     minimum = np.max(gradient)
     min_layer = 0
@@ -181,23 +178,22 @@ def compute_ideal_visualization_layer(gradient, Xs, min_cells=100):
 def get_clusters_sizes_2(
     clusters_full, layer, NxT, X, repulse=False, n_jobs=10, random_state=None
 ):
-    """Short summary.
+    """Short summary. TODO
 
-    Parameters
     Parameters
     ----------
-    clusters_full : type
-        Description of parameter `clusters_full`.
-    layer : type
-        Description of parameter `layer`.
-    NxT : type
-        Description of parameter `NxT`.
-    X : type
-        Description of parameter `X`.
-    repulse : type
-        Description of parameter `repulse`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
+    clusters_full : type TODO
+        Description of parameter `clusters_full`. TODO
+    layer : type TODO
+        Description of parameter `layer`. TODO
+    NxT : type TODO
+        Description of parameter `NxT`. TODO
+    X : type TODO
+        Description of parameter `X`. TODO
+    repulse : type TODO
+        Description of parameter `repulse`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize MDS.
         If an integer is given, it fixes the seed.
@@ -205,9 +201,8 @@ def get_clusters_sizes_2(
 
     Returns
     -------
-    type
-        Description of returned object.
-
+    type TODO
+        Description of returned object. TODO
     """
     unique = np.unique(NxT[layer], return_index=True, return_counts=True)
 
diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py
index 807a454..b80cf77 100644
--- a/multiscale_phate/tree.py
+++ b/multiscale_phate/tree.py
@@ -17,30 +17,30 @@ def build_tree(
     n_jobs=10,
     random_state=None,
 ):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    data_input : type
-        Description of parameter `data_input`.
-    scale : type
-        Description of parameter `scale`.
-    landmarks : type
-        Description of parameter `landmarks`.
-    partitions : type
-        Description of parameter `partitions`.
-    granularity : type
-        Description of parameter `granularity`.
-    n_pca : type
-        Description of parameter `n_pca`.
-    decay : type
-        Description of parameter `decay`.
-    gamma : type
-        Description of parameter `gamma`.
-    knn : type
-        Description of parameter `knn`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
+    data_input : type TODO
+        Description of parameter `data_input`. TODO
+    scale : type TODO
+        Description of parameter `scale`. TODO
+    landmarks : type TODO
+        Description of parameter `landmarks`. TODO
+    partitions : type TODO
+        Description of parameter `partitions`. TODO
+    granularity : type TODO
+        Description of parameter `granularity`. TODO
+    n_pca : type TODO
+        Description of parameter `n_pca`. TODO
+    decay : type TODO
+        Description of parameter `decay`. TODO
+    gamma : type TODO
+        Description of parameter `gamma`. TODO
+    knn : type TODO
+        Description of parameter `knn`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The random number generator.
         If an integer is given, it fixes the seed.
@@ -48,8 +48,8 @@ def build_tree(
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     with tasklogger.log_task("Multiscale PHATE tree"):
@@ -124,38 +124,38 @@ def online_update_tree(
     n_jobs=10,
     random_state=None,
 ):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    data_1 : type
-        Description of parameter `data_1`.
-    data_2 : type
-        Description of parameter `data_2`.
-    pca_centroid : type
-        Description of parameter `pca_centroid`.
-    pca_op : type
-        Description of parameter `pca_op`.
-    partitions : type
-        Description of parameter `partitions`.
-    diff_operator : type
-        Description of parameter `diff_operator`.
-    diff_pca_op : type
-        Description of parameter `diff_pca_op`.
-    Xs : type
-        Description of parameter `Xs`.
-    NxTs : type
-        Description of parameter `NxTs`.
-    Ks : type
-        Description of parameter `Ks`.
-    Merges : type
-        Description of parameter `Merges`.
-    Ps : type
-        Description of parameter `Ps`.
-    scale : type
-        Description of parameter `scale`.
-    n_jobs : type
-        Description of parameter `n_jobs`.
+    data_1 : type TODO
+        Description of parameter `data_1`. TODO
+    data_2 : type TODO
+        Description of parameter `data_2`. TODO
+    pca_centroid : type TODO
+        Description of parameter `pca_centroid`. TODO
+    pca_op : type TODO
+        Description of parameter `pca_op`. TODO
+    partitions : type TODO
+        Description of parameter `partitions`. TODO
+    diff_operator : type TODO
+        Description of parameter `diff_operator`. TODO
+    diff_pca_op : type TODO
+        Description of parameter `diff_pca_op`. TODO
+    Xs : type TODO
+        Description of parameter `Xs`. TODO
+    NxTs : type TODO
+        Description of parameter `NxTs`. TODO
+    Ks : type TODO
+        Description of parameter `Ks`. TODO
+    Merges : type TODO
+        Description of parameter `Merges`. TODO
+    Ps : type TODO
+        Description of parameter `Ps`. TODO
+    scale : type TODO
+        Description of parameter `scale`. TODO
+    n_jobs : type TODO
+        Description of parameter `n_jobs`. TODO
     random_state : integer or numpy.RandomState, optional, default: None
         The random number generator.
         If an integer is given, it fixes the seed.
@@ -163,8 +163,8 @@ def online_update_tree(
 
     Returns
     -------
-    type
-        Description of returned object.
+    type TODO
+        Description of returned object. TODO
 
     """
     with tasklogger.log_task("Multiscale PHATE tree mapping"):
diff --git a/multiscale_phate/visualize.py b/multiscale_phate/visualize.py
index e82a7b9..54650b0 100644
--- a/multiscale_phate/visualize.py
+++ b/multiscale_phate/visualize.py
@@ -8,15 +8,15 @@
 def get_visualization(
     Xs, NxTs, cluster_level, visualization_level, repulse, random_state=None
 ):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    Xs : type
+    Xs : type TODO
         Description of parameter `Xs`.
-    NxTs : type
+    NxTs : type TODO
         Description of parameter `NxTs`.
-    merges : type
+    merges : type TODO
         Description of parameter `merges`.
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize MDS.
@@ -25,7 +25,7 @@ def get_visualization(
 
     Returns
     -------
-    type
+    type TODO
         Description of returned object.
 
     """
@@ -41,15 +41,15 @@ def get_visualization(
 
 
 def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    Xs : type
+    Xs : type TODO
         Description of parameter `Xs`.
-    NxTs : type
+    NxTs : type TODO
         Description of parameter `NxTs`.
-    merges : type
+    merges : type TODO
         Description of parameter `merges`.
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize MDS.
@@ -58,7 +58,7 @@ def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None
 
     Returns
     -------
-    type
+    type TODO
         Description of returned object.
 
     """
@@ -86,24 +86,24 @@ def map_clusters_to_tree(clusters, NxTs):
 
 
 def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps):
-    """Short summary.
+    """Short summary. TODO
 
     Parameters
     ----------
-    data_pca : type
+    data_pca : type TODO
         Description of parameter `data_pca`.
-    diff_op : type
+    diff_op : type TODO
         Description of parameter `diff_op`.
-    NxT : type
+    NxT : type TODO
         Description of parameter `NxT`.
-    merged_list : type
+    merged_list : type TODO
         Description of parameter `merged_list`.
-    Ps : type
+    Ps : type TODO
         Description of parameter `Ps`.
 
     Returns
     -------
-    type
+    type TODO
         Description of returned object.
 
     """

From 817015243a3d1bf0aff2e898ae5f95714e94e593 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:21:32 -0500
Subject: [PATCH 06/13] add verbosity

---
 multiscale_phate/compress.py         |  7 +++++--
 multiscale_phate/condense.py         |  8 +++++---
 multiscale_phate/diffuse.py          | 14 +++++++++-----
 multiscale_phate/embed.py            |  6 ++++--
 multiscale_phate/multiscale_phate.py | 13 +++++++++++++
 multiscale_phate/tree.py             | 29 ++++++++++++++++++++--------
 multiscale_phate/visualize.py        |  6 ++++--
 7 files changed, 61 insertions(+), 22 deletions(-)

diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py
index 0d2800a..6414ac2 100644
--- a/multiscale_phate/compress.py
+++ b/multiscale_phate/compress.py
@@ -6,6 +6,9 @@
 import scipy.spatial.distance
 
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
+
 def get_compression_features(N, features, n_pca, partitions, landmarks):
     """Short summary. TODO
 
@@ -116,7 +119,7 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_stat
     """
     N = data.shape[0]
     size = int(N / desired_num_clusters)
-    with tasklogger.log_task("partitions"):
+    with _logger.task("partitions"):
 
         mbk = sklearn.cluster.MiniBatchKMeans(
             init="k-means++",
@@ -225,7 +228,7 @@ def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=
         Description of returned object. TODO
 
     """
-    with tasklogger.log_task("map to computed partitions"):
+    with _logger.task("map to computed partitions"):
         # getting max distance to each partition centroid
         distance_merged = get_distance_from_centroids(
             centroids, data, partition_clusters
diff --git a/multiscale_phate/condense.py b/multiscale_phate/condense.py
index e62b901..1079463 100644
--- a/multiscale_phate/condense.py
+++ b/multiscale_phate/condense.py
@@ -7,6 +7,8 @@
 import scipy.spatial.distance
 import sklearn.metrics.pairwise
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
 
 def comp(node, neigh, visited):
     """Short summary. TODO
@@ -80,8 +82,8 @@ def compute_condensation_param(X, granularity):
     epsilon = granularity * (0.1 * np.mean(np.std(X))) / (X.shape[0] ** (-1 / 5))
     D = scipy.spatial.distance.pdist(X, metric="euclidean")
     merge_threshold = np.percentile(D, 0.001) + 0.001
-    tasklogger.log_info("Setting epsilon to " + str(round(epsilon, 4)))
-    tasklogger.log_info("Setting merge threshold to " + str(round(merge_threshold, 4)))
+    _logger.info("Setting epsilon to " + str(round(epsilon, 4)))
+    _logger.info("Setting merge threshold to " + str(round(merge_threshold, 4)))
     return epsilon, merge_threshold
 
 
@@ -130,7 +132,7 @@ def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state=
     X_list.append(X_1)
     P_list = []
     merged = []
-    with tasklogger.log_task("condensation"):
+    with _logger.task("condensation"):
         while X_1.shape[0] > 1:
             D = sklearn.metrics.pairwise.pairwise_distances(
                 X_1, metric="euclidean", n_jobs=n_jobs
diff --git a/multiscale_phate/diffuse.py b/multiscale_phate/diffuse.py
index 4b7bbac..f8c4c48 100644
--- a/multiscale_phate/diffuse.py
+++ b/multiscale_phate/diffuse.py
@@ -5,9 +5,11 @@
 
 from . import compress
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
 
 def compute_diffusion_potential(
-    data, N, decay, gamma, knn, landmarks=2000, n_jobs=10, random_state=None
+    data, N, decay, gamma, knn, landmarks=2000, n_jobs=10, verbose=0, random_state=None
 ):
     """Short summary. TODO
 
@@ -27,6 +29,8 @@ def compute_diffusion_potential(
         Description of parameter `landmarks`. TODO
     n_jobs : type TODO
         Description of parameter `n_jobs`. TODO
+    verbose : `int`, optional (default: 0)
+        If `> 0`, print status messages
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize PHATE and PCA.
         If an integer is given, it fixes the seed.
@@ -38,19 +42,19 @@ def compute_diffusion_potential(
         Description of returned object. TODO
 
     """
-    with tasklogger.log_task("diffusion potential"):
+    with _logger.task("diffusion potential"):
 
         if landmarks != None and landmarks > data.shape[0]:
             landmarks = None
 
         diff_op = phate.PHATE(
-            verbose=False,
             n_landmark=landmarks,
             decay=decay,
             gamma=gamma,
             n_pca=None,
             knn=knn,
             n_jobs=n_jobs,
+            verbose=verbose,
             random_state=random_state,
         )
         diff_op.fit(data)
@@ -85,8 +89,8 @@ def online_update_diffusion_potential(unmapped_data, diff_op, dp_pca):
         Description of returned object. TODO
 
     """
-    with tasklogger.log_task("extended diffusion potential"):
-        with tasklogger.log_task("extended kernel"):
+    with _logger.task("extended diffusion potential"):
+        with _logger.task("extended kernel"):
             # Extending kernel to new data
             transitions = diff_op.graph.extend_to_data(unmapped_data)
 
diff --git a/multiscale_phate/embed.py b/multiscale_phate/embed.py
index 8099be7..b318177 100644
--- a/multiscale_phate/embed.py
+++ b/multiscale_phate/embed.py
@@ -2,6 +2,8 @@
 import phate
 import tasklogger
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
 
 def repulsion(temp):
     """Short summary. TODO
@@ -69,7 +71,7 @@ def compute_gradient(Xs, merges):
         Description of returned object. TODO
 
     """
-    tasklogger.log_info("Computing gradient...")
+    _logger.info("Computing gradient...")
     gradient = []
     m = 0
     X = Xs[0]
@@ -101,7 +103,7 @@ def get_levels(grad):
     type TODO
         Description of returned object. TODO
     """
-    tasklogger.log_info("Identifying salient levels of resolution...")
+    _logger.info("Identifying salient levels of resolution...")
     minimum = np.max(grad)
     levels = []
     levels.append(0)
diff --git a/multiscale_phate/multiscale_phate.py b/multiscale_phate/multiscale_phate.py
index 15823e7..53d08b0 100644
--- a/multiscale_phate/multiscale_phate.py
+++ b/multiscale_phate/multiscale_phate.py
@@ -1,5 +1,9 @@
+import tasklogger
+
 from . import tree, embed, utils, visualize
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
 
 class Multiscale_PHATE(object):
     """Multscale PHATE operator.
@@ -41,6 +45,8 @@ class Multiscale_PHATE(object):
         used at all, which is useful for debugging.
         For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for
         n_jobs = -2, all CPUs but one are used
+    verbose : `int` or `boolean`, optional (default: 1)
+        If `True` or `> 0`, print status messages
     random_state : integer or numpy.RandomState, optional, default: None
         The generator used to initialize SMACOF (metric, nonmetric) MDS
         If an integer is given, it fixes the seed
@@ -92,6 +98,7 @@ def __init__(
         gamma=1,
         knn=5,
         n_jobs=1,
+        verbose=1,
         random_state=None,
     ):
         self.scale = scale
@@ -103,7 +110,12 @@ def __init__(
         self.gamma = gamma
         self.knn = knn
         self.n_jobs = n_jobs
+        self.verbose = verbose
         self.random_state = random_state
+
+        _logger.set_level(int(verbose))
+
+        # TODO: remove all of the below? Why are they here
         self.NxTs = None
         self.Xs = None
         self.Ks = None
@@ -162,6 +174,7 @@ def fit(self, X):
             gamma=self.gamma,
             knn=self.knn,
             n_jobs=self.n_jobs,
+            verbose=self.verbose,
             random_state=self.random_state,
         )
 
diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py
index b80cf77..12af8d4 100644
--- a/multiscale_phate/tree.py
+++ b/multiscale_phate/tree.py
@@ -3,6 +3,8 @@
 import sklearn.decomposition
 from . import compress, diffuse, condense
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
 
 def build_tree(
     data_input,
@@ -15,6 +17,7 @@ def build_tree(
     gamma=1,
     knn=5,
     n_jobs=10,
+    verbose=1,
     random_state=None,
 ):
     """Short summary. TODO
@@ -41,6 +44,8 @@ def build_tree(
         Description of parameter `knn`. TODO
     n_jobs : type TODO
         Description of parameter `n_jobs`. TODO
+    verbose : `int`, optional (default: 1)
+        If `> 0`, print status messages
     random_state : integer or numpy.RandomState, optional, default: None
         The random number generator.
         If an integer is given, it fixes the seed.
@@ -52,7 +57,7 @@ def build_tree(
         Description of returned object. TODO
 
     """
-    with tasklogger.log_task("Multiscale PHATE tree"):
+    with _logger.task("Multiscale PHATE tree"):
         N, features = data_input.shape
 
         # Computing compression features
@@ -60,7 +65,7 @@ def build_tree(
             N, features, n_pca, partitions, landmarks
         )
 
-        with tasklogger.log_task("PCA"):
+        with _logger.task("PCA"):
             pca_op = sklearn.decomposition.PCA(n_components=n_pca)
             data_pca = pca_op.fit_transform(np.array(data_input))
         clusters = np.arange(N)
@@ -74,7 +79,15 @@ def build_tree(
             clusters = partition_clusters
 
         X, diff_op, diff_pca = diffuse.compute_diffusion_potential(
-            data_pca, N, decay, gamma, knn, landmarks, n_jobs, random_state=random_state
+            data_pca,
+            N,
+            decay,
+            gamma,
+            knn,
+            landmarks,
+            n_jobs,
+            verbose=verbose - 1,
+            random_state=random_state,
         )
 
         epsilon, merge_threshold = condense.compute_condensation_param(
@@ -167,9 +180,9 @@ def online_update_tree(
         Description of returned object. TODO
 
     """
-    with tasklogger.log_task("Multiscale PHATE tree mapping"):
+    with _logger.task("Multiscale PHATE tree mapping"):
         if data_1.shape[0] != len(np.unique(partitions)):
-            tasklogger.log_info("PCA compressing new data...")
+            _logger.info("PCA compressing new data...")
             data_pca_1 = pca_op.transform(np.array(data_1))
             data_pca_2 = pca_op.transform(np.array(data_2))
 
@@ -177,7 +190,7 @@ def online_update_tree(
             partition_assignments = compress.map_update_data(
                 pca_centroid, data_pca_1, data_pca_2, partitions, nn=5, n_jobs=n_jobs
             )
-            tasklogger.log_info(
+            _logger.info(
                 "Points not mapped to partitions: "
                 + str(sum(partition_assignments == -1))
             )
@@ -223,7 +236,7 @@ def online_update_tree(
 
             else:
                 clusters = new_partition_clusters
-                tasklogger.log_info("Rebuilding condensation tree...")
+                _logger.info("Rebuilding condensation tree...")
                 clusters_idx = []
 
                 for c in clusters:
@@ -236,7 +249,7 @@ def online_update_tree(
                 return NxTs_l, Xs, Ks, Merges, Ps, pca_centroid
 
         else:
-            tasklogger.log_info("PCA compressing new data...")
+            _logger.info("PCA compressing new data...")
             data_pca_2 = pca_op.transform(np.array(data_2))
             diff_pot_1 = diffuse.online_update_diffusion_potential(
                 data_pca_2, diff_operator, diff_pca_op
diff --git a/multiscale_phate/visualize.py b/multiscale_phate/visualize.py
index 54650b0..cc53626 100644
--- a/multiscale_phate/visualize.py
+++ b/multiscale_phate/visualize.py
@@ -4,6 +4,8 @@
 
 from . import embed
 
+_logger = tasklogger.get_tasklogger("graphtools")
+
 
 def get_visualization(
     Xs, NxTs, cluster_level, visualization_level, repulse, random_state=None
@@ -107,7 +109,7 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps):
         Description of returned object.
 
     """
-    with tasklogger.log_task("base visualization"):
+    with _logger.task("base visualization"):
         with warnings.catch_warnings():
             warnings.filterwarnings(
                 "ignore",
@@ -131,7 +133,7 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps):
 
     m = 0
 
-    with tasklogger.log_task("tree"):
+    with _logger.task("tree"):
         for l in range(0, len(Ps)):
             if len(np.unique(NxT[l])) != len(np.unique(NxT[l + 1])):
                 tree_phate_1 = embed.condense_visualization(merged_list[m], tree_phate)

From 47dc43d1c278a97cdf2ac8469111dea2ed266204 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:22:59 -0500
Subject: [PATCH 07/13] assert results different with different random seed

---
 test/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test.py b/test/test.py
index 68369f3..27d48ab 100644
--- a/test/test.py
+++ b/test/test.py
@@ -74,4 +74,4 @@ def test_random_seed():
     mp_op = multiscale_phate.Multiscale_PHATE(partitions=100, landmarks=50)
     hp_embedding, _, _ = mp_op.fit_transform(X)
     hp_embedding2, _, _ = mp_op.fit_transform(X)
-    # np.testing.assert_all_close(hp_embedding, hp_embedding2)
+    assert not np.all(hp_embedding == hp_embedding2)

From 22e724f4d6432d0c618d9a633955e8fde979e573 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 18 Nov 2020 19:30:08 -0500
Subject: [PATCH 08/13] Fix flake8 errors

---
 multiscale_phate/compress.py  | 24 ++++++++++++++++++++----
 multiscale_phate/condense.py  |  2 --
 multiscale_phate/diffuse.py   |  2 +-
 multiscale_phate/embed.py     | 21 ++++++++++-----------
 multiscale_phate/tree.py      | 11 ++++++-----
 multiscale_phate/visualize.py | 35 +++++++++++++++++++++++++----------
 6 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py
index 6414ac2..33fa921 100644
--- a/multiscale_phate/compress.py
+++ b/multiscale_phate/compress.py
@@ -31,7 +31,7 @@ def get_compression_features(N, features, n_pca, partitions, landmarks):
         Description of returned object. TODO
 
     """
-    if n_pca == None:
+    if n_pca is None:
         n_pca = min(N, features)
     if n_pca > 100:
         n_pca = 100
@@ -40,10 +40,10 @@ def get_compression_features(N, features, n_pca, partitions, landmarks):
 
     # if N<100000:
     #     partitions=None
-    if partitions != None and partitions >= N:
+    if partitions is not None and partitions >= N:
         partitions = None
 
-    if partitions != None and partitions > 50000:
+    if partitions is not None and partitions > 50000:
         partitions = 50000
     elif N > 100000:
         partitions = 20000
@@ -188,6 +188,22 @@ def merge_clusters(diff_pot_unmerged, clusters):
 
 
 def get_distance_from_centroids(centroids, data, clusters):
+    """Short summary.
+
+    Parameters
+    ----------
+    centroids : type
+        Description of parameter `centroids`.
+    data : type
+        Description of parameter `data`.
+    clusters : type
+        Description of parameter `clusters`.
+
+    Returns
+    -------
+    type
+        Description of returned object.
+    """
     distance = np.zeros(centroids.shape[0])
 
     for c in range(centroids.shape[0]):
@@ -249,7 +265,7 @@ def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=
         for r in range(len(subset_partition_assignment)):
             c = 0
             while c < nn:
-                if parition_assignment_bool[r, c] == True:
+                if parition_assignment_bool[r, c]:
                     subset_partition_assignment[r] = neighbor_idx[r, c]
                     c = nn + 1
                     break
diff --git a/multiscale_phate/condense.py b/multiscale_phate/condense.py
index 1079463..4a6251d 100644
--- a/multiscale_phate/condense.py
+++ b/multiscale_phate/condense.py
@@ -120,8 +120,6 @@ def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state=
     NxT.append(clusters)
     X_cont = []
 
-    N = X.shape[0]
-
     for c in range(len(np.unique(clusters))):
         loc = np.where(c == clusters)[0]
         X_cont.append(list(loc))
diff --git a/multiscale_phate/diffuse.py b/multiscale_phate/diffuse.py
index f8c4c48..425b000 100644
--- a/multiscale_phate/diffuse.py
+++ b/multiscale_phate/diffuse.py
@@ -44,7 +44,7 @@ def compute_diffusion_potential(
     """
     with _logger.task("diffusion potential"):
 
-        if landmarks != None and landmarks > data.shape[0]:
+        if landmarks is not None and landmarks > data.shape[0]:
             landmarks = None
 
         diff_op = phate.PHATE(
diff --git a/multiscale_phate/embed.py b/multiscale_phate/embed.py
index b318177..9214193 100644
--- a/multiscale_phate/embed.py
+++ b/multiscale_phate/embed.py
@@ -76,17 +76,17 @@ def compute_gradient(Xs, merges):
     m = 0
     X = Xs[0]
 
-    for l in range(0, len(Xs) - 1):
-        if X.shape[0] != Xs[l + 1].shape[0]:
+    for layer in range(0, len(Xs) - 1):
+        if X.shape[0] != Xs[layer + 1].shape[0]:
             X_1 = condense_visualization(merges[m], X)
             m = m + 1
-            while X_1.shape[0] != Xs[l + 1].shape[0]:
+            while X_1.shape[0] != Xs[layer + 1].shape[0]:
                 X_1 = condense_visualization(merges[m], X_1)
                 m = m + 1
         else:
             X_1 = X
-        gradient.append(np.sum(np.abs(X_1 - Xs[l + 1])))
-        X = Xs[l + 1]
+        gradient.append(np.sum(np.abs(X_1 - Xs[layer + 1])))
+        X = Xs[layer + 1]
     return np.array(gradient)
 
 
@@ -135,7 +135,6 @@ def get_zoom_visualization(
         If an integer is given, it fixes the seed.
         Defaults to the global `numpy` random number generator
     """
-
     unique = np.unique(
         NxTs[zoom_visualization_level], return_index=True, return_counts=True
     )
@@ -167,13 +166,13 @@ def compute_ideal_visualization_layer(gradient, Xs, min_cells=100):
     minimum = np.max(gradient)
     min_layer = 0
 
-    for l in range(1, len(Xs)):
-        if Xs[l].shape[0] < min_cells:
+    for layer in range(1, len(Xs)):
+        if Xs[layer].shape[0] < min_cells:
             break
-        if gradient[l] < minimum:
+        if gradient[layer] < minimum:
             # print("New minimum!")
-            minimum = gradient[l]
-            min_layer = l
+            minimum = gradient[layer]
+            min_layer = layer
     return min_layer
 
 
diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py
index 12af8d4..0734888 100644
--- a/multiscale_phate/tree.py
+++ b/multiscale_phate/tree.py
@@ -71,7 +71,7 @@ def build_tree(
         clusters = np.arange(N)
 
         # Subsetting if required
-        if partitions != None:
+        if partitions is not None:
             partition_clusters = compress.subset_data(
                 data_pca, partitions, n_jobs=n_jobs, random_state=random_state
             )
@@ -191,8 +191,9 @@ def online_update_tree(
                 pca_centroid, data_pca_1, data_pca_2, partitions, nn=5, n_jobs=n_jobs
             )
             _logger.info(
-                "Points not mapped to partitions: "
-                + str(sum(partition_assignments == -1))
+                "Points not mapped to partitions: {}".format(
+                    sum(partition_assignments == -1)
+                )
             )
 
             # creating new joint paritions mapping
@@ -244,8 +245,8 @@ def online_update_tree(
 
                 NxTs_l = []
 
-                for l in range(len(NxTs)):
-                    NxTs_l.append(NxTs[l][clusters_idx])
+                for layer in range(len(NxTs)):
+                    NxTs_l.append(NxTs[layer][clusters_idx])
                 return NxTs_l, Xs, Ks, Merges, Ps, pca_centroid
 
         else:
diff --git a/multiscale_phate/visualize.py b/multiscale_phate/visualize.py
index cc53626..eafa23d 100644
--- a/multiscale_phate/visualize.py
+++ b/multiscale_phate/visualize.py
@@ -64,7 +64,6 @@ def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None
         Description of returned object.
 
     """
-
     min_layer = embed.compute_ideal_visualization_layer(gradient, Xs, min_cells)
     (hp_embedding, cluster_viz, sizes_viz,) = embed.get_clusters_sizes_2(
         np.array(NxTs[-35]),
@@ -78,10 +77,25 @@ def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None
 
 
 def map_clusters_to_tree(clusters, NxTs):
+    """Short summary.
+
+    Parameters
+    ----------
+    clusters : type
+        Description of parameter `clusters`.
+    NxTs : type
+        Description of parameter `NxTs`.
+
+    Returns
+    -------
+    type
+        Description of returned object.
+
+    """
     clusters_tree = []
 
-    for l in range(len(NxTs) - 1):
-        _, ind = np.unique(NxTs[l], return_index=True)
+    for layer in range(len(NxTs) - 1):
+        _, ind = np.unique(NxTs[layer], return_index=True)
         clusters_tree.extend(clusters[ind])
 
     return clusters_tree
@@ -114,8 +128,9 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps):
             warnings.filterwarnings(
                 "ignore",
                 category=RuntimeWarning,
-                message="Pre-fit PHATE should not be used to transform a new data matrix. "
-                "Please fit PHATE to the new data by running 'fit' with the new data.",
+                message="Pre-fit PHATE should not be used to transform a new data "
+                "matrix. Please fit PHATE to the new data by running 'fit' with the "
+                "new data.",
             )
             tree_phate = diff_op.transform(data_pca)
 
@@ -134,21 +149,21 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps):
     m = 0
 
     with _logger.task("tree"):
-        for l in range(0, len(Ps)):
-            if len(np.unique(NxT[l])) != len(np.unique(NxT[l + 1])):
+        for layer in range(0, len(Ps)):
+            if len(np.unique(NxT[layer])) != len(np.unique(NxT[layer + 1])):
                 tree_phate_1 = embed.condense_visualization(merged_list[m], tree_phate)
                 m = m + 1
-            if Ps[l].shape[0] != tree_phate_1.shape[0]:
+            if Ps[layer].shape[0] != tree_phate_1.shape[0]:
                 tree_phate_1 = embed.condense_visualization(
                     merged_list[m], tree_phate_1
                 )
                 m = m + 1
-            tree_phate = Ps[l] @ tree_phate_1
+            tree_phate = Ps[layer] @ tree_phate_1
             embeddings.append(
                 np.concatenate(
                     [
                         tree_phate,
-                        np.repeat(l + 1, tree_phate.shape[0]).reshape(
+                        np.repeat(layer + 1, tree_phate.shape[0]).reshape(
                             tree_phate.shape[0], 1
                         ),
                     ],

From de6ba0dc82914b1b69729848fbda3e19f6ceb553 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 19 Nov 2020 09:46:12 -0500
Subject: [PATCH 09/13] remove unused argument

---
 multiscale_phate/compress.py | 2 +-
 multiscale_phate/tree.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py
index 33fa921..8b2db97 100644
--- a/multiscale_phate/compress.py
+++ b/multiscale_phate/compress.py
@@ -9,7 +9,7 @@
 _logger = tasklogger.get_tasklogger("graphtools")
 
 
-def get_compression_features(N, features, n_pca, partitions, landmarks):
+def get_compression_features(N, features, n_pca, partitions):
     """Short summary. TODO
 
     Parameters
diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py
index 0734888..4e7d2d1 100644
--- a/multiscale_phate/tree.py
+++ b/multiscale_phate/tree.py
@@ -62,7 +62,7 @@ def build_tree(
 
         # Computing compression features
         n_pca, partitions = compress.get_compression_features(
-            N, features, n_pca, partitions, landmarks
+            N, features, n_pca, partitions
         )
 
         with _logger.task("PCA"):

From da6cf434bd76d5235e9d378c9d56f2bb96943528 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 19 Nov 2020 09:59:33 -0500
Subject: [PATCH 10/13] test get_compression_features

---
 test/test.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/test/test.py b/test/test.py
index 27d48ab..f36524c 100644
--- a/test/test.py
+++ b/test/test.py
@@ -75,3 +75,54 @@ def test_random_seed():
     hp_embedding, _, _ = mp_op.fit_transform(X)
     hp_embedding2, _, _ = mp_op.fit_transform(X)
     assert not np.all(hp_embedding == hp_embedding2)
+
+
+@parameterized.parameterized(
+    [
+        # n_pca is None -> min(N, features)
+        (100, 50, None, 50),
+        (50, 100, None, 50),
+        # n_pca < min(N, features) -> n_pca
+        (100, 50, 25, 25),
+        # n_pca > 100 -> 100
+        (200, 150, 200, 100),
+        (200, 150, 125, 100),
+        # n_pca > min(N, features) -> min(N, features)
+        (100, 50, 75, 50),
+        (50, 100, 75, 50),
+        (100, 50, 125, 50),
+        (50, 100, 125, 50),
+    ]
+)
+def test_compression_features_pca(N, features, n_pca, expected):
+    partitions = None
+    output, _ = multiscale_phate.compress.get_compression_features(
+        N, features, n_pca, partitions
+    )
+    assert output == expected
+
+
+@parameterized.parameterized(
+    [
+        # TODO: is this desired behavior? seems pathological
+        # partitions is None -> None
+        (100, None, None),
+        # partitions > N -> None
+        (100, 101, None),
+        (200000, 200001, None),
+        # partitions > 50000 -> 50000
+        (110000, 50001, 50000),
+        # N > 100000 -> 20000
+        (110000, None, 20000),
+        (110000, 100, 20000),
+        (110000, 50000, 20000),
+        (110000, 110001, 20000),
+    ]
+)
+def test_compression_features_partitions(N, partitions, expected):
+    n_pca = None
+    features = 50
+    _, output = multiscale_phate.compress.get_compression_features(
+        N, features, n_pca, partitions
+    )
+    assert output == expected

From e2748c9ddd37a1deef8702314773b647dcef3cd0 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 19 Nov 2020 10:02:27 -0500
Subject: [PATCH 11/13] embeddings can be different sizes

---
 test/test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test.py b/test/test.py
index f36524c..ce7e461 100644
--- a/test/test.py
+++ b/test/test.py
@@ -74,7 +74,8 @@ def test_random_seed():
     mp_op = multiscale_phate.Multiscale_PHATE(partitions=100, landmarks=50)
     hp_embedding, _, _ = mp_op.fit_transform(X)
     hp_embedding2, _, _ = mp_op.fit_transform(X)
-    assert not np.all(hp_embedding == hp_embedding2)
+    if hp_embedding.shape[0] == hp_embedding2.shape[0]:
+        assert not np.all(hp_embedding == hp_embedding2)
 
 
 @parameterized.parameterized(

From 2646aba4fa92e0b337fea4bb55c6e3aba1bb771e Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 19 Nov 2020 10:03:05 -0500
Subject: [PATCH 12/13] remove duplicate line

---
 multiscale_phate/compress.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py
index 8b2db97..7080322 100644
--- a/multiscale_phate/compress.py
+++ b/multiscale_phate/compress.py
@@ -36,8 +36,6 @@ def get_compression_features(N, features, n_pca, partitions):
     if n_pca > 100:
         n_pca = 100
 
-        n_pca = 100
-
     # if N<100000:
     #     partitions=None
     if partitions is not None and partitions >= N:

From feb8b934995d72475e5d6fd4d1db09a6b5d12475 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 19 Nov 2020 10:45:45 -0500
Subject: [PATCH 13/13] print output if assert fails

---
 test/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test.py b/test/test.py
index ce7e461..7dc1ad6 100644
--- a/test/test.py
+++ b/test/test.py
@@ -100,7 +100,7 @@ def test_compression_features_pca(N, features, n_pca, expected):
     output, _ = multiscale_phate.compress.get_compression_features(
         N, features, n_pca, partitions
     )
-    assert output == expected
+    assert output == expected, (output, expected)
 
 
 @parameterized.parameterized(
@@ -126,4 +126,4 @@ def test_compression_features_partitions(N, partitions, expected):
     _, output = multiscale_phate.compress.get_compression_features(
         N, features, n_pca, partitions
     )
-    assert output == expected
+    assert output == expected, (output, expected)