Documentation & testing cleanup #2

Open · wants to merge 14 commits into `main`
10 changes: 8 additions & 2 deletions README.md
@@ -2,8 +2,8 @@ Multiscale PHATE
================

[![Latest PyPi version](https://img.shields.io/pypi/v/multiscale_phate.svg)](https://pypi.org/project/multiscale_phate/)
[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE)
[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=master)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=master)
[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=main)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE)
[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=main)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=main)
[![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab)
[![GitHub stars](https://img.shields.io/github/stars/KrishnaswamyLab/Multiscale_PHATE.svg?style=social&label=Stars)](https://github.com/KrishnaswamyLab/Multiscale_PHATE/)
[![Code style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -19,6 +19,12 @@ Installation

Multiscale PHATE is available on PyPI. Install it by running the following in a terminal:

```
pip install --user multiscale_phate
```

If you wish to install from source, you may do so as follows:

```
pip install --user git+https://github.com/KrishnaswamyLab/Multiscale_PHATE
```
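
After installation, basic usage looks roughly like the following. This is a hedged sketch: the `Multiscale_PHATE` estimator name and the `fit_transform` return values are assumptions drawn from the project's scikit-learn-style API, not from this diff.

```
import numpy as np
import multiscale_phate

# Hypothetical example data: 1000 observations x 50 features.
X = np.random.normal(size=(1000, 50))

# Assumed estimator name and return values; see the repository README
# for the authoritative usage.
mp_op = multiscale_phate.Multiscale_PHATE()
mp_embedding, mp_clusters, mp_sizes = mp_op.fit_transform(X)
```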
145 changes: 81 additions & 64 deletions multiscale_phate/compress.py
@@ -6,41 +6,42 @@
import scipy.spatial.distance


def get_compression_features(N, features, n_pca, partitions, landmarks):
"""Short summary.
_logger = tasklogger.get_tasklogger("graphtools")


def get_compression_features(N, features, n_pca, partitions):
"""Short summary. TODO

Parameters
----------
N : type
Description of parameter `N`.
features : type
Description of parameter `features`.
n_pca : type
Description of parameter `n_pca`.
partitions : type
Description of parameter `partitions`.
landmarks : type
Description of parameter `landmarks`.
N : type TODO
Description of parameter `N`. TODO
features : type TODO
Description of parameter `features`. TODO
n_pca : type TODO
Description of parameter `n_pca`. TODO
partitions : type TODO
Description of parameter `partitions`. TODO
landmarks : type TODO
Description of parameter `landmarks`. TODO

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
if n_pca == None:
if n_pca is None:
n_pca = min(N, features)
if n_pca > 100:
n_pca = 100

n_pca = 100

# if N<100000:
# partitions=None
if partitions != None and partitions >= N:
if partitions is not None and partitions >= N:
partitions = None

if partitions != None and partitions > 50000:
if partitions is not None and partitions > 50000:
partitions = 50000
elif N > 100000:
partitions = 20000
@@ -49,25 +50,25 @@ def get_compression_features(N, features, n_pca, partitions, landmarks):
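
For intuition, the parameter-defaulting rules visible above behave as in the standalone sketch below. It mirrors only the lines shown in this hunk; the diff collapses the rest of the function, so the tail of the chain is not reproduced here.

```
def resolve_compression_params(N, features, n_pca, partitions):
    # Mirrors only the clamping rules visible in the hunk above.
    if n_pca is None:
        n_pca = min(N, features)
    if n_pca > 100:
        n_pca = 100
    if partitions is not None and partitions >= N:
        partitions = None  # partitioning into >= N groups is pointless
    if partitions is not None and partitions > 50000:
        partitions = 50000
    elif N > 100000:
        partitions = 20000
    return n_pca, partitions

# e.g. 200,000 observations, 500 features, all defaults:
# resolve_compression_params(200000, 500, None, None) -> (100, 20000)
```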


def cluster_components(data_subset, num_cluster, size, random_state=None):
"""Short summary.
"""Short summary. TODO

Parameters
----------
data_subset : type
Description of parameter `data_subset`.
num_cluster : type
Description of parameter `num_cluster`.
size : type
Description of parameter `size`.
data_subset : type TODO
Description of parameter `data_subset`. TODO
num_cluster : type TODO
Description of parameter `num_cluster`. TODO
size : type TODO
Description of parameter `size`. TODO
random_state : integer or numpy.RandomState, optional, default: None
The generator used to initialize MiniBatchKMeans.
If an integer is given, it fixes the seed.
Defaults to the global `numpy` random number generator

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
if data_subset.shape[0] == 1:
@@ -91,32 +92,32 @@ def cluster_components(data_subset, num_cluster, size, random_state=None):
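
Since `random_state` now seeds MiniBatchKMeans, repeated calls with the same integer seed should be reproducible. A hypothetical check (the argument values are illustrative, and the return value is assumed to be per-point cluster labels; the diff hides the function body):

```
import numpy as np
from multiscale_phate import compress

# Synthetic stand-in for one partition's data subset.
data_subset = np.random.normal(size=(500, 20))

# With the seed fixed, both calls should initialize MiniBatchKMeans
# identically and yield identical assignments.
labels_a = compress.cluster_components(data_subset, num_cluster=100, size=50, random_state=42)
labels_b = compress.cluster_components(data_subset, num_cluster=100, size=50, random_state=42)
```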


def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_state=None):
"""Short summary.
"""Short summary. TODO

Parameters
----------
data : type
Description of parameter `data`.
desired_num_clusters : type
Description of parameter `desired_num_clusters`.
n_jobs : type
Description of parameter `n_jobs`.
num_cluster : type
Description of parameter `num_cluster`.
data : type TODO
Description of parameter `data`. TODO
desired_num_clusters : type TODO
Description of parameter `desired_num_clusters`. TODO
n_jobs : type TODO
Description of parameter `n_jobs`. TODO
num_cluster : type TODO
Description of parameter `num_cluster`. TODO
random_state : integer or numpy.RandomState, optional, default: None
The generator used to initialize MiniBatchKMeans.
If an integer is given, it fixes the seed.
Defaults to the global `numpy` random number generator

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
N = data.shape[0]
size = int(N / desired_num_clusters)
with tasklogger.log_task("partitions"):
with _logger.task("partitions"):

mbk = sklearn.cluster.MiniBatchKMeans(
init="k-means++",
@@ -156,19 +157,19 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_state=None):
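
A hedged usage sketch for `subset_data`, assuming (from the "partitions" task it logs) that it returns one partition label per row; the data here is synthetic and the argument values are illustrative:

```
import numpy as np
from multiscale_phate import compress

# Synthetic data: 100,000 observations, 100 features.
data = np.random.normal(size=(100000, 100))

# Assumed return value: a partition label per observation, letting
# downstream steps work with ~20,000 partition centroids instead of
# all 100,000 points.
partition_labels = compress.subset_data(data, desired_num_clusters=20000, n_jobs=4, random_state=42)
```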


def merge_clusters(diff_pot_unmerged, clusters):
"""Short summary.
"""Short summary. TODO

Parameters
----------
diff_pot_unmerged : type
Description of parameter `diff_pot_unmerged`.
clusters : type
Description of parameter `clusters`.
diff_pot_unmerged : type TODO
Description of parameter `diff_pot_unmerged`. TODO
clusters : type TODO
Description of parameter `clusters`. TODO

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
clusters_uni = np.unique(clusters)
@@ -185,6 +186,22 @@
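
Beyond the `np.unique` call, the body of `merge_clusters` is collapsed in this diff. The sketch below shows the presumed pattern, reducing the unmerged potential matrix to one row per cluster; the averaging step is an assumption, not taken from the hidden code.

```
import numpy as np

def merge_clusters_sketch(diff_pot_unmerged, clusters):
    # Presumed behavior: aggregate the diffusion-potential rows of all
    # points sharing a cluster label into one representative row.
    clusters_uni = np.unique(clusters)
    merged = np.zeros((len(clusters_uni), diff_pot_unmerged.shape[1]))
    for i, c in enumerate(clusters_uni):
        merged[i] = diff_pot_unmerged[clusters == c].mean(axis=0)
    return merged
```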


def get_distance_from_centroids(centroids, data, clusters):
"""Short summary.

Parameters
----------
centroids : type
Description of parameter `centroids`.
data : type
Description of parameter `data`.
clusters : type
Description of parameter `clusters`.

Returns
-------
type
Description of returned object.
"""
distance = np.zeros(centroids.shape[0])

for c in range(centroids.shape[0]):
@@ -202,30 +219,30 @@
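
Consistent with the docstring above and the "max distance to each partition centroid" comment later in this file, the function presumably computes a per-partition radius. A hedged sketch; the `cdist` call stands in for the hidden loop body:

```
import numpy as np
import scipy.spatial.distance

def get_distance_from_centroids_sketch(centroids, data, clusters):
    # For each centroid, record the largest distance to any point
    # assigned to it, i.e. the radius of that partition.
    distance = np.zeros(centroids.shape[0])
    for c in range(centroids.shape[0]):
        members = data[clusters == c]
        dists = scipy.spatial.distance.cdist(centroids[c][None, :], members)
        distance[c] = dists.max()
    return distance
```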


def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=10):
"""Short summary.
"""Short summary. TODO

Parameters
----------
centroids : type
Description of parameter `centroids`.
data : type
Description of parameter `data`.
new_data : type
Description of parameter `new_data`.
partition_clusters : type
Description of parameter `partition_clusters`.
nn : type
Description of parameter `nn`.
n_jobs : type
Description of parameter `n_jobs`.
centroids : type TODO
Description of parameter `centroids`. TODO
data : type TODO
Description of parameter `data`. TODO
new_data : type TODO
Description of parameter `new_data`. TODO
partition_clusters : type TODO
Description of parameter `partition_clusters`. TODO
nn : type TODO
Description of parameter `nn`. TODO
n_jobs : type TODO
Description of parameter `n_jobs`. TODO

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
with tasklogger.log_task("map to computed partitions"):
with _logger.task("map to computed partitions"):
# getting max distance to each partition centroid
distance_merged = get_distance_from_centroids(
centroids, data, partition_clusters
@@ -246,7 +263,7 @@ def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=10):
for r in range(len(subset_partition_assignment)):
c = 0
while c < nn:
if parition_assignment_bool[r, c] == True:
if parition_assignment_bool[r, c]:
subset_partition_assignment[r] = neighbor_idx[r, c]
c = nn + 1
break
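
The loop above walks each new point's `nn` nearest neighbors in order and assigns the first neighbor that falls within a partition's radius. A condensed, standalone form of that rule (hypothetical function name; the arrays follow the visible loop's conventions):

```
import numpy as np

def assign_first_in_radius(neighbor_idx, parition_assignment_bool, nn):
    # neighbor_idx[r] lists point r's nn nearest neighbors, nearest
    # first; parition_assignment_bool[r, c] flags whether neighbor c
    # lies within its partition's radius. -1 marks "unassigned".
    assignment = np.full(neighbor_idx.shape[0], -1)
    for r in range(neighbor_idx.shape[0]):
        for c in range(nn):
            if parition_assignment_bool[r, c]:
                assignment[r] = neighbor_idx[r, c]
                break
    return assignment
```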