From 339b27343a4ee00a4cb13b0b1bd712c093d2f309 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:05:22 -0500 Subject: [PATCH 01/13] switch install references to pypi --- README.md | 10 ++++++++-- tutorial/10X_pbmc.ipynb | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a1140e8..53b03b1 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ Multiscale PHATE ================ [![Latest PyPi version](https://img.shields.io/pypi/v/multiscale_phate.svg)](https://pypi.org/project/multiscale_phate/) -[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE) -[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=master)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=master) +[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=main)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE) +[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=main)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=main) [![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab) [![GitHub stars](https://img.shields.io/github/stars/KrishnaswamyLab/Multiscale_PHATE.svg?style=social&label=Stars)](https://github.com/KrishnaswamyLab/Multiscale_PHATE/) [![Code style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) @@ -19,6 +19,12 @@ Installation Multiscale PHATE is available on `pip`. Install by running the following in a terminal: +``` +pip install --user multiscale_phate +``` + +If you wish to install from source, you may do so as follows: + ``` pip install --user git+https://github.com/KrishnaswamyLab/Multiscale_PHATE ``` diff --git a/tutorial/10X_pbmc.ipynb b/tutorial/10X_pbmc.ipynb index 236d892..2c897b1 100644 --- a/tutorial/10X_pbmc.ipynb +++ b/tutorial/10X_pbmc.ipynb @@ -41,7 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install --user -q git+https://github.com/KrishnaswamyLab/Multiscale_PHATE" + "!pip install --user -q multiscale_phate" ] }, { From 918e8e84e711e3144dab0068753c3191d4b9d91a Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:06:19 -0500 Subject: [PATCH 02/13] bump version --- multiscale_phate/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiscale_phate/version.py b/multiscale_phate/version.py index 76fe15d..8f02035 100644 --- a/multiscale_phate/version.py +++ b/multiscale_phate/version.py @@ -1 +1 @@ -__version__ = "0.0" +__version__ = "0.1.0a0" From dec7b2aa2ee1a00424b9dc5a388a9ae066520a97 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:10:47 -0500 Subject: [PATCH 03/13] fix docs --- multiscale_phate/multiscale_phate.py | 57 +++++++++------------------- 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/multiscale_phate/multiscale_phate.py b/multiscale_phate/multiscale_phate.py index df0765d..15823e7 100644 --- a/multiscale_phate/multiscale_phate.py +++ b/multiscale_phate/multiscale_phate.py @@ -2,7 +2,9 @@ class Multiscale_PHATE(object): - """Multscale PHATE operator which performs dimensionality reduction and clustering across granularities. + """Multscale PHATE operator. + + Performs dimensionality reduction and clustering across granularities. Parameters ---------- @@ -39,6 +41,13 @@ class Multiscale_PHATE(object): used at all, which is useful for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used + random_state : integer or numpy.RandomState, optional, default: None + The generator used to initialize SMACOF (metric, nonmetric) MDS + If an integer is given, it fixes the seed + Defaults to the global `numpy` random number generator + + Attributes + ---------- NxTs : list of lists Cluster assignment for every point at all levels of Diffusion Condensation tree @@ -70,37 +79,6 @@ class Multiscale_PHATE(object): levels : list List of salient resolutions for downstream analysis, computed via gradient analysis - random_state : integer or numpy.RandomState, optional, default: None - The generator used to initialize SMACOF (metric, nonmetric) MDS - If an integer is given, it fixes the seed - Defaults to the global `numpy` random number generator - - Attributes - ---------- - scale - landmarks - partitions - granularity - n_pca - decay - gamma - knn - n_jobs - NxTs - Xs - Ks - merges - Ps - diff_op - data_pca - pca_op - partition_clusters - dp_pca - epsilon - merge_threshold - gradient - levels - """ def __init__( @@ -144,7 +122,7 @@ def __init__( super().__init__() def fit(self, X): - """Builds Diffusion Condensation tree and computes ideal resolutions. + """Build Diffusion Condensation tree and computes ideal resolutions. Parameters ---------- @@ -201,6 +179,7 @@ def transform( repulse=False, ): """Short summary. + Parameters ---------- visualization_level : int, default = levels[-2] @@ -216,6 +195,7 @@ def transform( Cluster in 'coarse_cluster_level' to zoom in on. repulse : bool, default = False Allows for repulsion between points in multiscale embedding. + Returns ------- embedding : array, shape=[number of points in visualization_level, 2] @@ -228,7 +208,6 @@ def transform( Number of points aggregated into each point as visualized at the granularity of visualization_level """ - if visualization_level is None: visualization_level = self.levels[2] if cluster_level is None: @@ -255,7 +234,7 @@ def transform( ) def build_tree(self): - """Computes and returns a tree from the Diffusion Condensation process. + """Compute and returns a tree from the Diffusion Condensation process. Returns ------- @@ -268,8 +247,9 @@ def build_tree(self): ) def fit_transform(self, X): - """Builds Diffusion Condensation tree, identifies ideal resolutions and returns - Multiscale PHATE embedding and clusters. + """Build Diffusion Condensation tree and identify ideal resolutions. + + Returns Multiscale PHATE embedding and clusters. Parameters ---------- @@ -294,7 +274,7 @@ def fit_transform(self, X): return self.transform() def get_tree_clusters(self, cluster_level): - """Colors Diffusion Condensation tree by a granularity of clusters. + """Color Diffusion Condensation tree by a granularity of clusters. Parameters ---------- @@ -306,6 +286,5 @@ def get_tree_clusters(self, cluster_level): clusters_tree : list, shape=[n_points_aggregated] Cluster labels of each point in computed diffusion condensation tree as dictated by a granularity of the tree - """ return visualize.map_clusters_to_tree(self.NxTs[cluster_level], self.NxTs) From e31f61fbad0250395bb72fdf096f7d6d22f9d5fd Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:13:00 -0500 Subject: [PATCH 04/13] document hash --- multiscale_phate/utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/multiscale_phate/utils.py b/multiscale_phate/utils.py index 63b013e..82c6125 100644 --- a/multiscale_phate/utils.py +++ b/multiscale_phate/utils.py @@ -2,17 +2,16 @@ def hash_object(X): - """Short summary. + """Compute a unique hash of any Python object. Parameters ---------- - X : type - Description of parameter `X`. + X : object + Object for which to compute unique hash Returns ------- - type - Description of returned object. - + hash : str + Unique hash based on pickle dump of X. """ return hash(pickle.dumps(X)) From ca2ce05143531c9d1f3bb8b1434f01ed8caebc87 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:14:51 -0500 Subject: [PATCH 05/13] add TODOs --- multiscale_phate/compress.py | 110 +++++++++++++++++----------------- multiscale_phate/condense.py | 72 +++++++++++----------- multiscale_phate/diffuse.py | 52 ++++++++-------- multiscale_phate/embed.py | 105 ++++++++++++++++---------------- multiscale_phate/tree.py | 108 ++++++++++++++++----------------- multiscale_phate/visualize.py | 34 +++++------ 6 files changed, 238 insertions(+), 243 deletions(-) diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py index 8f4d718..0d2800a 100644 --- a/multiscale_phate/compress.py +++ b/multiscale_phate/compress.py @@ -7,25 +7,25 @@ def get_compression_features(N, features, n_pca, partitions, landmarks): - """Short summary. + """Short summary. TODO Parameters ---------- - N : type - Description of parameter `N`. - features : type - Description of parameter `features`. - n_pca : type - Description of parameter `n_pca`. - partitions : type - Description of parameter `partitions`. - landmarks : type - Description of parameter `landmarks`. + N : type TODO + Description of parameter `N`. TODO + features : type TODO + Description of parameter `features`. TODO + n_pca : type TODO + Description of parameter `n_pca`. TODO + partitions : type TODO + Description of parameter `partitions`. TODO + landmarks : type TODO + Description of parameter `landmarks`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ if n_pca == None: @@ -49,16 +49,16 @@ def get_compression_features(N, features, n_pca, partitions, landmarks): def cluster_components(data_subset, num_cluster, size, random_state=None): - """Short summary. + """Short summary. TODO Parameters ---------- - data_subset : type - Description of parameter `data_subset`. - num_cluster : type - Description of parameter `num_cluster`. - size : type - Description of parameter `size`. + data_subset : type TODO + Description of parameter `data_subset`. TODO + num_cluster : type TODO + Description of parameter `num_cluster`. TODO + size : type TODO + Description of parameter `size`. TODO random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize MiniBatchKMeans. If an integer is given, it fixes the seed. @@ -66,8 +66,8 @@ def cluster_components(data_subset, num_cluster, size, random_state=None): Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ if data_subset.shape[0] == 1: @@ -91,18 +91,18 @@ def cluster_components(data_subset, num_cluster, size, random_state=None): def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_state=None): - """Short summary. + """Short summary. TODO Parameters ---------- - data : type - Description of parameter `data`. - desired_num_clusters : type - Description of parameter `desired_num_clusters`. - n_jobs : type - Description of parameter `n_jobs`. - num_cluster : type - Description of parameter `num_cluster`. + data : type TODO + Description of parameter `data`. TODO + desired_num_clusters : type TODO + Description of parameter `desired_num_clusters`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO + num_cluster : type TODO + Description of parameter `num_cluster`. TODO random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize MiniBatchKMeans. If an integer is given, it fixes the seed. @@ -110,8 +110,8 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_stat Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ N = data.shape[0] @@ -156,19 +156,19 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_stat def merge_clusters(diff_pot_unmerged, clusters): - """Short summary. + """Short summary. TODO Parameters ---------- - diff_pot_unmerged : type - Description of parameter `diff_pot_unmerged`. - clusters : type - Description of parameter `clusters`. + diff_pot_unmerged : type TODO + Description of parameter `diff_pot_unmerged`. TODO + clusters : type TODO + Description of parameter `clusters`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ clusters_uni = np.unique(clusters) @@ -202,27 +202,27 @@ def get_distance_from_centroids(centroids, data, clusters): def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=10): - """Short summary. + """Short summary. TODO Parameters ---------- - centroids : type - Description of parameter `centroids`. - data : type - Description of parameter `data`. - new_data : type - Description of parameter `new_data`. - partition_clusters : type - Description of parameter `partition_clusters`. - nn : type - Description of parameter `nn`. - n_jobs : type - Description of parameter `n_jobs`. + centroids : type TODO + Description of parameter `centroids`. TODO + data : type TODO + Description of parameter `data`. TODO + new_data : type TODO + Description of parameter `new_data`. TODO + partition_clusters : type TODO + Description of parameter `partition_clusters`. TODO + nn : type TODO + Description of parameter `nn`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ with tasklogger.log_task("map to computed partitions"): diff --git a/multiscale_phate/condense.py b/multiscale_phate/condense.py index 97fc560..e62b901 100644 --- a/multiscale_phate/condense.py +++ b/multiscale_phate/condense.py @@ -9,21 +9,21 @@ def comp(node, neigh, visited): - """Short summary. + """Short summary. TODO Parameters ---------- - node : type - Description of parameter `node`. - neigh : type - Description of parameter `neigh`. - visited : type - Description of parameter `visited`. + node : type TODO + Description of parameter `node`. TODO + neigh : type TODO + Description of parameter `neigh`. TODO + visited : type TODO + Description of parameter `visited`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ vis = visited.add @@ -37,17 +37,17 @@ def comp(node, neigh, visited): def merge_common(lists): - """Short summary. + """Short summary. TODO Parameters ---------- - lists : type - Description of parameter `lists`. + lists : type TODO + Description of parameter `lists`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ neigh = collections.defaultdict(set) @@ -62,19 +62,19 @@ def merge_common(lists): def compute_condensation_param(X, granularity): - """Short summary. + """Short summary. TODO Parameters ---------- - X : type - Description of parameter `X`. - granularity : type - Description of parameter `granularity`. + X : type TODO + Description of parameter `X`. TODO + granularity : type TODO + Description of parameter `granularity`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ epsilon = granularity * (0.1 * np.mean(np.std(X))) / (X.shape[0] ** (-1 / 5)) @@ -86,22 +86,22 @@ def compute_condensation_param(X, granularity): def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state=None): - """Short summary. + """Short summary. TODO Parameters ---------- - X : type - Description of parameter `X`. - clusters : type - Description of parameter `clusters`. - scale : type - Description of parameter `scale`. - epsilon : type - Description of parameter `epsilon`. - merge_threshold : type - Description of parameter `merge_threshold`. - n_jobs : type - Description of parameter `n_jobs`. + X : type TODO + Description of parameter `X`. TODO + clusters : type TODO + Description of parameter `clusters`. TODO + scale : type TODO + Description of parameter `scale`. TODO + epsilon : type TODO + Description of parameter `epsilon`. TODO + merge_threshold : type TODO + Description of parameter `merge_threshold`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize graphtools. If an integer is given, it fixes the seed. @@ -109,8 +109,8 @@ def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state= Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ NxT = [] diff --git a/multiscale_phate/diffuse.py b/multiscale_phate/diffuse.py index 7241cad..4b7bbac 100644 --- a/multiscale_phate/diffuse.py +++ b/multiscale_phate/diffuse.py @@ -9,24 +9,24 @@ def compute_diffusion_potential( data, N, decay, gamma, knn, landmarks=2000, n_jobs=10, random_state=None ): - """Short summary. + """Short summary. TODO Parameters ---------- - data : type - Description of parameter `data`. - N : type - Description of parameter `N`. - decay : type - Description of parameter `decay`. - gamma : type - Description of parameter `gamma`. - knn : type - Description of parameter `knn`. - landmarks : type - Description of parameter `landmarks`. - n_jobs : type - Description of parameter `n_jobs`. + data : type TODO + Description of parameter `data`. TODO + N : type TODO + Description of parameter `N`. TODO + decay : type TODO + Description of parameter `decay`. TODO + gamma : type TODO + Description of parameter `gamma`. TODO + knn : type TODO + Description of parameter `knn`. TODO + landmarks : type TODO + Description of parameter `landmarks`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize PHATE and PCA. If an integer is given, it fixes the seed. @@ -34,8 +34,8 @@ def compute_diffusion_potential( Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ with tasklogger.log_task("diffusion potential"): @@ -68,21 +68,21 @@ def compute_diffusion_potential( def online_update_diffusion_potential(unmapped_data, diff_op, dp_pca): - """Short summary. + """Short summary. TODO Parameters ---------- - unmapped_data : type - Description of parameter `unmapped_data`. - diff_op : type - Description of parameter `diff_op`. - dp_pca : type - Description of parameter `dp_pca`. + unmapped_data : type TODO + Description of parameter `unmapped_data`. TODO + diff_op : type TODO + Description of parameter `diff_op`. TODO + dp_pca : type TODO + Description of parameter `dp_pca`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ with tasklogger.log_task("extended diffusion potential"): diff --git a/multiscale_phate/embed.py b/multiscale_phate/embed.py index 07fec97..8099be7 100644 --- a/multiscale_phate/embed.py +++ b/multiscale_phate/embed.py @@ -4,17 +4,17 @@ def repulsion(temp): - """Short summary. + """Short summary. TODO Parameters ---------- - temp : type - Description of parameter `temp`. + temp : type TODO + Description of parameter `temp`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ for r in range(temp.shape[0]): @@ -29,19 +29,19 @@ def repulsion(temp): def condense_visualization(merge_pairs, phate): - """Short summary. + """Short summary. TODO Parameters ---------- - merge_pairs : type - Description of parameter `merge_pairs`. - phate : type - Description of parameter `phate`. + merge_pairs : type TODO + Description of parameter `merge_pairs`. TODO + phate : type TODO + Description of parameter `phate`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ to_delete = [] @@ -54,19 +54,19 @@ def condense_visualization(merge_pairs, phate): def compute_gradient(Xs, merges): - """Short summary. + """Short summary. TODO Parameters ---------- - Xs : type - Description of parameter `Xs`. - merges : type - Description of parameter `merges`. + Xs : type TODO + Description of parameter `Xs`. TODO + merges : type TODO + Description of parameter `merges`. TODO Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ tasklogger.log_info("Computing gradient...") @@ -89,19 +89,17 @@ def compute_gradient(Xs, merges): def get_levels(grad): - """Short summary. + """Short summary. TODO Parameters ---------- - grad : type - Description of parameter `Xs`. + grad : type TODO + Description of parameter `Xs`. TODO Returns ------- - type - Description of returned object. - - + type TODO + Description of returned object. TODO """ tasklogger.log_info("Identifying salient levels of resolution...") minimum = np.max(grad) @@ -125,11 +123,11 @@ def get_zoom_visualization( n_jobs, random_state=None, ): - """Short summary + """Short summary TODO Parameters ---------- - + TODO random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize MDS. If an integer is given, it fixes the seed. @@ -148,22 +146,21 @@ def get_zoom_visualization( def compute_ideal_visualization_layer(gradient, Xs, min_cells=100): - """Short summary. + """Short summary. TODO Parameters ---------- - gradient : type - Description of parameter `gradient`. - Xs : type - Description of parameter `Xs`. - min_cells : type - Description of parameter `min_cells`. + gradient : type TODO + Description of parameter `gradient`. TODO + Xs : type TODO + Description of parameter `Xs`. TODO + min_cells : type TODO + Description of parameter `min_cells`. TODO Returns ------- - type - Description of returned object. - + type TODO + Description of returned object. TODO """ minimum = np.max(gradient) min_layer = 0 @@ -181,23 +178,22 @@ def compute_ideal_visualization_layer(gradient, Xs, min_cells=100): def get_clusters_sizes_2( clusters_full, layer, NxT, X, repulse=False, n_jobs=10, random_state=None ): - """Short summary. + """Short summary. TODO - Parameters Parameters ---------- - clusters_full : type - Description of parameter `clusters_full`. - layer : type - Description of parameter `layer`. - NxT : type - Description of parameter `NxT`. - X : type - Description of parameter `X`. - repulse : type - Description of parameter `repulse`. - n_jobs : type - Description of parameter `n_jobs`. + clusters_full : type TODO + Description of parameter `clusters_full`. TODO + layer : type TODO + Description of parameter `layer`. TODO + NxT : type TODO + Description of parameter `NxT`. TODO + X : type TODO + Description of parameter `X`. TODO + repulse : type TODO + Description of parameter `repulse`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize MDS. If an integer is given, it fixes the seed. @@ -205,9 +201,8 @@ def get_clusters_sizes_2( Returns ------- - type - Description of returned object. - + type TODO + Description of returned object. TODO """ unique = np.unique(NxT[layer], return_index=True, return_counts=True) diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py index 807a454..b80cf77 100644 --- a/multiscale_phate/tree.py +++ b/multiscale_phate/tree.py @@ -17,30 +17,30 @@ def build_tree( n_jobs=10, random_state=None, ): - """Short summary. + """Short summary. TODO Parameters ---------- - data_input : type - Description of parameter `data_input`. - scale : type - Description of parameter `scale`. - landmarks : type - Description of parameter `landmarks`. - partitions : type - Description of parameter `partitions`. - granularity : type - Description of parameter `granularity`. - n_pca : type - Description of parameter `n_pca`. - decay : type - Description of parameter `decay`. - gamma : type - Description of parameter `gamma`. - knn : type - Description of parameter `knn`. - n_jobs : type - Description of parameter `n_jobs`. + data_input : type TODO + Description of parameter `data_input`. TODO + scale : type TODO + Description of parameter `scale`. TODO + landmarks : type TODO + Description of parameter `landmarks`. TODO + partitions : type TODO + Description of parameter `partitions`. TODO + granularity : type TODO + Description of parameter `granularity`. TODO + n_pca : type TODO + Description of parameter `n_pca`. TODO + decay : type TODO + Description of parameter `decay`. TODO + gamma : type TODO + Description of parameter `gamma`. TODO + knn : type TODO + Description of parameter `knn`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO random_state : integer or numpy.RandomState, optional, default: None The random number generator. If an integer is given, it fixes the seed. @@ -48,8 +48,8 @@ def build_tree( Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ with tasklogger.log_task("Multiscale PHATE tree"): @@ -124,38 +124,38 @@ def online_update_tree( n_jobs=10, random_state=None, ): - """Short summary. + """Short summary. TODO Parameters ---------- - data_1 : type - Description of parameter `data_1`. - data_2 : type - Description of parameter `data_2`. - pca_centroid : type - Description of parameter `pca_centroid`. - pca_op : type - Description of parameter `pca_op`. - partitions : type - Description of parameter `partitions`. - diff_operator : type - Description of parameter `diff_operator`. - diff_pca_op : type - Description of parameter `diff_pca_op`. - Xs : type - Description of parameter `Xs`. - NxTs : type - Description of parameter `NxTs`. - Ks : type - Description of parameter `Ks`. - Merges : type - Description of parameter `Merges`. - Ps : type - Description of parameter `Ps`. - scale : type - Description of parameter `scale`. - n_jobs : type - Description of parameter `n_jobs`. + data_1 : type TODO + Description of parameter `data_1`. TODO + data_2 : type TODO + Description of parameter `data_2`. TODO + pca_centroid : type TODO + Description of parameter `pca_centroid`. TODO + pca_op : type TODO + Description of parameter `pca_op`. TODO + partitions : type TODO + Description of parameter `partitions`. TODO + diff_operator : type TODO + Description of parameter `diff_operator`. TODO + diff_pca_op : type TODO + Description of parameter `diff_pca_op`. TODO + Xs : type TODO + Description of parameter `Xs`. TODO + NxTs : type TODO + Description of parameter `NxTs`. TODO + Ks : type TODO + Description of parameter `Ks`. TODO + Merges : type TODO + Description of parameter `Merges`. TODO + Ps : type TODO + Description of parameter `Ps`. TODO + scale : type TODO + Description of parameter `scale`. TODO + n_jobs : type TODO + Description of parameter `n_jobs`. TODO random_state : integer or numpy.RandomState, optional, default: None The random number generator. If an integer is given, it fixes the seed. @@ -163,8 +163,8 @@ def online_update_tree( Returns ------- - type - Description of returned object. + type TODO + Description of returned object. TODO """ with tasklogger.log_task("Multiscale PHATE tree mapping"): diff --git a/multiscale_phate/visualize.py b/multiscale_phate/visualize.py index e82a7b9..54650b0 100644 --- a/multiscale_phate/visualize.py +++ b/multiscale_phate/visualize.py @@ -8,15 +8,15 @@ def get_visualization( Xs, NxTs, cluster_level, visualization_level, repulse, random_state=None ): - """Short summary. + """Short summary. TODO Parameters ---------- - Xs : type + Xs : type TODO Description of parameter `Xs`. - NxTs : type + NxTs : type TODO Description of parameter `NxTs`. - merges : type + merges : type TODO Description of parameter `merges`. random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize MDS. @@ -25,7 +25,7 @@ def get_visualization( Returns ------- - type + type TODO Description of returned object. """ @@ -41,15 +41,15 @@ def get_visualization( def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None): - """Short summary. + """Short summary. TODO Parameters ---------- - Xs : type + Xs : type TODO Description of parameter `Xs`. - NxTs : type + NxTs : type TODO Description of parameter `NxTs`. - merges : type + merges : type TODO Description of parameter `merges`. random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize MDS. @@ -58,7 +58,7 @@ def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None Returns ------- - type + type TODO Description of returned object. """ @@ -86,24 +86,24 @@ def map_clusters_to_tree(clusters, NxTs): def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps): - """Short summary. + """Short summary. TODO Parameters ---------- - data_pca : type + data_pca : type TODO Description of parameter `data_pca`. - diff_op : type + diff_op : type TODO Description of parameter `diff_op`. - NxT : type + NxT : type TODO Description of parameter `NxT`. - merged_list : type + merged_list : type TODO Description of parameter `merged_list`. - Ps : type + Ps : type TODO Description of parameter `Ps`. Returns ------- - type + type TODO Description of returned object. """ From 817015243a3d1bf0aff2e898ae5f95714e94e593 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:21:32 -0500 Subject: [PATCH 06/13] add verbosity --- multiscale_phate/compress.py | 7 +++++-- multiscale_phate/condense.py | 8 +++++--- multiscale_phate/diffuse.py | 14 +++++++++----- multiscale_phate/embed.py | 6 ++++-- multiscale_phate/multiscale_phate.py | 13 +++++++++++++ multiscale_phate/tree.py | 29 ++++++++++++++++++++-------- multiscale_phate/visualize.py | 6 ++++-- 7 files changed, 61 insertions(+), 22 deletions(-) diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py index 0d2800a..6414ac2 100644 --- a/multiscale_phate/compress.py +++ b/multiscale_phate/compress.py @@ -6,6 +6,9 @@ import scipy.spatial.distance +_logger = tasklogger.get_tasklogger("graphtools") + + def get_compression_features(N, features, n_pca, partitions, landmarks): """Short summary. TODO @@ -116,7 +119,7 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_stat """ N = data.shape[0] size = int(N / desired_num_clusters) - with tasklogger.log_task("partitions"): + with _logger.task("partitions"): mbk = sklearn.cluster.MiniBatchKMeans( init="k-means++", @@ -225,7 +228,7 @@ def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs= Description of returned object. TODO """ - with tasklogger.log_task("map to computed partitions"): + with _logger.task("map to computed partitions"): # getting max distance to each partition centroid distance_merged = get_distance_from_centroids( centroids, data, partition_clusters diff --git a/multiscale_phate/condense.py b/multiscale_phate/condense.py index e62b901..1079463 100644 --- a/multiscale_phate/condense.py +++ b/multiscale_phate/condense.py @@ -7,6 +7,8 @@ import scipy.spatial.distance import sklearn.metrics.pairwise +_logger = tasklogger.get_tasklogger("graphtools") + def comp(node, neigh, visited): """Short summary. TODO @@ -80,8 +82,8 @@ def compute_condensation_param(X, granularity): epsilon = granularity * (0.1 * np.mean(np.std(X))) / (X.shape[0] ** (-1 / 5)) D = scipy.spatial.distance.pdist(X, metric="euclidean") merge_threshold = np.percentile(D, 0.001) + 0.001 - tasklogger.log_info("Setting epsilon to " + str(round(epsilon, 4))) - tasklogger.log_info("Setting merge threshold to " + str(round(merge_threshold, 4))) + _logger.info("Setting epsilon to " + str(round(epsilon, 4))) + _logger.info("Setting merge threshold to " + str(round(merge_threshold, 4))) return epsilon, merge_threshold @@ -130,7 +132,7 @@ def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state= X_list.append(X_1) P_list = [] merged = [] - with tasklogger.log_task("condensation"): + with _logger.task("condensation"): while X_1.shape[0] > 1: D = sklearn.metrics.pairwise.pairwise_distances( X_1, metric="euclidean", n_jobs=n_jobs diff --git a/multiscale_phate/diffuse.py b/multiscale_phate/diffuse.py index 4b7bbac..f8c4c48 100644 --- a/multiscale_phate/diffuse.py +++ b/multiscale_phate/diffuse.py @@ -5,9 +5,11 @@ from . import compress +_logger = tasklogger.get_tasklogger("graphtools") + def compute_diffusion_potential( - data, N, decay, gamma, knn, landmarks=2000, n_jobs=10, random_state=None + data, N, decay, gamma, knn, landmarks=2000, n_jobs=10, verbose=0, random_state=None ): """Short summary. TODO @@ -27,6 +29,8 @@ def compute_diffusion_potential( Description of parameter `landmarks`. TODO n_jobs : type TODO Description of parameter `n_jobs`. TODO + verbose : `int`, optional (default: 0) + If `> 0`, print status messages random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize PHATE and PCA. If an integer is given, it fixes the seed. @@ -38,19 +42,19 @@ def compute_diffusion_potential( Description of returned object. TODO """ - with tasklogger.log_task("diffusion potential"): + with _logger.task("diffusion potential"): if landmarks != None and landmarks > data.shape[0]: landmarks = None diff_op = phate.PHATE( - verbose=False, n_landmark=landmarks, decay=decay, gamma=gamma, n_pca=None, knn=knn, n_jobs=n_jobs, + verbose=verbose, random_state=random_state, ) diff_op.fit(data) @@ -85,8 +89,8 @@ def online_update_diffusion_potential(unmapped_data, diff_op, dp_pca): Description of returned object. TODO """ - with tasklogger.log_task("extended diffusion potential"): - with tasklogger.log_task("extended kernel"): + with _logger.task("extended diffusion potential"): + with _logger.task("extended kernel"): # Extending kernel to new data transitions = diff_op.graph.extend_to_data(unmapped_data) diff --git a/multiscale_phate/embed.py b/multiscale_phate/embed.py index 8099be7..b318177 100644 --- a/multiscale_phate/embed.py +++ b/multiscale_phate/embed.py @@ -2,6 +2,8 @@ import phate import tasklogger +_logger = tasklogger.get_tasklogger("graphtools") + def repulsion(temp): """Short summary. TODO @@ -69,7 +71,7 @@ def compute_gradient(Xs, merges): Description of returned object. TODO """ - tasklogger.log_info("Computing gradient...") + _logger.info("Computing gradient...") gradient = [] m = 0 X = Xs[0] @@ -101,7 +103,7 @@ def get_levels(grad): type TODO Description of returned object. TODO """ - tasklogger.log_info("Identifying salient levels of resolution...") + _logger.info("Identifying salient levels of resolution...") minimum = np.max(grad) levels = [] levels.append(0) diff --git a/multiscale_phate/multiscale_phate.py b/multiscale_phate/multiscale_phate.py index 15823e7..53d08b0 100644 --- a/multiscale_phate/multiscale_phate.py +++ b/multiscale_phate/multiscale_phate.py @@ -1,5 +1,9 @@ +import tasklogger + from . import tree, embed, utils, visualize +_logger = tasklogger.get_tasklogger("graphtools") + class Multiscale_PHATE(object): """Multscale PHATE operator. @@ -41,6 +45,8 @@ class Multiscale_PHATE(object): used at all, which is useful for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used + verbose : `int` or `boolean`, optional (default: 1) + If `True` or `> 0`, print status messages random_state : integer or numpy.RandomState, optional, default: None The generator used to initialize SMACOF (metric, nonmetric) MDS If an integer is given, it fixes the seed @@ -92,6 +98,7 @@ def __init__( gamma=1, knn=5, n_jobs=1, + verbose=1, random_state=None, ): self.scale = scale @@ -103,7 +110,12 @@ def __init__( self.gamma = gamma self.knn = knn self.n_jobs = n_jobs + self.verbose = verbose self.random_state = random_state + + _logger.set_level(int(verbose)) + + # TODO: remove all of the below? Why are they here self.NxTs = None self.Xs = None self.Ks = None @@ -162,6 +174,7 @@ def fit(self, X): gamma=self.gamma, knn=self.knn, n_jobs=self.n_jobs, + verbose=self.verbose, random_state=self.random_state, ) diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py index b80cf77..12af8d4 100644 --- a/multiscale_phate/tree.py +++ b/multiscale_phate/tree.py @@ -3,6 +3,8 @@ import sklearn.decomposition from . import compress, diffuse, condense +_logger = tasklogger.get_tasklogger("graphtools") + def build_tree( data_input, @@ -15,6 +17,7 @@ def build_tree( gamma=1, knn=5, n_jobs=10, + verbose=1, random_state=None, ): """Short summary. TODO @@ -41,6 +44,8 @@ def build_tree( Description of parameter `knn`. TODO n_jobs : type TODO Description of parameter `n_jobs`. TODO + verbose : `int`, optional (default: 1) + If `> 0`, print status messages random_state : integer or numpy.RandomState, optional, default: None The random number generator. If an integer is given, it fixes the seed. @@ -52,7 +57,7 @@ def build_tree( Description of returned object. TODO """ - with tasklogger.log_task("Multiscale PHATE tree"): + with _logger.task("Multiscale PHATE tree"): N, features = data_input.shape # Computing compression features @@ -60,7 +65,7 @@ def build_tree( N, features, n_pca, partitions, landmarks ) - with tasklogger.log_task("PCA"): + with _logger.task("PCA"): pca_op = sklearn.decomposition.PCA(n_components=n_pca) data_pca = pca_op.fit_transform(np.array(data_input)) clusters = np.arange(N) @@ -74,7 +79,15 @@ def build_tree( clusters = partition_clusters X, diff_op, diff_pca = diffuse.compute_diffusion_potential( - data_pca, N, decay, gamma, knn, landmarks, n_jobs, random_state=random_state + data_pca, + N, + decay, + gamma, + knn, + landmarks, + n_jobs, + verbose=verbose - 1, + random_state=random_state, ) epsilon, merge_threshold = condense.compute_condensation_param( @@ -167,9 +180,9 @@ def online_update_tree( Description of returned object. TODO """ - with tasklogger.log_task("Multiscale PHATE tree mapping"): + with _logger.task("Multiscale PHATE tree mapping"): if data_1.shape[0] != len(np.unique(partitions)): - tasklogger.log_info("PCA compressing new data...") + _logger.info("PCA compressing new data...") data_pca_1 = pca_op.transform(np.array(data_1)) data_pca_2 = pca_op.transform(np.array(data_2)) @@ -177,7 +190,7 @@ def online_update_tree( partition_assignments = compress.map_update_data( pca_centroid, data_pca_1, data_pca_2, partitions, nn=5, n_jobs=n_jobs ) - tasklogger.log_info( + _logger.info( "Points not mapped to partitions: " + str(sum(partition_assignments == -1)) ) @@ -223,7 +236,7 @@ def online_update_tree( else: clusters = new_partition_clusters - tasklogger.log_info("Rebuilding condensation tree...") + _logger.info("Rebuilding condensation tree...") clusters_idx = [] for c in clusters: @@ -236,7 +249,7 @@ def online_update_tree( return NxTs_l, Xs, Ks, Merges, Ps, pca_centroid else: - tasklogger.log_info("PCA compressing new data...") + _logger.info("PCA compressing new data...") data_pca_2 = pca_op.transform(np.array(data_2)) diff_pot_1 = diffuse.online_update_diffusion_potential( data_pca_2, diff_operator, diff_pca_op diff --git a/multiscale_phate/visualize.py b/multiscale_phate/visualize.py index 54650b0..cc53626 100644 --- a/multiscale_phate/visualize.py +++ b/multiscale_phate/visualize.py @@ -4,6 +4,8 @@ from . import embed +_logger = tasklogger.get_tasklogger("graphtools") + def get_visualization( Xs, NxTs, cluster_level, visualization_level, repulse, random_state=None @@ -107,7 +109,7 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps): Description of returned object. """ - with tasklogger.log_task("base visualization"): + with _logger.task("base visualization"): with warnings.catch_warnings(): warnings.filterwarnings( "ignore", @@ -131,7 +133,7 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps): m = 0 - with tasklogger.log_task("tree"): + with _logger.task("tree"): for l in range(0, len(Ps)): if len(np.unique(NxT[l])) != len(np.unique(NxT[l + 1])): tree_phate_1 = embed.condense_visualization(merged_list[m], tree_phate) From 47dc43d1c278a97cdf2ac8469111dea2ed266204 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:22:59 -0500 Subject: [PATCH 07/13] assert results different with different random seed --- test/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.py b/test/test.py index 68369f3..27d48ab 100644 --- a/test/test.py +++ b/test/test.py @@ -74,4 +74,4 @@ def test_random_seed(): mp_op = multiscale_phate.Multiscale_PHATE(partitions=100, landmarks=50) hp_embedding, _, _ = mp_op.fit_transform(X) hp_embedding2, _, _ = mp_op.fit_transform(X) - # np.testing.assert_all_close(hp_embedding, hp_embedding2) + assert not np.all(hp_embedding == hp_embedding2) From 22e724f4d6432d0c618d9a633955e8fde979e573 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 18 Nov 2020 19:30:08 -0500 Subject: [PATCH 08/13] Fix flake8 errors --- multiscale_phate/compress.py | 24 ++++++++++++++++++++---- multiscale_phate/condense.py | 2 -- multiscale_phate/diffuse.py | 2 +- multiscale_phate/embed.py | 21 ++++++++++----------- multiscale_phate/tree.py | 11 ++++++----- multiscale_phate/visualize.py | 35 +++++++++++++++++++++++++---------- 6 files changed, 62 insertions(+), 33 deletions(-) diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py index 6414ac2..33fa921 100644 --- a/multiscale_phate/compress.py +++ b/multiscale_phate/compress.py @@ -31,7 +31,7 @@ def get_compression_features(N, features, n_pca, partitions, landmarks): Description of returned object. TODO """ - if n_pca == None: + if n_pca is None: n_pca = min(N, features) if n_pca > 100: n_pca = 100 @@ -40,10 +40,10 @@ def get_compression_features(N, features, n_pca, partitions, landmarks): # if N<100000: # partitions=None - if partitions != None and partitions >= N: + if partitions is not None and partitions >= N: partitions = None - if partitions != None and partitions > 50000: + if partitions is not None and partitions > 50000: partitions = 50000 elif N > 100000: partitions = 20000 @@ -188,6 +188,22 @@ def merge_clusters(diff_pot_unmerged, clusters): def get_distance_from_centroids(centroids, data, clusters): + """Short summary. + + Parameters + ---------- + centroids : type + Description of parameter `centroids`. + data : type + Description of parameter `data`. + clusters : type + Description of parameter `clusters`. + + Returns + ------- + type + Description of returned object. + """ distance = np.zeros(centroids.shape[0]) for c in range(centroids.shape[0]): @@ -249,7 +265,7 @@ def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs= for r in range(len(subset_partition_assignment)): c = 0 while c < nn: - if parition_assignment_bool[r, c] == True: + if parition_assignment_bool[r, c] is True: subset_partition_assignment[r] = neighbor_idx[r, c] c = nn + 1 break diff --git a/multiscale_phate/condense.py b/multiscale_phate/condense.py index 1079463..4a6251d 100644 --- a/multiscale_phate/condense.py +++ b/multiscale_phate/condense.py @@ -120,8 +120,6 @@ def condense(X, clusters, scale, epsilon, merge_threshold, n_jobs, random_state= NxT.append(clusters) X_cont = [] - N = X.shape[0] - for c in range(len(np.unique(clusters))): loc = np.where(c == clusters)[0] X_cont.append(list(loc)) diff --git a/multiscale_phate/diffuse.py b/multiscale_phate/diffuse.py index f8c4c48..425b000 100644 --- a/multiscale_phate/diffuse.py +++ b/multiscale_phate/diffuse.py @@ -44,7 +44,7 @@ def compute_diffusion_potential( """ with _logger.task("diffusion potential"): - if landmarks != None and landmarks > data.shape[0]: + if landmarks is not None and landmarks > data.shape[0]: landmarks = None diff_op = phate.PHATE( diff --git a/multiscale_phate/embed.py b/multiscale_phate/embed.py index b318177..9214193 100644 --- a/multiscale_phate/embed.py +++ b/multiscale_phate/embed.py @@ -76,17 +76,17 @@ def compute_gradient(Xs, merges): m = 0 X = Xs[0] - for l in range(0, len(Xs) - 1): - if X.shape[0] != Xs[l + 1].shape[0]: + for layer in range(0, len(Xs) - 1): + if X.shape[0] != Xs[layer + 1].shape[0]: X_1 = condense_visualization(merges[m], X) m = m + 1 - while X_1.shape[0] != Xs[l + 1].shape[0]: + while X_1.shape[0] != Xs[layer + 1].shape[0]: X_1 = condense_visualization(merges[m], X_1) m = m + 1 else: X_1 = X - gradient.append(np.sum(np.abs(X_1 - Xs[l + 1]))) - X = Xs[l + 1] + gradient.append(np.sum(np.abs(X_1 - Xs[layer + 1]))) + X = Xs[layer + 1] return np.array(gradient) @@ -135,7 +135,6 @@ def get_zoom_visualization( If an integer is given, it fixes the seed. Defaults to the global `numpy` random number generator """ - unique = np.unique( NxTs[zoom_visualization_level], return_index=True, return_counts=True ) @@ -167,13 +166,13 @@ def compute_ideal_visualization_layer(gradient, Xs, min_cells=100): minimum = np.max(gradient) min_layer = 0 - for l in range(1, len(Xs)): - if Xs[l].shape[0] < min_cells: + for layer in range(1, len(Xs)): + if Xs[layer].shape[0] < min_cells: break - if gradient[l] < minimum: + if gradient[layer] < minimum: # print("New minimum!") - minimum = gradient[l] - min_layer = l + minimum = gradient[layer] + min_layer = layer return min_layer diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py index 12af8d4..0734888 100644 --- a/multiscale_phate/tree.py +++ b/multiscale_phate/tree.py @@ -71,7 +71,7 @@ def build_tree( clusters = np.arange(N) # Subsetting if required - if partitions != None: + if partitions is not None: partition_clusters = compress.subset_data( data_pca, partitions, n_jobs=n_jobs, random_state=random_state ) @@ -191,8 +191,9 @@ def online_update_tree( pca_centroid, data_pca_1, data_pca_2, partitions, nn=5, n_jobs=n_jobs ) _logger.info( - "Points not mapped to partitions: " - + str(sum(partition_assignments == -1)) + "Points not mapped to partitions: {}".format( + sum(partition_assignments == -1) + ) ) # creating new joint paritions mapping @@ -244,8 +245,8 @@ def online_update_tree( NxTs_l = [] - for l in range(len(NxTs)): - NxTs_l.append(NxTs[l][clusters_idx]) + for layer in range(len(NxTs)): + NxTs_l.append(NxTs[layer][clusters_idx]) return NxTs_l, Xs, Ks, Merges, Ps, pca_centroid else: diff --git a/multiscale_phate/visualize.py b/multiscale_phate/visualize.py index cc53626..eafa23d 100644 --- a/multiscale_phate/visualize.py +++ b/multiscale_phate/visualize.py @@ -64,7 +64,6 @@ def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None Description of returned object. """ - min_layer = embed.compute_ideal_visualization_layer(gradient, Xs, min_cells) (hp_embedding, cluster_viz, sizes_viz,) = embed.get_clusters_sizes_2( np.array(NxTs[-35]), @@ -78,10 +77,25 @@ def build_visualization(Xs, NxTs, merges, gradient, min_cells, random_state=None def map_clusters_to_tree(clusters, NxTs): + """Short summary. + + Parameters + ---------- + clusters : type + Description of parameter `clusters`. + NxTs : type + Description of parameter `NxTs`. + + Returns + ------- + type + Description of returned object. + + """ clusters_tree = [] - for l in range(len(NxTs) - 1): - _, ind = np.unique(NxTs[l], return_index=True) + for layer in range(len(NxTs) - 1): + _, ind = np.unique(NxTs[layer], return_index=True) clusters_tree.extend(clusters[ind]) return clusters_tree @@ -114,8 +128,9 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps): warnings.filterwarnings( "ignore", category=RuntimeWarning, - message="Pre-fit PHATE should not be used to transform a new data matrix. " - "Please fit PHATE to the new data by running 'fit' with the new data.", + message="Pre-fit PHATE should not be used to transform a new data " + "matrix. Please fit PHATE to the new data by running 'fit' with the " + "new data.", ) tree_phate = diff_op.transform(data_pca) @@ -134,21 +149,21 @@ def build_condensation_tree(data_pca, diff_op, NxT, merged_list, Ps): m = 0 with _logger.task("tree"): - for l in range(0, len(Ps)): - if len(np.unique(NxT[l])) != len(np.unique(NxT[l + 1])): + for layer in range(0, len(Ps)): + if len(np.unique(NxT[layer])) != len(np.unique(NxT[layer + 1])): tree_phate_1 = embed.condense_visualization(merged_list[m], tree_phate) m = m + 1 - if Ps[l].shape[0] != tree_phate_1.shape[0]: + if Ps[layer].shape[0] != tree_phate_1.shape[0]: tree_phate_1 = embed.condense_visualization( merged_list[m], tree_phate_1 ) m = m + 1 - tree_phate = Ps[l] @ tree_phate_1 + tree_phate = Ps[layer] @ tree_phate_1 embeddings.append( np.concatenate( [ tree_phate, - np.repeat(l + 1, tree_phate.shape[0]).reshape( + np.repeat(layer + 1, tree_phate.shape[0]).reshape( tree_phate.shape[0], 1 ), ], From de6ba0dc82914b1b69729848fbda3e19f6ceb553 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 19 Nov 2020 09:46:12 -0500 Subject: [PATCH 09/13] remove unused argument --- multiscale_phate/compress.py | 2 +- multiscale_phate/tree.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py index 33fa921..8b2db97 100644 --- a/multiscale_phate/compress.py +++ b/multiscale_phate/compress.py @@ -9,7 +9,7 @@ _logger = tasklogger.get_tasklogger("graphtools") -def get_compression_features(N, features, n_pca, partitions, landmarks): +def get_compression_features(N, features, n_pca, partitions): """Short summary. TODO Parameters diff --git a/multiscale_phate/tree.py b/multiscale_phate/tree.py index 0734888..4e7d2d1 100644 --- a/multiscale_phate/tree.py +++ b/multiscale_phate/tree.py @@ -62,7 +62,7 @@ def build_tree( # Computing compression features n_pca, partitions = compress.get_compression_features( - N, features, n_pca, partitions, landmarks + N, features, n_pca, partitions ) with _logger.task("PCA"): From da6cf434bd76d5235e9d378c9d56f2bb96943528 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 19 Nov 2020 09:59:33 -0500 Subject: [PATCH 10/13] test get_compression_features --- test/test.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/test.py b/test/test.py index 27d48ab..f36524c 100644 --- a/test/test.py +++ b/test/test.py @@ -75,3 +75,54 @@ def test_random_seed(): hp_embedding, _, _ = mp_op.fit_transform(X) hp_embedding2, _, _ = mp_op.fit_transform(X) assert not np.all(hp_embedding == hp_embedding2) + + +@parameterized.parameterized( + [ + # n_pca is None -> min(N, features) + (100, 50, None, 50), + (50, 100, None, 50), + # n_pca < min(N, features) -> n_pca + (100, 50, 25, 25), + # n_pca > 100 -> 100 + (200, 150, 200, 100), + (200, 150, 125, 100), + # n_pca > min(N, features) -> min(N, features) + (100, 50, 75, 50), + (50, 100, 75, 50), + (100, 50, 125, 50), + (50, 100, 125, 50), + ] +) +def test_compression_features_pca(N, features, n_pca, expected): + partitions = None + output, _ = multiscale_phate.compress.get_compression_features( + N, features, n_pca, partitions + ) + assert output == expected + + +@parameterized.parameterized( + [ + # TODO: is this desired behavior? seems pathological + # partitions is None -> None + (100, None, None), + # partitions > N -> None + (100, 101, None), + (200000, 200001, None), + # partitions > 50000 -> 50000 + (110000, 50001, 50000), + # N > 100000 -> 20000 + (110000, None, 20000), + (110000, 100, 20000), + (110000, 50000, 20000), + (110000, 110001, 20000), + ] +) +def test_compression_features_partitions(N, partitions, expected): + n_pca = None + features = 50 + _, output = multiscale_phate.compress.get_compression_features( + N, features, n_pca, partitions + ) + assert output == expected From e2748c9ddd37a1deef8702314773b647dcef3cd0 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 19 Nov 2020 10:02:27 -0500 Subject: [PATCH 11/13] embeddings can be different sizes --- test/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test.py b/test/test.py index f36524c..ce7e461 100644 --- a/test/test.py +++ b/test/test.py @@ -74,7 +74,8 @@ def test_random_seed(): mp_op = multiscale_phate.Multiscale_PHATE(partitions=100, landmarks=50) hp_embedding, _, _ = mp_op.fit_transform(X) hp_embedding2, _, _ = mp_op.fit_transform(X) - assert not np.all(hp_embedding == hp_embedding2) + if hp_embedding.shape[0] == hp_embedding2.shape[0]: + assert not np.all(hp_embedding == hp_embedding2) @parameterized.parameterized( From 2646aba4fa92e0b337fea4bb55c6e3aba1bb771e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 19 Nov 2020 10:03:05 -0500 Subject: [PATCH 12/13] remove duplicate line --- multiscale_phate/compress.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/multiscale_phate/compress.py b/multiscale_phate/compress.py index 8b2db97..7080322 100644 --- a/multiscale_phate/compress.py +++ b/multiscale_phate/compress.py @@ -36,8 +36,6 @@ def get_compression_features(N, features, n_pca, partitions): if n_pca > 100: n_pca = 100 - n_pca = 100 - # if N<100000: # partitions=None if partitions is not None and partitions >= N: From feb8b934995d72475e5d6fd4d1db09a6b5d12475 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 19 Nov 2020 10:45:45 -0500 Subject: [PATCH 13/13] print output if assert fails --- test/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test.py b/test/test.py index ce7e461..7dc1ad6 100644 --- a/test/test.py +++ b/test/test.py @@ -100,7 +100,7 @@ def test_compression_features_pca(N, features, n_pca, expected): output, _ = multiscale_phate.compress.get_compression_features( N, features, n_pca, partitions ) - assert output == expected + assert output == expected, (output, expected) @parameterized.parameterized( @@ -126,4 +126,4 @@ def test_compression_features_partitions(N, partitions, expected): _, output = multiscale_phate.compress.get_compression_features( N, features, n_pca, partitions ) - assert output == expected + assert output == expected, (output, expected)