Documentation & testing cleanup #2

Open · wants to merge 14 commits into `main`
10 changes: 8 additions & 2 deletions README.md
@@ -2,8 +2,8 @@ Multiscale PHATE
================

[![Latest PyPi version](https://img.shields.io/pypi/v/multiscale_phate.svg)](https://pypi.org/project/multiscale_phate/)
[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=master)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE)
[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=master)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=master)
[![Travis CI Build](https://api.travis-ci.com/KrishnaswamyLab/Multiscale_PHATE.svg?branch=main)](https://travis-ci.com/KrishnaswamyLab/Multiscale_PHATE)
[![Coverage Status](https://coveralls.io/repos/github/KrishnaswamyLab/Multiscale_PHATE/badge.svg?branch=main)](https://coveralls.io/github/KrishnaswamyLab/Multiscale_PHATE?branch=main)
[![Twitter](https://img.shields.io/twitter/follow/KrishnaswamyLab.svg?style=social&label=Follow)](https://twitter.com/KrishnaswamyLab)
[![GitHub stars](https://img.shields.io/github/stars/KrishnaswamyLab/Multiscale_PHATE.svg?style=social&label=Stars)](https://github.com/KrishnaswamyLab/Multiscale_PHATE/)
[![Code style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -19,6 +19,12 @@ Installation

Multiscale PHATE is available on PyPI. Install it by running the following in a terminal:

```
pip install --user multiscale_phate
```

If you wish to install from source, you may do so as follows:

```
pip install --user git+https://github.com/KrishnaswamyLab/Multiscale_PHATE
```
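
After installation, basic usage looks roughly like the following. This is a hedged sketch: the `Multiscale_PHATE` estimator name and the `fit_transform` return values are assumptions drawn from the project's scikit-learn-style API, not from this diff.

```
import numpy as np
import multiscale_phate

# Hypothetical example data: 1000 observations x 50 features.
X = np.random.normal(size=(1000, 50))

# Assumed estimator name and return values; see the repository README
# for the authoritative usage.
mp_op = multiscale_phate.Multiscale_PHATE()
mp_embedding, mp_clusters, mp_sizes = mp_op.fit_transform(X)
```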
145 changes: 81 additions & 64 deletions multiscale_phate/compress.py
@@ -6,41 +6,42 @@
import scipy.spatial.distance


def get_compression_features(N, features, n_pca, partitions, landmarks):
"""Short summary.
_logger = tasklogger.get_tasklogger("graphtools")


def get_compression_features(N, features, n_pca, partitions):
"""Short summary. TODO

Parameters
----------
N : type
Description of parameter `N`.
features : type
Description of parameter `features`.
n_pca : type
Description of parameter `n_pca`.
partitions : type
Description of parameter `partitions`.
landmarks : type
Description of parameter `landmarks`.
N : type TODO
Description of parameter `N`. TODO
features : type TODO
Description of parameter `features`. TODO
n_pca : type TODO
Description of parameter `n_pca`. TODO
partitions : type TODO
Description of parameter `partitions`. TODO
landmarks : type TODO
Description of parameter `landmarks`. TODO

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
if n_pca == None:
if n_pca is None:
n_pca = min(N, features)
if n_pca > 100:
n_pca = 100

n_pca = 100

# if N<100000:
# partitions=None
if partitions != None and partitions >= N:
if partitions is not None and partitions >= N:
partitions = None

if partitions != None and partitions > 50000:
if partitions is not None and partitions > 50000:
partitions = 50000
elif N > 100000:
partitions = 20000
@@ -49,25 +50,25 @@ def get_compression_features(N, features, n_pca, partitions, landmarks):
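
For intuition, the parameter-defaulting rules visible above behave as in the standalone sketch below. It mirrors only the lines shown in this hunk; the diff collapses the rest of the function, so the tail of the chain is not reproduced here.

```
def resolve_compression_params(N, features, n_pca, partitions):
    # Mirrors only the clamping rules visible in the hunk above.
    if n_pca is None:
        n_pca = min(N, features)
    if n_pca > 100:
        n_pca = 100
    if partitions is not None and partitions >= N:
        partitions = None  # partitioning into >= N groups is pointless
    if partitions is not None and partitions > 50000:
        partitions = 50000
    elif N > 100000:
        partitions = 20000
    return n_pca, partitions

# e.g. 200,000 observations, 500 features, all defaults:
# resolve_compression_params(200000, 500, None, None) -> (100, 20000)
```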


def cluster_components(data_subset, num_cluster, size, random_state=None):
"""Short summary.
"""Short summary. TODO

Parameters
----------
data_subset : type
Description of parameter `data_subset`.
num_cluster : type
Description of parameter `num_cluster`.
size : type
Description of parameter `size`.
data_subset : type TODO
Description of parameter `data_subset`. TODO
num_cluster : type TODO
Description of parameter `num_cluster`. TODO
size : type TODO
Description of parameter `size`. TODO
random_state : integer or numpy.RandomState, optional, default: None
The generator used to initialize MiniBatchKMeans.
If an integer is given, it fixes the seed.
Defaults to the global `numpy` random number generator

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
if data_subset.shape[0] == 1:
@@ -91,32 +92,32 @@ def cluster_components(data_subset, num_cluster, size, random_state=None):
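
Since `random_state` now seeds MiniBatchKMeans, repeated calls with the same integer seed should be reproducible. A hypothetical check (the argument values are illustrative, and the return value is assumed to be per-point cluster labels; the diff hides the function body):

```
import numpy as np
from multiscale_phate import compress

# Synthetic stand-in for one partition's data subset.
data_subset = np.random.normal(size=(500, 20))

# With the seed fixed, both calls should initialize MiniBatchKMeans
# identically and yield identical assignments.
labels_a = compress.cluster_components(data_subset, num_cluster=100, size=50, random_state=42)
labels_b = compress.cluster_components(data_subset, num_cluster=100, size=50, random_state=42)
```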


def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_state=None):
"""Short summary.
"""Short summary. TODO

Parameters
----------
data : type
Description of parameter `data`.
desired_num_clusters : type
Description of parameter `desired_num_clusters`.
n_jobs : type
Description of parameter `n_jobs`.
num_cluster : type
Description of parameter `num_cluster`.
data : type TODO
Description of parameter `data`. TODO
desired_num_clusters : type TODO
Description of parameter `desired_num_clusters`. TODO
n_jobs : type TODO
Description of parameter `n_jobs`. TODO
num_cluster : type TODO
Description of parameter `num_cluster`. TODO
random_state : integer or numpy.RandomState, optional, default: None
The generator used to initialize MiniBatchKMeans.
If an integer is given, it fixes the seed.
Defaults to the global `numpy` random number generator

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
N = data.shape[0]
size = int(N / desired_num_clusters)
with tasklogger.log_task("partitions"):
with _logger.task("partitions"):

mbk = sklearn.cluster.MiniBatchKMeans(
init="k-means++",
@@ -156,19 +157,19 @@ def subset_data(data, desired_num_clusters, n_jobs, num_cluster=100, random_state=None):
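
A hedged usage sketch for `subset_data`, assuming (from the "partitions" task it logs) that it returns one partition label per row; the data here is synthetic and the argument values are illustrative:

```
import numpy as np
from multiscale_phate import compress

# Synthetic data: 100,000 observations, 100 features.
data = np.random.normal(size=(100000, 100))

# Assumed return value: a partition label per observation, letting
# downstream steps work with ~20,000 partition centroids instead of
# all 100,000 points.
partition_labels = compress.subset_data(data, desired_num_clusters=20000, n_jobs=4, random_state=42)
```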


def merge_clusters(diff_pot_unmerged, clusters):
"""Short summary.
"""Short summary. TODO

Parameters
----------
diff_pot_unmerged : type
Description of parameter `diff_pot_unmerged`.
clusters : type
Description of parameter `clusters`.
diff_pot_unmerged : type TODO
Description of parameter `diff_pot_unmerged`. TODO
clusters : type TODO
Description of parameter `clusters`. TODO

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
clusters_uni = np.unique(clusters)
@@ -185,6 +186,22 @@
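
Beyond the `np.unique` call, the body of `merge_clusters` is collapsed in this diff. The sketch below shows the presumed pattern, reducing the unmerged potential matrix to one row per cluster; the averaging step is an assumption, not taken from the hidden code.

```
import numpy as np

def merge_clusters_sketch(diff_pot_unmerged, clusters):
    # Presumed behavior: aggregate the diffusion-potential rows of all
    # points sharing a cluster label into one representative row.
    clusters_uni = np.unique(clusters)
    merged = np.zeros((len(clusters_uni), diff_pot_unmerged.shape[1]))
    for i, c in enumerate(clusters_uni):
        merged[i] = diff_pot_unmerged[clusters == c].mean(axis=0)
    return merged
```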


def get_distance_from_centroids(centroids, data, clusters):
"""Short summary.

Parameters
----------
centroids : type
Description of parameter `centroids`.
data : type
Description of parameter `data`.
clusters : type
Description of parameter `clusters`.

Returns
-------
type
Description of returned object.
"""
distance = np.zeros(centroids.shape[0])

for c in range(centroids.shape[0]):
@@ -202,30 +219,30 @@
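
Consistent with the docstring above and the "max distance to each partition centroid" comment later in this file, the function presumably computes a per-partition radius. A hedged sketch; the `cdist` call stands in for the hidden loop body:

```
import numpy as np
import scipy.spatial.distance

def get_distance_from_centroids_sketch(centroids, data, clusters):
    # For each centroid, record the largest distance to any point
    # assigned to it, i.e. the radius of that partition.
    distance = np.zeros(centroids.shape[0])
    for c in range(centroids.shape[0]):
        members = data[clusters == c]
        dists = scipy.spatial.distance.cdist(centroids[c][None, :], members)
        distance[c] = dists.max()
    return distance
```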


def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=10):
"""Short summary.
"""Short summary. TODO

Parameters
----------
centroids : type
Description of parameter `centroids`.
data : type
Description of parameter `data`.
new_data : type
Description of parameter `new_data`.
partition_clusters : type
Description of parameter `partition_clusters`.
nn : type
Description of parameter `nn`.
n_jobs : type
Description of parameter `n_jobs`.
centroids : type TODO
Description of parameter `centroids`. TODO
data : type TODO
Description of parameter `data`. TODO
new_data : type TODO
Description of parameter `new_data`. TODO
partition_clusters : type TODO
Description of parameter `partition_clusters`. TODO
nn : type TODO
Description of parameter `nn`. TODO
n_jobs : type TODO
Description of parameter `n_jobs`. TODO

Returns
-------
type
Description of returned object.
type TODO
Description of returned object. TODO

"""
with tasklogger.log_task("map to computed partitions"):
with _logger.task("map to computed partitions"):
# getting max distance to each partition centroid
distance_merged = get_distance_from_centroids(
centroids, data, partition_clusters
@@ -246,7 +263,7 @@ def map_update_data(centroids, data, new_data, partition_clusters, nn=5, n_jobs=10):
for r in range(len(subset_partition_assignment)):
c = 0
while c < nn:
if parition_assignment_bool[r, c] == True:
if parition_assignment_bool[r, c]:
subset_partition_assignment[r] = neighbor_idx[r, c]
c = nn + 1
break
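
The loop above walks each new point's `nn` nearest neighbors in order and assigns the first neighbor that falls within a partition's radius. A condensed, standalone form of that rule (hypothetical function name; the arrays follow the visible loop's conventions):

```
import numpy as np

def assign_first_in_radius(neighbor_idx, parition_assignment_bool, nn):
    # neighbor_idx[r] lists point r's nn nearest neighbors, nearest
    # first; parition_assignment_bool[r, c] flags whether neighbor c
    # lies within its partition's radius. -1 marks "unassigned".
    assignment = np.full(neighbor_idx.shape[0], -1)
    for r in range(neighbor_idx.shape[0]):
        for c in range(nn):
            if parition_assignment_bool[r, c]:
                assignment[r] = neighbor_idx[r, c]
                break
    return assignment
```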