Skip to content

Commit

Permalink
Merge pull request #45 from KrishnaswamyLab/dev
Browse files Browse the repository at this point in the history
graphtools v1.3.1
  • Loading branch information
scottgigante authored Oct 15, 2019
2 parents e31a41a + a431bf0 commit 275c104
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 293 deletions.
13 changes: 7 additions & 6 deletions graphtools/api.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import numpy as np
import warnings
import tasklogger
from scipy import sparse
import pickle
import pygsp
import tasklogger

from . import base, graphs

from . import base
from . import graphs
_logger = tasklogger.get_tasklogger('graphtools')


def Graph(data,
Expand Down Expand Up @@ -173,7 +174,7 @@ def Graph(data,
“Numerical Recipes (3rd edition)”,
Cambridge University Press, 2007, page 795.
"""
tasklogger.set_level(verbose)
_logger.set_level(verbose)
if sample_idx is not None and len(np.unique(sample_idx)) == 1:
warnings.warn("Only one unique sample. "
"Not using MNNGraph")
Expand Down Expand Up @@ -239,7 +240,7 @@ def Graph(data,
else:
msg = msg + " and PyGSP inheritance"

tasklogger.log_debug(msg)
_logger.debug(msg)

class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
try:
Expand All @@ -257,7 +258,7 @@ def Graph(data,
pass

# build graph and return
tasklogger.log_debug("Initializing {} with arguments {}".format(
_logger.debug("Initializing {} with arguments {}".format(
parent_classes,
", ".join(["{}='{}'".format(key, value)
for key, value in params.items()
Expand Down
116 changes: 61 additions & 55 deletions graphtools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from scipy import sparse
import warnings
import numbers
import tasklogger
import pickle
import sys
import tasklogger

try:
import pandas as pd
Expand All @@ -29,6 +29,8 @@

from . import utils

_logger = tasklogger.get_tasklogger('graphtools')


class Base(object):
"""Class that deals with key-word arguments but is otherwise
Expand Down Expand Up @@ -179,7 +181,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold):
n_pca = None
elif n_pca is True: # notify that we're going to estimate rank.
n_pca = 'auto'
tasklogger.log_info("Estimating n_pca from matrix rank. "
_logger.info("Estimating n_pca from matrix rank. "
"Supply an integer n_pca "
"for fixed amount.")
if not any([isinstance(n_pca, numbers.Number),
Expand Down Expand Up @@ -233,45 +235,44 @@ def _reduce_data(self):
Reduced data matrix
"""
if self.n_pca is not None and (self.n_pca == 'auto' or self.n_pca < self.data.shape[1]):
tasklogger.log_start("PCA")
n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
if sparse.issparse(self.data):
if isinstance(self.data, sparse.coo_matrix) or \
isinstance(self.data, sparse.lil_matrix) or \
isinstance(self.data, sparse.dok_matrix):
self.data = self.data.tocsr()
self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
else:
self.data_pca = PCA(n_pca,
svd_solver='randomized',
random_state=self.random_state)
self.data_pca.fit(self.data)
if self.n_pca == 'auto':
s = self.data_pca.singular_values_
smax = s.max()
if self.rank_threshold == 'auto':
threshold = smax * \
np.finfo(self.data.dtype).eps * max(self.data.shape)
self.rank_threshold = threshold
threshold = self.rank_threshold
gate = np.where(s >= threshold)[0]
self.n_pca = gate.shape[0]
if self.n_pca == 0:
raise ValueError("Supplied threshold {} was greater than "
"maximum singular value {} "
"for the data matrix".format(threshold, smax))
tasklogger.log_info(
"Using rank estimate of {} as n_pca".format(self.n_pca))
# reset the sklearn operator
op = self.data_pca # for line-width brevity..
op.components_ = op.components_[gate, :]
op.explained_variance_ = op.explained_variance_[gate]
op.explained_variance_ratio_ = op.explained_variance_ratio_[
gate]
op.singular_values_ = op.singular_values_[gate]
self.data_pca = op # redundant: op already aliases self.data_pca and was modified in place
data_nu = self.data_pca.transform(self.data)
tasklogger.log_complete("PCA")
with _logger.task("PCA"):
n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
if sparse.issparse(self.data):
if isinstance(self.data, sparse.coo_matrix) or \
isinstance(self.data, sparse.lil_matrix) or \
isinstance(self.data, sparse.dok_matrix):
self.data = self.data.tocsr()
self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
else:
self.data_pca = PCA(n_pca,
svd_solver='randomized',
random_state=self.random_state)
self.data_pca.fit(self.data)
if self.n_pca == 'auto':
s = self.data_pca.singular_values_
smax = s.max()
if self.rank_threshold == 'auto':
threshold = smax * \
np.finfo(self.data.dtype).eps * max(self.data.shape)
self.rank_threshold = threshold
threshold = self.rank_threshold
gate = np.where(s >= threshold)[0]
self.n_pca = gate.shape[0]
if self.n_pca == 0:
raise ValueError("Supplied threshold {} was greater than "
"maximum singular value {} "
"for the data matrix".format(threshold, smax))
_logger.info(
"Using rank estimate of {} as n_pca".format(self.n_pca))
# reset the sklearn operator
op = self.data_pca # for line-width brevity..
op.components_ = op.components_[gate, :]
op.explained_variance_ = op.explained_variance_[gate]
op.explained_variance_ratio_ = op.explained_variance_ratio_[
gate]
op.singular_values_ = op.singular_values_[gate]
self.data_pca = op # redundant: op already aliases self.data_pca and was modified in place
data_nu = self.data_pca.transform(self.data)
return data_nu
else:
data_nu = self.data
Expand Down Expand Up @@ -472,10 +473,10 @@ def __init__(self,
self.anisotropy = anisotropy

if initialize:
tasklogger.log_debug("Initializing kernel...")
_logger.debug("Initializing kernel...")
self.K
else:
tasklogger.log_debug("Not initializing kernel.")
_logger.debug("Not initializing kernel.")
super().__init__(**kwargs)

def _check_symmetrization(self, kernel_symm, theta):
Expand Down Expand Up @@ -524,18 +525,18 @@ def _build_kernel(self):
def symmetrize_kernel(self, K):
# symmetrize
if self.kernel_symm == "+":
tasklogger.log_debug("Using addition symmetrization.")
_logger.debug("Using addition symmetrization.")
K = (K + K.T) / 2
elif self.kernel_symm == "*":
tasklogger.log_debug("Using multiplication symmetrization.")
_logger.debug("Using multiplication symmetrization.")
K = K.multiply(K.T)
elif self.kernel_symm == 'mnn':
tasklogger.log_debug(
_logger.debug(
"Using mnn symmetrization (theta = {}).".format(self.theta))
K = self.theta * utils.elementwise_minimum(K, K.T) + \
(1 - self.theta) * utils.elementwise_maximum(K, K.T)
elif self.kernel_symm is None:
tasklogger.log_debug("Using no symmetrization.")
_logger.debug("Using no symmetrization.")
pass
else:
# this should never happen
Expand Down Expand Up @@ -729,12 +730,12 @@ def to_pygsp(self, **kwargs):
def to_igraph(self, attribute="weight", **kwargs):
"""Convert to an igraph Graph
Uses the igraph.Graph.Weighted_Adjacency constructor
Uses the igraph.Graph constructor
Parameters
----------
attribute : str, optional (default: "weight")
kwargs : additional arguments for igraph.Graph.Weighted_Adjacency
kwargs : additional arguments for igraph.Graph
"""
try:
import igraph as ig
Expand All @@ -747,8 +748,13 @@ def to_igraph(self, attribute="weight", **kwargs):
# not a pygsp graph
W = self.K.copy()
W = utils.set_diagonal(W, 0)
return ig.Graph.Weighted_Adjacency(utils.to_array(W).tolist(),
attr=attribute, **kwargs)
sources, targets = W.nonzero()
edgelist = list(zip(sources, targets))
g = ig.Graph(W.shape[0], edgelist, **kwargs)
weights = W[W.nonzero()]
weights = utils.to_array(weights)
g.es[attribute] = weights.flatten().tolist()
return g

def to_pickle(self, path):
"""Save the current Graph to a pickle.
Expand Down Expand Up @@ -787,10 +793,10 @@ def _check_shortest_path_distance(self, distance):
def _default_shortest_path_distance(self):
    """Choose the default distance type for shortest-path computation.

    Returns
    -------
    distance : str
        'data' when `self.weighted` is falsy, 'affinity' otherwise.
        The choice is reported at info level through the module logger.
    """
    # Log through the module-level `_logger` only; also calling
    # `tasklogger.log_info` here would report the same message twice.
    if not self.weighted:
        distance = 'data'
        _logger.info("Using ambient data distances.")
    else:
        distance = 'affinity'
        _logger.info("Using negative log affinity distances.")
    return distance

def shortest_path(self, method='auto', distance=None):
Expand Down Expand Up @@ -954,7 +960,7 @@ def __init__(self, data,
# kwargs are ignored
self.n_jobs = n_jobs
self.verbose = verbose
tasklogger.set_level(verbose)
_logger.set_level(verbose)
super().__init__(data, **kwargs)

def get_params(self):
Expand Down Expand Up @@ -1117,6 +1123,6 @@ def set_params(self, **params):
self.n_jobs = params['n_jobs']
if 'verbose' in params:
self.verbose = params['verbose']
tasklogger.set_level(self.verbose)
_logger.set_level(self.verbose)
super().set_params(**params)
return self
Loading

0 comments on commit 275c104

Please sign in to comment.