From 5209f1342586286907256554fe6002edac6c19bb Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Sun, 13 Oct 2019 14:25:50 -0400 Subject: [PATCH 1/2] refactor tasklogger interface --- graphtools/api.py | 13 +- graphtools/base.py | 103 +++++----- graphtools/graphs.py | 446 +++++++++++++++++++++---------------------- requirements.txt | 2 +- setup.py | 2 +- 5 files changed, 278 insertions(+), 288 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index 7fdae5a..80b7746 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -1,12 +1,13 @@ import numpy as np import warnings -import tasklogger from scipy import sparse import pickle import pygsp +import tasklogger + +from . import base, graphs -from . import base -from . import graphs +_logger = tasklogger.get_tasklogger('graphtools') def Graph(data, @@ -173,7 +174,7 @@ def Graph(data, “Numerical Recipes (3rd edition)”, Cambridge University Press, 2007, page 795. """ - tasklogger.set_level(verbose) + _logger.set_level(verbose) if sample_idx is not None and len(np.unique(sample_idx)) == 1: warnings.warn("Only one unique sample. " "Not using MNNGraph") @@ -239,7 +240,7 @@ def Graph(data, else: msg = msg + " and PyGSP inheritance" - tasklogger.log_debug(msg) + _logger.debug(msg) class_names = [p.__name__.replace("Graph", "") for p in parent_classes] try: @@ -257,7 +258,7 @@ def Graph(data, pass # build graph and return - tasklogger.log_debug("Initializing {} with arguments {}".format( + _logger.debug("Initializing {} with arguments {}".format( parent_classes, ", ".join(["{}='{}'".format(key, value) for key, value in params.items() diff --git a/graphtools/base.py b/graphtools/base.py index 58e941b..c7c72d6 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -11,9 +11,9 @@ from scipy import sparse import warnings import numbers -import tasklogger import pickle import sys +import tasklogger try: import pandas as pd @@ -29,6 +29,8 @@ from . import utils +_logger = tasklogger.get_tasklogger('graphtools') + class Base(object): """Class that deals with key-word arguments but is otherwise @@ -179,7 +181,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold): n_pca = None elif n_pca is True: # notify that we're going to estimate rank. n_pca = 'auto' - tasklogger.log_info("Estimating n_pca from matrix rank. " + _logger.info("Estimating n_pca from matrix rank. 
" "Supply an integer n_pca " "for fixed amount.") if not any([isinstance(n_pca, numbers.Number), @@ -233,45 +235,44 @@ def _reduce_data(self): Reduced data matrix """ if self.n_pca is not None and (self.n_pca == 'auto' or self.n_pca < self.data.shape[1]): - tasklogger.log_start("PCA") - n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca - if sparse.issparse(self.data): - if isinstance(self.data, sparse.coo_matrix) or \ - isinstance(self.data, sparse.lil_matrix) or \ - isinstance(self.data, sparse.dok_matrix): - self.data = self.data.tocsr() - self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state) - else: - self.data_pca = PCA(n_pca, - svd_solver='randomized', - random_state=self.random_state) - self.data_pca.fit(self.data) - if self.n_pca == 'auto': - s = self.data_pca.singular_values_ - smax = s.max() - if self.rank_threshold == 'auto': - threshold = smax * \ - np.finfo(self.data.dtype).eps * max(self.data.shape) - self.rank_threshold = threshold - threshold = self.rank_threshold - gate = np.where(s >= threshold)[0] - self.n_pca = gate.shape[0] - if self.n_pca == 0: - raise ValueError("Supplied threshold {} was greater than " - "maximum singular value {} " - "for the data matrix".format(threshold, smax)) - tasklogger.log_info( - "Using rank estimate of {} as n_pca".format(self.n_pca)) - # reset the sklearn operator - op = self.data_pca # for line-width brevity.. - op.components_ = op.components_[gate, :] - op.explained_variance_ = op.explained_variance_[gate] - op.explained_variance_ratio_ = op.explained_variance_ratio_[ - gate] - op.singular_values_ = op.singular_values_[gate] - self.data_pca = op # im not clear if this is needed due to assignment rules - data_nu = self.data_pca.transform(self.data) - tasklogger.log_complete("PCA") + with _logger.task("PCA"): + n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca + if sparse.issparse(self.data): + if isinstance(self.data, sparse.coo_matrix) or \ + isinstance(self.data, sparse.lil_matrix) or \ + isinstance(self.data, sparse.dok_matrix): + self.data = self.data.tocsr() + self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state) + else: + self.data_pca = PCA(n_pca, + svd_solver='randomized', + random_state=self.random_state) + self.data_pca.fit(self.data) + if self.n_pca == 'auto': + s = self.data_pca.singular_values_ + smax = s.max() + if self.rank_threshold == 'auto': + threshold = smax * \ + np.finfo(self.data.dtype).eps * max(self.data.shape) + self.rank_threshold = threshold + threshold = self.rank_threshold + gate = np.where(s >= threshold)[0] + self.n_pca = gate.shape[0] + if self.n_pca == 0: + raise ValueError("Supplied threshold {} was greater than " + "maximum singular value {} " + "for the data matrix".format(threshold, smax)) + _logger.info( + "Using rank estimate of {} as n_pca".format(self.n_pca)) + # reset the sklearn operator + op = self.data_pca # for line-width brevity.. 
+ op.components_ = op.components_[gate, :] + op.explained_variance_ = op.explained_variance_[gate] + op.explained_variance_ratio_ = op.explained_variance_ratio_[ + gate] + op.singular_values_ = op.singular_values_[gate] + self.data_pca = op # im not clear if this is needed due to assignment rules + data_nu = self.data_pca.transform(self.data) return data_nu else: data_nu = self.data @@ -472,10 +473,10 @@ def __init__(self, self.anisotropy = anisotropy if initialize: - tasklogger.log_debug("Initializing kernel...") + _logger.debug("Initializing kernel...") self.K else: - tasklogger.log_debug("Not initializing kernel.") + _logger.debug("Not initializing kernel.") super().__init__(**kwargs) def _check_symmetrization(self, kernel_symm, theta): @@ -524,18 +525,18 @@ def _build_kernel(self): def symmetrize_kernel(self, K): # symmetrize if self.kernel_symm == "+": - tasklogger.log_debug("Using addition symmetrization.") + _logger.debug("Using addition symmetrization.") K = (K + K.T) / 2 elif self.kernel_symm == "*": - tasklogger.log_debug("Using multiplication symmetrization.") + _logger.debug("Using multiplication symmetrization.") K = K.multiply(K.T) elif self.kernel_symm == 'mnn': - tasklogger.log_debug( + _logger.debug( "Using mnn symmetrization (theta = {}).".format(self.theta)) K = self.theta * utils.elementwise_minimum(K, K.T) + \ (1 - self.theta) * utils.elementwise_maximum(K, K.T) elif self.kernel_symm is None: - tasklogger.log_debug("Using no symmetrization.") + _logger.debug("Using no symmetrization.") pass else: # this should never happen @@ -787,10 +788,10 @@ def _check_shortest_path_distance(self, distance): def _default_shortest_path_distance(self): if not self.weighted: distance = 'data' - tasklogger.log_info("Using ambient data distances.") + _logger.info("Using ambient data distances.") else: distance = 'affinity' - tasklogger.log_info("Using negative log affinity distances.") + _logger.info("Using negative log affinity distances.") return distance def shortest_path(self, method='auto', distance=None): @@ -954,7 +955,7 @@ def __init__(self, data, # kwargs are ignored self.n_jobs = n_jobs self.verbose = verbose - tasklogger.set_level(verbose) + _logger.set_level(verbose) super().__init__(data, **kwargs) def get_params(self): @@ -1117,6 +1118,6 @@ def set_params(self, **params): self.n_jobs = params['n_jobs'] if 'verbose' in params: self.verbose = params['verbose'] - tasklogger.set_level(self.verbose) + _logger.set_level(self.verbose) super().set_params(**params) return self diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 2d4c035..6a1a021 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -15,6 +15,8 @@ from . 
import utils from .base import DataGraph, PyGSPGraph +_logger = tasklogger.get_tasklogger('graphtools') + class kNNGraph(DataGraph): """ @@ -290,90 +292,88 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, k=knn, n=self.data.shape[0])) Y = self._check_extension_shape(Y) - tasklogger.log_start("KNN search") if self.decay is None or self.thresh == 1: - # binary connectivity matrix - K = self.knn_tree.kneighbors_graph( - Y, n_neighbors=knn, - mode='connectivity') - tasklogger.log_complete("KNN search") + with _logger.task("KNN search"): + # binary connectivity matrix + K = self.knn_tree.kneighbors_graph( + Y, n_neighbors=knn, + mode='connectivity') else: - # sparse fast alpha decay - knn_tree = self.knn_tree - search_knn = min(knn * 20, self.data_nu.shape[0]) - distances, indices = knn_tree.kneighbors( - Y, n_neighbors=search_knn) - self._check_duplicates(distances, indices) - tasklogger.log_complete("KNN search") - tasklogger.log_start("affinities") - if bandwidth is None: - bandwidth = distances[:, knn - 1] - - bandwidth = bandwidth * bandwidth_scale - - # check for zero bandwidth - bandwidth = np.maximum(bandwidth, np.finfo(float).eps) - - radius = bandwidth * np.power(-1 * np.log(self.thresh), - 1 / self.decay) - update_idx = np.argwhere( - np.max(distances, axis=1) < radius).reshape(-1) - tasklogger.log_debug("search_knn = {}; {} remaining".format( - search_knn, len(update_idx))) - if len(update_idx) > 0: - distances = [d for d in distances] - indices = [i for i in indices] - while len(update_idx) > Y.shape[0] // 10 and \ - search_knn < self.data_nu.shape[0] / 2: - # increase the knn search - search_knn = min(search_knn * 20, self.data_nu.shape[0]) - dist_new, ind_new = knn_tree.kneighbors( - Y[update_idx], n_neighbors=search_knn) - for i, idx in enumerate(update_idx): - distances[idx] = dist_new[i] - indices[idx] = ind_new[i] - update_idx = [i for i, d in enumerate(distances) if np.max(d) < - (radius if isinstance(bandwidth, numbers.Number) - else radius[i])] - tasklogger.log_debug("search_knn = {}; {} remaining".format( - search_knn, - len(update_idx))) - if search_knn > self.data_nu.shape[0] / 2: - knn_tree = NearestNeighbors( - search_knn, algorithm='brute', - n_jobs=self.n_jobs).fit(self.data_nu) - if len(update_idx) > 0: - tasklogger.log_debug( - "radius search on {}".format(len(update_idx))) - # give up - radius search - dist_new, ind_new = knn_tree.radius_neighbors( - Y[update_idx, :], - radius=radius - if isinstance(bandwidth, numbers.Number) - else np.max(radius[update_idx])) - for i, idx in enumerate(update_idx): - distances[idx] = dist_new[i] - indices[idx] = ind_new[i] - if isinstance(bandwidth, numbers.Number): - data = np.concatenate(distances) / bandwidth - else: - data = np.concatenate([distances[i] / bandwidth[i] - for i in range(len(distances))]) - - indices = np.concatenate(indices) - indptr = np.concatenate( - [[0], np.cumsum([len(d) for d in distances])]) - K = sparse.csr_matrix((data, indices, indptr), - shape=(Y.shape[0], self.data_nu.shape[0])) - K.data = np.exp(-1 * np.power(K.data, self.decay)) - # handle nan - K.data = np.where(np.isnan(K.data), 1, K.data) - # TODO: should we zero values that are below thresh? 
- K.data[K.data < self.thresh] = 0 - K = K.tocoo() - K.eliminate_zeros() - K = K.tocsr() - tasklogger.log_complete("affinities") + with _logger.task("KNN search"): + # sparse fast alpha decay + knn_tree = self.knn_tree + search_knn = min(knn * 20, self.data_nu.shape[0]) + distances, indices = knn_tree.kneighbors( + Y, n_neighbors=search_knn) + self._check_duplicates(distances, indices) + with _logger.task("affinities"): + if bandwidth is None: + bandwidth = distances[:, knn - 1] + + bandwidth = bandwidth * bandwidth_scale + + # check for zero bandwidth + bandwidth = np.maximum(bandwidth, np.finfo(float).eps) + + radius = bandwidth * np.power(-1 * np.log(self.thresh), + 1 / self.decay) + update_idx = np.argwhere( + np.max(distances, axis=1) < radius).reshape(-1) + _logger.debug("search_knn = {}; {} remaining".format( + search_knn, len(update_idx))) + if len(update_idx) > 0: + distances = [d for d in distances] + indices = [i for i in indices] + while len(update_idx) > Y.shape[0] // 10 and \ + search_knn < self.data_nu.shape[0] / 2: + # increase the knn search + search_knn = min(search_knn * 20, self.data_nu.shape[0]) + dist_new, ind_new = knn_tree.kneighbors( + Y[update_idx], n_neighbors=search_knn) + for i, idx in enumerate(update_idx): + distances[idx] = dist_new[i] + indices[idx] = ind_new[i] + update_idx = [i for i, d in enumerate(distances) if np.max(d) < + (radius if isinstance(bandwidth, numbers.Number) + else radius[i])] + _logger.debug("search_knn = {}; {} remaining".format( + search_knn, + len(update_idx))) + if search_knn > self.data_nu.shape[0] / 2: + knn_tree = NearestNeighbors( + search_knn, algorithm='brute', + n_jobs=self.n_jobs).fit(self.data_nu) + if len(update_idx) > 0: + _logger.debug( + "radius search on {}".format(len(update_idx))) + # give up - radius search + dist_new, ind_new = knn_tree.radius_neighbors( + Y[update_idx, :], + radius=radius + if isinstance(bandwidth, numbers.Number) + else np.max(radius[update_idx])) + for i, idx in enumerate(update_idx): + distances[idx] = dist_new[i] + indices[idx] = ind_new[i] + if isinstance(bandwidth, numbers.Number): + data = np.concatenate(distances) / bandwidth + else: + data = np.concatenate([distances[i] / bandwidth[i] + for i in range(len(distances))]) + + indices = np.concatenate(indices) + indptr = np.concatenate( + [[0], np.cumsum([len(d) for d in distances])]) + K = sparse.csr_matrix((data, indices, indptr), + shape=(Y.shape[0], self.data_nu.shape[0])) + K.data = np.exp(-1 * np.power(K.data, self.decay)) + # handle nan + K.data = np.where(np.isnan(K.data), 1, K.data) + # TODO: should we zero values that are below thresh? + K.data[K.data < self.thresh] = 0 + K = K.tocoo() + K.eliminate_zeros() + K = K.tocsr() return K @@ -566,40 +566,36 @@ def build_landmark_op(self): probabilities between cluster centers by using transition probabilities between samples assigned to each cluster. 
""" - tasklogger.log_start("landmark operator") - is_sparse = sparse.issparse(self.kernel) - # spectral clustering - tasklogger.log_start("SVD") - _, _, VT = randomized_svd(self.diff_aff, - n_components=self.n_svd, - random_state=self.random_state) - tasklogger.log_complete("SVD") - tasklogger.log_start("KMeans") - kmeans = MiniBatchKMeans( - self.n_landmark, - init_size=3 * self.n_landmark, - batch_size=10000, - random_state=self.random_state) - self._clusters = kmeans.fit_predict( - self.diff_op.dot(VT.T)) - # some clusters are not assigned - tasklogger.log_complete("KMeans") - - # transition matrices - pmn = self._landmarks_to_data() - - # row normalize - pnm = pmn.transpose() - pmn = normalize(pmn, norm='l1', axis=1) - pnm = normalize(pnm, norm='l1', axis=1) - landmark_op = pmn.dot(pnm) # sparsity agnostic matrix multiplication - if is_sparse: - # no need to have a sparse landmark operator - landmark_op = landmark_op.toarray() - # store output - self._landmark_op = landmark_op - self._transitions = pnm - tasklogger.log_complete("landmark operator") + with _logger.task("landmark operator"): + is_sparse = sparse.issparse(self.kernel) + # spectral clustering + with _logger.task("SVD"): + _, _, VT = randomized_svd(self.diff_aff, + n_components=self.n_svd, + random_state=self.random_state) + with _logger.task("KMeans"): + kmeans = MiniBatchKMeans( + self.n_landmark, + init_size=3 * self.n_landmark, + batch_size=10000, + random_state=self.random_state) + self._clusters = kmeans.fit_predict( + self.diff_op.dot(VT.T)) + + # transition matrices + pmn = self._landmarks_to_data() + + # row normalize + pnm = pmn.transpose() + pmn = normalize(pmn, norm='l1', axis=1) + pnm = normalize(pnm, norm='l1', axis=1) + landmark_op = pmn.dot(pnm) # sparsity agnostic matrix multiplication + if is_sparse: + # no need to have a sparse landmark operator + landmark_op = landmark_op.toarray() + # store output + self._landmark_op = landmark_op + self._transitions = pnm def extend_to_data(self, data, **kwargs): """Build transition matrix from new data to the graph @@ -873,46 +869,45 @@ def build_kernel(self): K = K.tolil() K = utils.set_diagonal(K, 1) else: - tasklogger.log_start("affinities") - if sparse.issparse(self.data_nu): - self.data_nu = self.data_nu.toarray() - if self.precomputed == "distance": - pdx = self.data_nu - elif self.precomputed is None: - pdx = pdist(self.data_nu, metric=self.distance) - if np.any(pdx == 0): - pdx = squareform(pdx) - duplicate_ids = np.array( - [i for i in np.argwhere(pdx == 0) - if i[1] > i[0]]) - duplicate_names = ", ".join(["{} and {}".format(i[0], i[1]) - for i in duplicate_ids]) - warnings.warn( - "Detected zero distance between samples {}. " - "Consider removing duplicates to avoid errors in " - "downstream processing.".format(duplicate_names), - RuntimeWarning) + with _logger.task("affinities"): + if sparse.issparse(self.data_nu): + self.data_nu = self.data_nu.toarray() + if self.precomputed == "distance": + pdx = self.data_nu + elif self.precomputed is None: + pdx = pdist(self.data_nu, metric=self.distance) + if np.any(pdx == 0): + pdx = squareform(pdx) + duplicate_ids = np.array( + [i for i in np.argwhere(pdx == 0) + if i[1] > i[0]]) + duplicate_names = ", ".join(["{} and {}".format(i[0], i[1]) + for i in duplicate_ids]) + warnings.warn( + "Detected zero distance between samples {}. 
" + "Consider removing duplicates to avoid errors in " + "downstream processing.".format(duplicate_names), + RuntimeWarning) + else: + pdx = squareform(pdx) else: - pdx = squareform(pdx) - else: - raise ValueError( - "precomputed='{}' not recognized. " - "Choose from ['affinity', 'adjacency', 'distance', " - "None]".format(self.precomputed)) - if self.bandwidth is None: - knn_dist = np.partition( - pdx, self.knn + 1, axis=1)[:, :self.knn + 1] - bandwidth = np.max(knn_dist, axis=1) - elif callable(self.bandwidth): - bandwidth = self.bandwidth(pdx) - else: - bandwidth = self.bandwidth - bandwidth = bandwidth * self.bandwidth_scale - pdx = (pdx.T / bandwidth).T - K = np.exp(-1 * np.power(pdx, self.decay)) - # handle nan - K = np.where(np.isnan(K), 1, K) - tasklogger.log_complete("affinities") + raise ValueError( + "precomputed='{}' not recognized. " + "Choose from ['affinity', 'adjacency', 'distance', " + "None]".format(self.precomputed)) + if self.bandwidth is None: + knn_dist = np.partition( + pdx, self.knn + 1, axis=1)[:, :self.knn + 1] + bandwidth = np.max(knn_dist, axis=1) + elif callable(self.bandwidth): + bandwidth = self.bandwidth(pdx) + else: + bandwidth = self.bandwidth + bandwidth = bandwidth * self.bandwidth_scale + pdx = (pdx.T / bandwidth).T + K = np.exp(-1 * np.power(pdx, self.decay)) + # handle nan + K = np.where(np.isnan(K), 1, K) # truncate if sparse.issparse(K): if not (isinstance(K, sparse.csr_matrix) or @@ -966,21 +961,20 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None if self.precomputed is not None: raise ValueError("Cannot extend kernel on precomputed graph") else: - tasklogger.log_start("affinities") - Y = self._check_extension_shape(Y) - pdx = cdist(Y, self.data_nu, metric=self.distance) - if bandwidth is None: - knn_dist = np.partition(pdx, knn, axis=1)[:, :knn] - bandwidth = np.max(knn_dist, axis=1) - elif callable(bandwidth): - bandwidth = bandwidth(pdx) - bandwidth = bandwidth_scale * bandwidth - pdx = (pdx.T / bandwidth).T - K = np.exp(-1 * pdx**self.decay) - # handle nan - K = np.where(np.isnan(K), 1, K) - K[K < self.thresh] = 0 - tasklogger.log_complete("affinities") + with _logger.task("affinities"): + Y = self._check_extension_shape(Y) + pdx = cdist(Y, self.data_nu, metric=self.distance) + if bandwidth is None: + knn_dist = np.partition(pdx, knn, axis=1)[:, :knn] + bandwidth = np.max(knn_dist, axis=1) + elif callable(bandwidth): + bandwidth = bandwidth(pdx) + bandwidth = bandwidth_scale * bandwidth + pdx = (pdx.T / bandwidth).T + K = np.exp(-1 * pdx**self.decay) + # handle nan + K = np.where(np.isnan(K), 1, K) + K[K < self.thresh] = 0 return K @property @@ -1003,7 +997,7 @@ def _check_shortest_path_distance(self, distance): def _default_shortest_path_distance(self): if self.precomputed is not None and not self.weighted: distance = 'constant' - tasklogger.log_info("Using constant distances.") + _logger.info("Using constant distances.") else: distance = super()._default_shortest_path_distance() return distance @@ -1155,66 +1149,60 @@ def build_kernel(self): symmetric matrix with ones down the diagonal with no non-negative entries. 
""" - tasklogger.log_start("subgraphs") - self.subgraphs = [] - from .api import Graph - # iterate through sample ids - for i, idx in enumerate(self.samples): - tasklogger.log_debug("subgraph {}: sample {}, " - "n = {}, knn = {}".format( - i, idx, np.sum(self.sample_idx == idx), - self.knn)) - # select data for sample - data = self.data_nu[self.sample_idx == idx] - # build a kNN graph for cells within sample - graph = Graph(data, n_pca=None, - knn=self.knn, - decay=self.decay, - bandwidth=self.bandwidth, - distance=self.distance, - thresh=self.thresh, - verbose=self.verbose, - random_state=self.random_state, - n_jobs=self.n_jobs, - kernel_symm='+', - initialize=True) - self.subgraphs.append(graph) # append to list of subgraphs - tasklogger.log_complete("subgraphs") - - tasklogger.log_start("MNN kernel") - if self.thresh > 0 or self.decay is None: - K = sparse.lil_matrix( - (self.data_nu.shape[0], self.data_nu.shape[0])) - else: - K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) - for i, X in enumerate(self.subgraphs): - K = utils.set_submatrix( - K, self.sample_idx == self.samples[i], - self.sample_idx == self.samples[i], X.K) - within_batch_norm = np.array(np.sum(X.K, 1)).flatten() - for j, Y in enumerate(self.subgraphs): - if i == j: - continue - tasklogger.log_start( - "kernel from sample {} to {}".format(self.samples[i], - self.samples[j])) - Kij = Y.build_kernel_to_data( - X.data_nu, - knn=self.knn) - between_batch_norm = np.array(np.sum(Kij, 1)).flatten() - scale = np.minimum(1, within_batch_norm / - between_batch_norm) * self.beta - if sparse.issparse(Kij): - Kij = Kij.multiply(scale[:, None]) - else: - Kij = Kij * scale[:, None] + with _logger.task("subgraphs"): + self.subgraphs = [] + from .api import Graph + # iterate through sample ids + for i, idx in enumerate(self.samples): + _logger.debug("subgraph {}: sample {}, " + "n = {}, knn = {}".format( + i, idx, np.sum(self.sample_idx == idx), + self.knn)) + # select data for sample + data = self.data_nu[self.sample_idx == idx] + # build a kNN graph for cells within sample + graph = Graph(data, n_pca=None, + knn=self.knn, + decay=self.decay, + bandwidth=self.bandwidth, + distance=self.distance, + thresh=self.thresh, + verbose=self.verbose, + random_state=self.random_state, + n_jobs=self.n_jobs, + kernel_symm='+', + initialize=True) + self.subgraphs.append(graph) # append to list of subgraphs + + with _logger.task("MNN kernel"): + if self.thresh > 0 or self.decay is None: + K = sparse.lil_matrix( + (self.data_nu.shape[0], self.data_nu.shape[0])) + else: + K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) + for i, X in enumerate(self.subgraphs): K = utils.set_submatrix( K, self.sample_idx == self.samples[i], - self.sample_idx == self.samples[j], Kij) - tasklogger.log_complete( - "kernel from sample {} to {}".format(self.samples[i], - self.samples[j])) - tasklogger.log_complete("MNN kernel") + self.sample_idx == self.samples[i], X.K) + within_batch_norm = np.array(np.sum(X.K, 1)).flatten() + for j, Y in enumerate(self.subgraphs): + if i == j: + continue + with _logger.task( + "kernel from sample {} to {}".format(self.samples[i], self.samples[j])): + Kij = Y.build_kernel_to_data( + X.data_nu, + knn=self.knn) + between_batch_norm = np.array(np.sum(Kij, 1)).flatten() + scale = np.minimum(1, within_batch_norm / + between_batch_norm) * self.beta + if sparse.issparse(Kij): + Kij = Kij.multiply(scale[:, None]) + else: + Kij = Kij * scale[:, None] + K = utils.set_submatrix( + K, self.sample_idx == self.samples[i], + 
self.sample_idx == self.samples[j], Kij) return K def build_kernel_to_data(self, Y, theta=None): diff --git a/requirements.txt b/requirements.txt index f584815..08fcd1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ scipy>=1.1.0 pygsp>=>=0.5.1 scikit-learn>=0.20.0 future -tasklogger>=0.4.0 +tasklogger>=1.0 diff --git a/setup.py b/setup.py index 0d21022..cf358d5 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ 'pygsp>=0.5.1', 'scikit-learn>=0.20.0', 'future', - 'tasklogger>=0.4.0', + 'tasklogger>=1.0', ] test_requires = [ From 209076a03dbbda3a8730851ba67f9f38ce4ebf66 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 15 Oct 2019 11:57:29 -0400 Subject: [PATCH 2/2] don't coerce to dense on to_igraph --- graphtools/base.py | 13 +++++++++---- graphtools/version.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index c7c72d6..a960d02 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -730,12 +730,12 @@ def to_pygsp(self, **kwargs): def to_igraph(self, attribute="weight", **kwargs): """Convert to an igraph Graph - Uses the igraph.Graph.Weighted_Adjacency constructor + Uses the igraph.Graph constructor Parameters ---------- attribute : str, optional (default: "weight") - kwargs : additional arguments for igraph.Graph.Weighted_Adjacency + kwargs : additional arguments for igraph.Graph """ try: import igraph as ig @@ -748,8 +748,13 @@ def to_igraph(self, attribute="weight", **kwargs): # not a pygsp graph W = self.K.copy() W = utils.set_diagonal(W, 0) - return ig.Graph.Weighted_Adjacency(utils.to_array(W).tolist(), - attr=attribute, **kwargs) + sources, targets = W.nonzero() + edgelist = list(zip(sources, targets)) + g = ig.Graph(W.shape[0], edgelist, **kwargs) + weights = W[W.nonzero()] + weights = utils.to_array(weights) + g.es[attribute] = weights.flatten().tolist() + return g def to_pickle(self, path): """Save the current Graph to a pickle. diff --git a/graphtools/version.py b/graphtools/version.py index 67bc602..9c73af2 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.3.0" +__version__ = "1.3.1"
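For reference, a minimal sketch of the tasklogger>=1.0 interface that PATCH 1/2 migrates to: a named logger replaces the module-level functions, paired log_start()/log_complete() calls become a task() context manager, and log_debug()/log_info() become debug()/info(). The logger name matches the patch; the task name and verbosity level below are illustrative only.

    import tasklogger

    # named logger shared across the package (replaces module-level tasklogger calls)
    _logger = tasklogger.get_tasklogger('graphtools')
    _logger.set_level(1)  # replaces tasklogger.set_level(verbose)

    # replaces paired tasklogger.log_start("...") / tasklogger.log_complete("...") calls
    with _logger.task("example computation"):
        result = sum(i * i for i in range(1000))

    # replaces tasklogger.log_info(...) / tasklogger.log_debug(...)
    _logger.info("finished: {}".format(result))
    _logger.debug("hidden unless the verbosity level is raised")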
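PATCH 2/2 avoids densifying the kernel by building the igraph object from the nonzero entries of the sparse matrix instead of passing a dense adjacency list to igraph.Graph.Weighted_Adjacency. A standalone sketch of the same idea, using a random scipy sparse matrix as a stand-in for the graph kernel (the graphtools utils.set_diagonal / utils.to_array helpers are omitted here):

    import numpy as np
    from scipy import sparse
    import igraph as ig

    # stand-in for the kernel matrix self.K
    W = sparse.random(50, 50, density=0.05, format='csr', random_state=42)

    # edge list taken directly from the sparse structure -- no dense conversion
    sources, targets = W.nonzero()
    edgelist = list(zip(sources.tolist(), targets.tolist()))
    g = ig.Graph(W.shape[0], edgelist)

    # attach the nonzero kernel values as edge weights
    g.es['weight'] = np.asarray(W[W.nonzero()]).flatten().tolist()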