From c1e84f48af2fbfac27f3a808b6d42d9112c03d7e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 3 Sep 2018 12:46:39 -0400 Subject: [PATCH 01/26] bump tasklogger version --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index c19f67d..6fed4f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ scipy>=1.1.0 pygsp>=>=0.5.1 scikit-learn>=0.19.1 future -tasklogger>=0.2 +tasklogger>=0.2.1 diff --git a/setup.py b/setup.py index fca28af..1394bf8 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ 'pygsp>=0.5.1', 'scikit-learn>=0.19.1', 'future', - 'tasklogger>=0.2', + 'tasklogger>=0.2.1', ] test_requires = [ From 74897db2e798741a7b1489d6f5ff55a5e160b9ba Mon Sep 17 00:00:00 2001 From: Daniel Burkhardt Date: Thu, 6 Sep 2018 10:31:51 -0400 Subject: [PATCH 02/26] gamma -> theta --- graphtools/api.py | 10 +++--- graphtools/base.py | 67 ++++++++++++++++++------------------ graphtools/graphs.py | 80 +++++++++++++++++++++---------------------- test/test_exact.py | 2 +- test/test_knn.py | 6 ++-- test/test_landmark.py | 2 +- test/test_mnn.py | 58 +++++++++++++++---------------- unittest.cfg | 2 +- 8 files changed, 113 insertions(+), 114 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index ede4f39..ee2a55f 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -16,7 +16,7 @@ def Graph(data, distance='euclidean', thresh=1e-4, kernel_symm='+', - gamma=None, + theta=None, n_landmark=None, n_svd=100, beta=1, @@ -75,12 +75,12 @@ def Graph(data, Defines method of MNN symmetrization. '+' : additive '*' : multiplicative - 'gamma' : min-max + 'theta' : min-max 'none' : no symmetrization - gamma: float (default: None) - Min-max symmetrization constant or matrix. Only used if kernel_symm='gamma'. - K = `gamma * min(K, K.T) + (1 - gamma) * max(K, K.T)` + theta: float (default: None) + Min-max symmetrization constant or matrix. Only used if kernel_symm='theta'. + K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)` precomputed : {'distance', 'affinity', 'adjacency', `None`}, optional (default: `None`) If the graph is precomputed, this variable denotes which graph diff --git a/graphtools/base.py b/graphtools/base.py index edd678a..53e8fb1 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -27,7 +27,6 @@ elementwise_maximum, set_diagonal) - class Base(object): """Class that deals with key-word arguments but is otherwise just an object. @@ -311,12 +310,12 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)): Defines method of MNN symmetrization. '+' : additive '*' : multiplicative - 'gamma' : min-max + 'theta' : min-max 'none' : no symmetrization - gamma: float (default: 0.5) + theta: float (default: 0.5) Min-max symmetrization constant. - K = `gamma * min(K, K.T) + (1 - gamma) * max(K, K.T)` + K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)` initialize : `bool`, optional (default : `True`) if false, don't create the kernel matrix. 
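The min-max ('theta') rule introduced by this rename is an elementwise blend of the kernel with its transpose. A minimal dense-NumPy sketch of the three symmetrization modes (illustration only, not the sparse-aware graphtools implementation)::

    import numpy as np

    def symmetrize(K, kernel_symm='+', theta=0.5):
        # '+'     : additive symmetrization
        # '*'     : multiplicative (elementwise) symmetrization
        # 'theta' : K = theta * min(K, K.T) + (1 - theta) * max(K, K.T)
        if kernel_symm == '+':
            return (K + K.T) / 2
        elif kernel_symm == '*':
            return K * K.T
        elif kernel_symm == 'theta':
            return theta * np.minimum(K, K.T) + (1 - theta) * np.maximum(K, K.T)
        return K  # None: leave the kernel asymmetric

    K = np.random.rand(5, 5)
    K_symm = symmetrize(K, kernel_symm='theta', theta=0.9)
    assert np.allclose(K_symm, K_symm.T)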
@@ -337,11 +336,11 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)): """ def __init__(self, kernel_symm='+', - gamma=None, + theta=None, initialize=True, **kwargs): self.kernel_symm = kernel_symm - self.gamma = gamma - self._check_symmetrization(kernel_symm, gamma) + self.theta = theta + self._check_symmetrization(kernel_symm, theta) if initialize: tasklogger.log_debug("Initializing kernel...") @@ -350,25 +349,25 @@ def __init__(self, kernel_symm='+', tasklogger.log_debug("Not initializing kernel.") super().__init__(**kwargs) - def _check_symmetrization(self, kernel_symm, gamma): - if kernel_symm not in ['+', '*', 'gamma', None]: + def _check_symmetrization(self, kernel_symm, theta): + if kernel_symm not in ['+', '*', 'theta', None]: raise ValueError( "kernel_symm '{}' not recognized. Choose from " - "'+', '*', 'gamma', or 'none'.".format(kernel_symm)) - elif kernel_symm != 'gamma' and gamma is not None: - warnings.warn("kernel_symm='{}' but gamma is not None. " - "Setting kernel_symm='gamma'.".format(kernel_symm)) - self.kernel_symm = kernel_symm = 'gamma' - - if kernel_symm == 'gamma': - if gamma is None: - warnings.warn("kernel_symm='gamma' but gamma not given. " - "Defaulting to gamma=0.5.") - self.gamma = gamma = 0.5 - elif not isinstance(gamma, numbers.Number) or \ - gamma < 0 or gamma > 1: - raise ValueError("gamma {} not recognized. Expected " - "a float between 0 and 1".format(gamma)) + "'+', '*', 'theta', or 'none'.".format(kernel_symm)) + elif kernel_symm != 'theta' and theta is not None: + warnings.warn("kernel_symm='{}' but theta is not None. " + "Setting kernel_symm='theta'.".format(kernel_symm)) + self.kernel_symm = kernel_symm = 'theta' + + if kernel_symm == 'theta': + if theta is None: + warnings.warn("kernel_symm='theta' but theta not given. " + "Defaulting to theta=0.5.") + self.theta = theta = 0.5 + elif not isinstance(theta, numbers.Number) or \ + theta < 0 or theta > 1: + raise ValueError("theta {} not recognized. Expected " + "a float between 0 and 1".format(theta)) def _build_kernel(self): """Private method to build kernel matrix @@ -400,26 +399,26 @@ def symmetrize_kernel(self, K): elif self.kernel_symm == "*": tasklogger.log_debug("Using multiplication symmetrization.") K = K.multiply(K.T) - elif self.kernel_symm == 'gamma': + elif self.kernel_symm == 'theta': tasklogger.log_debug( - "Using gamma symmetrization (gamma = {}).".format(self.gamma)) - K = self.gamma * elementwise_minimum(K, K.T) + \ - (1 - self.gamma) * elementwise_maximum(K, K.T) + "Using theta symmetrization (theta = {}).".format(self.theta)) + K = self.theta * elementwise_minimum(K, K.T) + \ + (1 - self.theta) * elementwise_maximum(K, K.T) elif self.kernel_symm is None: tasklogger.log_debug("Using no symmetrization.") pass else: # this should never happen raise ValueError( - "Expected kernel_symm in ['+', '*', 'gamma' or None]. " - "Got {}".format(self.gamma)) + "Expected kernel_symm in ['+', '*', 'theta' or None]. 
" + "Got {}".format(self.theta)) return K def get_params(self): """Get parameters from this object """ return {'kernel_symm': self.kernel_symm, - 'gamma': self.gamma} + 'theta': self.theta} def set_params(self, **params): """Set parameters on this object @@ -429,7 +428,7 @@ def set_params(self, **params): Valid parameters: Invalid parameters: (these would require modifying the kernel matrix) - kernel_symm - - gamma + - theta Parameters ---------- @@ -439,8 +438,8 @@ def set_params(self, **params): ------- self """ - if 'gamma' in params and params['gamma'] != self.gamma: - raise ValueError("Cannot update gamma. Please create a new graph") + if 'theta' in params and params['theta'] != self.theta: + raise ValueError("Cannot update theta. Please create a new graph") if 'kernel_symm' in params and \ params['kernel_symm'] != self.kernel_symm: raise ValueError( diff --git a/graphtools/graphs.py b/graphtools/graphs.py index ce8f9ae..65699fd 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -886,33 +886,33 @@ def __init__(self, data, sample_idx, super().__init__(data, n_pca=n_pca, **kwargs) - def _check_symmetrization(self, kernel_symm, gamma): - if kernel_symm == 'gamma' and gamma is not None and \ - not isinstance(gamma, numbers.Number): - # matrix gamma + def _check_symmetrization(self, kernel_symm, theta): + if kernel_symm == 'theta' and theta is not None and \ + not isinstance(theta, numbers.Number): + # matrix theta try: - gamma.shape + theta.shape except AttributeError: - raise ValueError("gamma {} not recognized. " + raise ValueError("theta {} not recognized. " "Expected a float between 0 and 1 " "or a [n_batch,n_batch] matrix of " - "floats between 0 and 1".format(gamma)) - if not np.shape(gamma) == (len(self.samples), + "floats between 0 and 1".format(theta)) + if not np.shape(theta) == (len(self.samples), len(self.samples)): raise ValueError( - "Matrix gamma must be of shape " + "Matrix theta must be of shape " "({}), got ({})".format( (len(self.samples), - len(self.samples)), gamma.shape)) - elif np.max(gamma) > 1 or np.min(gamma) < 0: + len(self.samples)), theta.shape)) + elif np.max(theta) > 1 or np.min(theta) < 0: raise ValueError( - "Values in matrix gamma must be between" + "Values in matrix theta must be between" " 0 and 1, got values between {} and {}".format( - np.max(gamma), np.min(gamma))) - elif np.any(gamma != gamma.T): - raise ValueError("gamma must be a symmetric matrix") + np.max(theta), np.min(theta))) + elif np.any(theta != theta.T): + raise ValueError("theta must be a symmetric matrix") else: - super()._check_symmetrization(kernel_symm, gamma) + super()._check_symmetrization(kernel_symm, theta) def _weight_knn(self, sample_size=None): """Select adaptive values of knn @@ -1070,14 +1070,14 @@ def build_kernel(self): return K def symmetrize_kernel(self, K): - if self.kernel_symm == 'gamma' and self.gamma is not None and \ - not isinstance(self.gamma, numbers.Number): - # matrix gamma + if self.kernel_symm == 'theta' and self.theta is not None and \ + not isinstance(self.theta, numbers.Number): + # matrix theta # Gamma can be a matrix with specific values transitions for # each batch. This allows for technical replicates and # experimental samples to be corrected simultaneously - tasklogger.log_debug("Using gamma symmetrization. " - "Gamma:\n{}".format(self.gamma)) + tasklogger.log_debug("Using theta symmetrization. 
" + "Gamma:\n{}".format(self.theta)) for i, sample_i in enumerate(self.samples): for j, sample_j in enumerate(self.samples): if j < i: @@ -1086,9 +1086,9 @@ def symmetrize_kernel(self, K): self.sample_idx == sample_j)] Kji = K[np.ix_(self.sample_idx == sample_j, self.sample_idx == sample_i)] - Kij_symm = self.gamma[i, j] * \ + Kij_symm = self.theta[i, j] * \ elementwise_minimum(Kij, Kji.T) + \ - (1 - self.gamma[i, j]) * \ + (1 - self.theta[i, j]) * \ elementwise_maximum(Kij, Kji.T) K = set_submatrix(K, self.sample_idx == sample_i, self.sample_idx == sample_j, Kij_symm) @@ -1100,7 +1100,7 @@ def symmetrize_kernel(self, K): K = super().symmetrize_kernel(K) return K - def build_kernel_to_data(self, Y, gamma=None): + def build_kernel_to_data(self, Y, theta=None): """Build transition matrix from new data to the graph Creates a transition matrix such that `Y` can be approximated by @@ -1120,8 +1120,8 @@ def build_kernel_to_data(self, Y, gamma=None): to the existing data. `n_features` must match either the ambient or PCA dimensions - gamma : array-like or `None`, optional (default: `None`) - if `self.gamma` is a matrix, gamma values must be explicitly + theta : array-like or `None`, optional (default: `None`) + if `self.theta` is a matrix, theta values must be explicitly specified between `Y` and each sample in `self.data` Returns @@ -1131,15 +1131,15 @@ def build_kernel_to_data(self, Y, gamma=None): Transition matrix from `Y` to `self.data` """ raise NotImplementedError - tasklogger.log_warning("building MNN kernel to gamma is experimental") - if not isinstance(self.gamma, str) and \ - not isinstance(self.gamma, numbers.Number): - if gamma is None: + tasklogger.log_warning("building MNN kernel to theta is experimental") + if not isinstance(self.theta, str) and \ + not isinstance(self.theta, numbers.Number): + if theta is None: raise ValueError( - "self.gamma is a matrix but gamma is not provided.") - elif len(gamma) != len(self.samples): + "self.theta is a matrix but theta is not provided.") + elif len(theta) != len(self.samples): raise ValueError( - "gamma should have one value for every sample") + "theta should have one value for every sample") Y = self._check_extension_shape(Y) kernel_xy = [] @@ -1156,26 +1156,26 @@ def build_kernel_to_data(self, Y, gamma=None): kernel_yx = sparse.vstack(kernel_yx) # n_cells_x x n_cells_y # symmetrize - if gamma is not None: + if theta is not None: # Gamma can be a vector with specific values transitions for # each batch. 
This allows for technical replicates and # experimental samples to be corrected simultaneously K = np.empty_like(kernel_xy) for i, sample in enumerate(self.samples): sample_idx = self.sample_idx == sample - K[:, sample_idx] = gamma[i] * \ + K[:, sample_idx] = theta[i] * \ kernel_xy[:, sample_idx].minimum( kernel_yx[sample_idx, :].T) + \ - (1 - gamma[i]) * \ + (1 - theta[i]) * \ kernel_xy[:, sample_idx].maximum( kernel_yx[sample_idx, :].T) - if self.gamma == "+": + if self.theta == "+": K = (kernel_xy + kernel_yx.T) / 2 - elif self.gamma == "*": + elif self.theta == "*": K = kernel_xy.multiply(kernel_yx.T) else: - K = self.gamma * kernel_xy.minimum(kernel_yx.T) + \ - (1 - self.gamma) * kernel_xy.maximum(kernel_yx.T) + K = self.theta * kernel_xy.minimum(kernel_yx.T) + \ + (1 - self.theta) * kernel_xy.maximum(kernel_yx.T) return K diff --git a/test/test_exact.py b/test/test_exact.py index 542e625..c5ef987 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -348,7 +348,7 @@ def test_set_params(): assert G.get_params() == {'n_pca': 20, 'random_state': 42, 'kernel_symm': '+', - 'gamma': None, + 'theta': None, 'knn': 3, 'decay': 10, 'distance': 'euclidean', diff --git a/test/test_knn.py b/test/test_knn.py index b8682c2..e274308 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -184,7 +184,7 @@ def test_set_params(): 'n_pca': 20, 'random_state': 42, 'kernel_symm': '+', - 'gamma': None, + 'theta': None, 'knn': 3, 'decay': None, 'distance': 'euclidean', @@ -204,11 +204,11 @@ def test_set_params(): assert_raises(ValueError, G.set_params, decay=10) assert_raises(ValueError, G.set_params, distance='manhattan') assert_raises(ValueError, G.set_params, thresh=1e-3) - assert_raises(ValueError, G.set_params, gamma=0.99) + assert_raises(ValueError, G.set_params, theta=0.99) assert_raises(ValueError, G.set_params, kernel_symm='*') G.set_params(knn=G.knn, decay=G.decay, thresh=G.thresh, distance=G.distance, - gamma=G.gamma, + theta=G.theta, kernel_symm=G.kernel_symm) diff --git a/test/test_landmark.py b/test/test_landmark.py index 42d9025..0c09a3e 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -135,7 +135,7 @@ def test_set_params(): assert G.get_params() == {'n_pca': 20, 'random_state': 42, 'kernel_symm': '+', - 'gamma': None, + 'theta': None, 'n_landmark': 500, 'knn': 3, 'decay': None, diff --git a/test/test_mnn.py b/test/test_mnn.py index 330a4e4..529dffa 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -49,38 +49,38 @@ def test_build_mnn_with_precomputed(): @raises(ValueError) -def test_mnn_with_square_gamma_wrong_length(): +def test_mnn_with_square_theta_wrong_length(): n_sample = len(np.unique(digits['target'])) - # square matrix gamma of the wrong size + # square matrix theta of the wrong size build_graph( data, thresh=0, n_pca=20, decay=10, knn=5, random_state=42, sample_idx=digits['target'], - kernel_symm='gamma', - gamma=np.tile(np.linspace(0, 1, n_sample - 1), + kernel_symm='theta', + theta=np.tile(np.linspace(0, 1, n_sample - 1), n_sample).reshape(n_sample - 1, n_sample)) @raises(ValueError) -def test_mnn_with_vector_gamma(): +def test_mnn_with_vector_theta(): n_sample = len(np.unique(digits['target'])) - # vector gamma + # vector theta build_graph( data, thresh=0, n_pca=20, decay=10, knn=5, random_state=42, sample_idx=digits['target'], - kernel_symm='gamma', - gamma=np.linspace(0, 1, n_sample - 1)) + kernel_symm='theta', + theta=np.linspace(0, 1, n_sample - 1)) def test_mnn_with_non_zero_indexed_sample_idx(): X, sample_idx = generate_swiss_roll() G = 
build_graph(X, sample_idx=sample_idx, - kernel_symm='gamma', gamma=0.5, + kernel_symm='theta', theta=0.5, n_pca=None, use_pygsp=True) sample_idx += 1 G2 = build_graph(X, sample_idx=sample_idx, - kernel_symm='gamma', gamma=0.5, + kernel_symm='theta', theta=0.5, n_pca=None, use_pygsp=True) assert G.N == G2.N assert np.all(G.d == G2.d) @@ -92,11 +92,11 @@ def test_mnn_with_non_zero_indexed_sample_idx(): def test_mnn_with_string_sample_idx(): X, sample_idx = generate_swiss_roll() G = build_graph(X, sample_idx=sample_idx, - kernel_symm='gamma', gamma=0.5, + kernel_symm='theta', theta=0.5, n_pca=None, use_pygsp=True) sample_idx = np.where(sample_idx == 0, 'a', 'b') G2 = build_graph(X, sample_idx=sample_idx, - kernel_symm='gamma', gamma=0.5, + kernel_symm='theta', theta=0.5, n_pca=None, use_pygsp=True) assert G.N == G2.N assert np.all(G.d == G2.d) @@ -110,9 +110,9 @@ def test_mnn_with_string_sample_idx(): ##################################################### -def test_mnn_graph_float_gamma(): +def test_mnn_graph_float_theta(): X, sample_idx = generate_swiss_roll() - gamma = 0.9 + theta = 0.9 k = 10 a = 20 metric = 'euclidean' @@ -139,12 +139,12 @@ def test_mnn_graph_float_gamma(): # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij - W = np.array((gamma * np.minimum(K, K.T)) + - ((1 - gamma) * np.maximum(K, K.T))) + W = np.array((theta * np.minimum(K, K.T)) + + ((1 - theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta, - kernel_symm='gamma', gamma=gamma, + kernel_symm='theta', theta=theta, distance=metric, sample_idx=sample_idx, thresh=0, use_pygsp=True) assert G.N == G2.N @@ -154,10 +154,10 @@ def test_mnn_graph_float_gamma(): assert isinstance(G2, graphtools.graphs.MNNGraph) -def test_mnn_graph_matrix_gamma(): +def test_mnn_graph_matrix_theta(): X, sample_idx = generate_swiss_roll() bs = 0.8 - gamma = np.array([[1, bs], # 0 + theta = np.array([[1, bs], # 0 [bs, 1]]) # 3 k = 10 a = 20 @@ -187,18 +187,18 @@ def test_mnn_graph_matrix_gamma(): K = np.array(K) - matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx)))) + matrix_theta = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx)))) for ix, si in enumerate(set(sample_idx)): for jx, sj in enumerate(set(sample_idx)): - matrix_gamma.iloc[sample_idx == si, - sample_idx == sj] = gamma[ix, jx] + matrix_theta.iloc[sample_idx == si, + sample_idx == sj] = theta[ix, jx] - W = np.array((matrix_gamma * np.minimum(K, K.T)) + - ((1 - matrix_gamma) * np.maximum(K, K.T))) + W = np.array((matrix_theta * np.minimum(K, K.T)) + + ((1 - matrix_theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta, - kernel_symm='gamma', gamma=gamma, + kernel_symm='theta', theta=theta, distance=metric, sample_idx=sample_idx, thresh=0, use_pygsp=True) assert G.N == G2.N @@ -220,21 +220,21 @@ def test_verbose(): print() print("Verbose test: MNN") build_graph(X, sample_idx=sample_idx, - kernel_symm='gamma', gamma=0.5, + kernel_symm='theta', theta=0.5, n_pca=None, verbose=True) def test_set_params(): X, sample_idx = generate_swiss_roll() G = build_graph(X, sample_idx=sample_idx, - kernel_symm='gamma', gamma=0.5, + kernel_symm='theta', theta=0.5, n_pca=None, thresh=1e-4) assert G.get_params() == { 'n_pca': None, 'random_state': 42, - 'kernel_symm': 'gamma', - 'gamma': 0.5, + 'kernel_symm': 'theta', + 'theta': 0.5, 'beta': 1, 'adaptive_k': 'sqrt', 'knn': 3, 
diff --git a/unittest.cfg b/unittest.cfg index 0f1a4ec..85c81ba 100644 --- a/unittest.cfg +++ b/unittest.cfg @@ -3,4 +3,4 @@ verbose = True [coverage] always-on = True -coverage = graphtools \ No newline at end of file +coverage = graphtools From 049e4a82b37e2f0338d37146f5a6084b4583c060 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 17 Sep 2018 22:24:50 -0400 Subject: [PATCH 03/26] implement numeric and vector fixed bandwidth --- graphtools/api.py | 1 + graphtools/graphs.py | 82 +++++++++++++++++++++++++++++++++++--------- test/test_exact.py | 43 +++++++++++++++++++++++ test/test_knn.py | 43 +++++++++++++++++++++++ 4 files changed, 152 insertions(+), 17 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index ee2a55f..9a9af33 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -13,6 +13,7 @@ def Graph(data, precomputed=None, knn=5, decay=10, + bandwidth=None, distance='euclidean', thresh=1e-4, kernel_symm='+', diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 65699fd..8b3c95c 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -35,6 +35,12 @@ class kNNGraph(DataGraph): decay : `int` or `None`, optional (default: `None`) Rate of alpha decay to use. If `None`, alpha decay is not used. + bandwidth : `float`, list-like or `None`, optional (default: `None`) + Fixed bandwidth to use. If given, overrides `knn`. Can be a single + bandwidth or a list-like (shape=[n_samples]) or bandwidths for each + sample. + TODO: implement `callable` bandwidth + distance : `str`, optional (default: `'euclidean'`) Any metric from `scipy.spatial.distance` can be used distance metric for building kNN graph. @@ -55,10 +61,11 @@ class kNNGraph(DataGraph): """ def __init__(self, data, knn=5, decay=None, - distance='euclidean', + bandwidth=None, distance='euclidean', thresh=1e-4, n_pca=None, **kwargs): self.knn = knn self.decay = decay + self.bandwidth = bandwidth self.distance = distance self.thresh = thresh @@ -82,6 +89,7 @@ def get_params(self): params = super().get_params() params.update({'knn': self.knn, 'decay': self.decay, + 'bandwidth': self.bandwidth, 'distance': self.distance, 'thresh': self.thresh, 'n_jobs': self.n_jobs, @@ -101,6 +109,7 @@ def set_params(self, **params): Invalid parameters: (these would require modifying the kernel matrix) - knn - decay + - bandwidth - distance - thresh @@ -116,6 +125,9 @@ def set_params(self, **params): raise ValueError("Cannot update knn. Please create a new graph") if 'decay' in params and params['decay'] != self.decay: raise ValueError("Cannot update decay. Please create a new graph") + if 'bandwidth' in params and params['bandwidth'] != self.bandwidth: + raise ValueError( + "Cannot update bandwidth. Please create a new graph") if 'distance' in params and params['distance'] != self.distance: raise ValueError("Cannot update distance. 
" "Please create a new graph") @@ -184,7 +196,7 @@ def build_kernel(self): K = self.build_kernel_to_data(self.data_nu) return K - def build_kernel_to_data(self, Y, knn=None): + def build_kernel_to_data(self, Y, knn=None, bandwidth=None): """Build a kernel from new input data `Y` to the `self.data` Parameters @@ -198,6 +210,9 @@ def build_kernel_to_data(self, Y, knn=None): knn : `int` or `None`, optional (default: `None`) If `None`, defaults to `self.knn` + bandwidth : `int` or `None`, optional (default: `None`) + If `None`, defaults to `self.bandwidth` + Returns ------- @@ -212,6 +227,8 @@ def build_kernel_to_data(self, Y, knn=None): """ if knn is None: knn = self.knn + if bandwidth is None: + bandwidth = self.bandwidth if knn > self.data.shape[0]: warnings.warn("Cannot set knn ({k}) to be greater than " "data.shape[0] ({n}). Setting knn={n}".format( @@ -247,7 +264,8 @@ def build_kernel_to_data(self, Y, knn=None): RuntimeWarning) tasklogger.log_complete("KNN search") tasklogger.log_start("affinities") - bandwidth = distances[:, knn - 1] + if bandwidth is None: + bandwidth = distances[:, knn - 1] radius = bandwidth * np.power(-1 * np.log(self.thresh), 1 / self.decay) update_idx = np.argwhere( @@ -266,8 +284,9 @@ def build_kernel_to_data(self, Y, knn=None): for i, idx in enumerate(update_idx): distances[idx] = dist_new[i] indices[idx] = ind_new[i] - update_idx = [i for i, d in enumerate(distances) - if np.max(d) < radius[i]] + update_idx = [i for i, d in enumerate(distances) if np.max(d) < + (radius if isinstance(bandwidth, numbers.Number) + else radius[i])] tasklogger.log_debug("search_knn = {}; {} remaining".format( search_knn, len(update_idx))) @@ -281,12 +300,18 @@ def build_kernel_to_data(self, Y, knn=None): # give up - radius search dist_new, ind_new = knn_tree.radius_neighbors( Y[update_idx, :], - radius=np.max(radius[update_idx])) + radius=radius + if isinstance(bandwidth, numbers.Number) + else np.max(radius[update_idx])) for i, idx in enumerate(update_idx): distances[idx] = dist_new[i] indices[idx] = ind_new[i] - data = np.concatenate([distances[i] / bandwidth[i] - for i in range(len(distances))]) + if isinstance(bandwidth, numbers.Number): + data = np.concatenate(distances) / bandwidth + else: + data = np.concatenate([distances[i] / bandwidth[i] + for i in range(len(distances))]) + indices = np.concatenate(indices) indptr = np.concatenate( [[0], np.cumsum([len(d) for d in distances])]) @@ -590,6 +615,12 @@ class TraditionalGraph(DataGraph): decay : `int` or `None`, optional (default: `None`) Rate of alpha decay to use. If `None`, alpha decay is not used. + bandwidth : `float`, list-like or `None`, optional (default: `None`) + Fixed bandwidth to use. If given, overrides `knn`. Can be a single + bandwidth or a list-like (shape=[n_samples]) or bandwidths for each + sample. + TODO: implement `callable` bandwidth + distance : `str`, optional (default: `'euclidean'`) Any metric from `scipy.spatial.distance` can be used distance metric for building kNN graph. @@ -613,8 +644,11 @@ class TraditionalGraph(DataGraph): Only one of `precomputed` and `n_pca` can be set. 
""" - def __init__(self, data, knn=5, decay=10, - distance='euclidean', n_pca=None, + def __init__(self, data, + knn=5, decay=10, + bandwidth=None, + distance='euclidean', + n_pca=None, thresh=1e-4, precomputed=None, **kwargs): if precomputed is not None and n_pca is not None: @@ -640,6 +674,7 @@ def __init__(self, data, knn=5, decay=10, "non-negative".format(precomputed)) self.knn = knn self.decay = decay + self.bandwidth = bandwidth self.distance = distance self.thresh = thresh self.precomputed = precomputed @@ -653,6 +688,7 @@ def get_params(self): params = super().get_params() params.update({'knn': self.knn, 'decay': self.decay, + 'bandwidth': self.bandwidth, 'distance': self.distance, 'precomputed': self.precomputed}) return params @@ -667,6 +703,7 @@ def set_params(self, **params): - distance - knn - decay + - bandwidth Parameters ---------- @@ -690,6 +727,10 @@ def set_params(self, **params): if 'decay' in params and params['decay'] != self.decay and \ self.precomputed is None: raise ValueError("Cannot update decay. Please create a new graph") + if 'bandwidth' in params and params['bandwidth'] != self.bandwidth and \ + self.precomputed is None: + raise ValueError( + "Cannot update bandwidth. Please create a new graph") # update superclass parameters super().set_params(**params) return self @@ -752,9 +793,12 @@ def build_kernel(self): "precomputed='{}' not recognized. " "Choose from ['affinity', 'adjacency', 'distance', " "None]".format(self.precomputed)) - knn_dist = np.partition(pdx, self.knn, axis=1)[:, :self.knn] - epsilon = np.max(knn_dist, axis=1) - pdx = (pdx.T / epsilon).T + if self.bandwidth is None: + knn_dist = np.partition(pdx, self.knn, axis=1)[:, :self.knn] + bandwidth = np.max(knn_dist, axis=1) + else: + bandwidth = self.bandwidth + pdx = (pdx.T / bandwidth).T K = np.exp(-1 * np.power(pdx, self.decay)) # handle nan K = np.where(np.isnan(K), 1, K) @@ -773,7 +817,7 @@ def build_kernel(self): K[K < self.thresh] = 0 return K - def build_kernel_to_data(self, Y, knn=None): + def build_kernel_to_data(self, Y, knn=None, bandwidth=None): """Build transition matrix from new data to the graph Creates a transition matrix such that `Y` can be approximated by @@ -805,15 +849,18 @@ def build_kernel_to_data(self, Y, knn=None): """ if knn is None: knn = self.knn + if bandwidth is None: + bandwidth = self.bandwidth if self.precomputed is not None: raise ValueError("Cannot extend kernel on precomputed graph") else: tasklogger.log_start("affinities") Y = self._check_extension_shape(Y) pdx = cdist(Y, self.data_nu, metric=self.distance) - knn_dist = np.partition(pdx, knn, axis=1)[:, :knn] - epsilon = np.max(knn_dist, axis=1) - pdx = (pdx.T / epsilon).T + if bandwidth is None: + knn_dist = np.partition(pdx, knn, axis=1)[:, :knn] + bandwidth = np.max(knn_dist, axis=1) + pdx = (pdx.T / bandwidth).T K = np.exp(-1 * pdx**self.decay) # handle nan K = np.where(np.isnan(K), 1, K) @@ -957,6 +1004,7 @@ def get_params(self): 'adaptive_k': self.adaptive_k, 'knn': self.knn, 'decay': self.decay, + 'bandwidth': self.bandwidth, 'distance': self.distance, 'thresh': self.thresh, 'n_jobs': self.n_jobs}) diff --git a/test/test_exact.py b/test/test_exact.py index c5ef987..6a77ce4 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -289,6 +289,48 @@ def test_truncated_exact_graph_no_pca(): assert(isinstance(G2, graphtools.graphs.TraditionalGraph)) +def test_exact_graph_fixed_bandwidth(): + decay = 5 + bandwidth = 2 + n_pca = 20 + pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data) + 
data_nu = pca.transform(data) + pdx = squareform(pdist(data_nu, metric='euclidean')) + K = np.exp(-1 * (pdx / bandwidth)**decay) + K = K + K.T + W = np.divide(K, 2) + np.fill_diagonal(W, 0) + G = pygsp.graphs.Graph(W) + G2 = build_graph(data, n_pca=n_pca, + graphtype='exact', + decay=decay, bandwidth=bandwidth, + random_state=42, + thresh=0, + use_pygsp=True) + assert(isinstance(G2, graphtools.graphs.TraditionalGraph)) + assert(G.N == G2.N) + assert(np.all(G.d == G2.d)) + assert((G2.W != G.W).sum() == 0) + assert((G.W != G2.W).nnz == 0) + bandwidth = np.random.gamma(5, 0.5, len(data)) + K = np.exp(-1 * (pdx.T / bandwidth).T**decay) + K = K + K.T + W = np.divide(K, 2) + np.fill_diagonal(W, 0) + G = pygsp.graphs.Graph(W) + G2 = build_graph(data, n_pca=n_pca, + graphtype='exact', + decay=decay, bandwidth=bandwidth, + random_state=42, + thresh=0, + use_pygsp=True) + assert(isinstance(G2, graphtools.graphs.TraditionalGraph)) + assert(G.N == G2.N) + assert(np.all(G.d == G2.d)) + assert((G2.W != G.W).sum() == 0) + assert((G.W != G2.W).nnz == 0) + + ##################################################### # Check interpolation ##################################################### @@ -351,6 +393,7 @@ def test_set_params(): 'theta': None, 'knn': 3, 'decay': 10, + 'bandwidth': None, 'distance': 'euclidean', 'precomputed': None} assert_raises(ValueError, G.set_params, knn=15) diff --git a/test/test_knn.py b/test/test_knn.py index e274308..1b2360f 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -124,6 +124,48 @@ def test_sparse_alpha_knn_graph(): assert(isinstance(G2, graphtools.graphs.kNNGraph)) +def test_knn_graph_fixed_bandwidth(): + k = 3 + decay = 5 + bandwidth = 10 + n_pca = 20 + thresh = 1e-4 + pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data) + data_nu = pca.transform(data) + pdx = squareform(pdist(data_nu, metric='euclidean')) + K = np.exp(-1 * np.power(pdx / bandwidth, decay)) + K[K < thresh] = 0 + K = K + K.T + W = np.divide(K, 2) + np.fill_diagonal(W, 0) + G = pygsp.graphs.Graph(W) + G2 = build_graph(data, n_pca=n_pca, + decay=decay, bandwidth=bandwidth, + knn=k, random_state=42, + thresh=thresh, + use_pygsp=True) + assert(isinstance(G2, graphtools.graphs.kNNGraph)) + np.testing.assert_array_equal(G.N, G2.N) + np.testing.assert_array_equal(G.d, G2.d) + np.testing.assert_array_equal((G.W != G2.W).nnz, 0) + bandwidth = np.random.gamma(20, 0.5, len(data)) + K = np.exp(-1 * (pdx.T / bandwidth).T**decay) + K[K < thresh] = 0 + K = K + K.T + W = np.divide(K, 2) + np.fill_diagonal(W, 0) + G = pygsp.graphs.Graph(W) + G2 = build_graph(data, n_pca=n_pca, + decay=decay, bandwidth=bandwidth, + knn=k, random_state=42, + thresh=thresh, + use_pygsp=True) + assert(isinstance(G2, graphtools.graphs.kNNGraph)) + np.testing.assert_array_equal(G.N, G2.N) + np.testing.assert_array_equal(G.d, G2.d) + np.testing.assert_array_equal((G.W != G2.W).nnz, 0) + + @warns(UserWarning) def test_knn_graph_sparse_no_pca(): build_graph(sp.coo_matrix(data), n_pca=None, # n_pca, @@ -187,6 +229,7 @@ def test_set_params(): 'theta': None, 'knn': 3, 'decay': None, + 'bandwidth': None, 'distance': 'euclidean', 'thresh': 0, 'n_jobs': -1, From 40d41da22fe2ddbfc17e5f6c2eb4787c85cf37b6 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 17 Sep 2018 22:36:52 -0400 Subject: [PATCH 04/26] fix landmark and knn tests --- test/test_knn.py | 2 +- test/test_landmark.py | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/test/test_knn.py b/test/test_knn.py index 
1b2360f..96aba5d 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -163,7 +163,7 @@ def test_knn_graph_fixed_bandwidth(): assert(isinstance(G2, graphtools.graphs.kNNGraph)) np.testing.assert_array_equal(G.N, G2.N) np.testing.assert_array_equal(G.d, G2.d) - np.testing.assert_array_equal((G.W != G2.W).nnz, 0) + np.testing.assert_allclose(G.W.toarray(), G2.W.toarray(), atol=1e-4) @warns(UserWarning) diff --git a/test/test_landmark.py b/test/test_landmark.py index 0c09a3e..51a8740 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -132,18 +132,19 @@ def test_verbose(): def test_set_params(): G = build_graph(data, n_landmark=500, decay=None) G.landmark_op - assert G.get_params() == {'n_pca': 20, - 'random_state': 42, - 'kernel_symm': '+', - 'theta': None, - 'n_landmark': 500, - 'knn': 3, - 'decay': None, - 'distance': - 'euclidean', - 'thresh': 0, - 'n_jobs': -1, - 'verbose': 0} + assert G.get_params() == { + 'n_pca': 20, + 'random_state': 42, + 'kernel_symm': '+', + 'theta': None, + 'n_landmark': 500, + 'knn': 3, + 'decay': None, + 'bandwidth': None, + 'distance': 'euclidean', + 'thresh': 0, + 'n_jobs': -1, + 'verbose': 0} G.set_params(n_landmark=300) assert G.landmark_op.shape == (300, 300) G.set_params(n_landmark=G.n_landmark, n_svd=G.n_svd) From 5f2bd79746a3ff841f1ddef0b87e113a9ae38456 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 17 Sep 2018 22:43:02 -0400 Subject: [PATCH 05/26] implement fixed bandwidth mnn --- graphtools/graphs.py | 5 ++++- test/test_mnn.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 8b3c95c..4e53353 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -904,6 +904,7 @@ def __init__(self, data, sample_idx, knn=5, beta=1, n_pca=None, adaptive_k='sqrt', decay=None, + bandwidth=None, distance='euclidean', thresh=1e-4, n_jobs=1, @@ -916,6 +917,7 @@ def __init__(self, data, sample_idx, self.knn = knn self.decay = decay self.distance = distance + self.bandwidth = bandwidth self.thresh = thresh self.n_jobs = n_jobs self.weighted_knn = self._weight_knn() @@ -1043,7 +1045,7 @@ def set_params(self, **params): "Cannot update adaptive_k. 
Please create a new graph") # knn arguments - knn_kernel_args = ['knn', 'decay', 'distance', 'thresh'] + knn_kernel_args = ['knn', 'decay', 'distance', 'thresh', 'bandwidth'] knn_other_args = ['n_jobs', 'random_state', 'verbose'] for arg in knn_kernel_args: if arg in params and params[arg] != getattr(self, arg): @@ -1085,6 +1087,7 @@ def build_kernel(self): graph = Graph(data, n_pca=None, knn=self.weighted_knn[i], decay=self.decay, + bandwidth=self.bandwidth, distance=self.distance, thresh=self.thresh, verbose=self.verbose, diff --git a/test/test_mnn.py b/test/test_mnn.py index 529dffa..3ce8fce 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -239,6 +239,7 @@ def test_set_params(): 'adaptive_k': 'sqrt', 'knn': 3, 'decay': 10, + 'bandwidth': None, 'distance': 'euclidean', 'thresh': 1e-4, 'n_jobs': 1 From 264bd2dc6858acdf8335d86d286d735420f6d7a2 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 18 Sep 2018 12:35:42 -0400 Subject: [PATCH 06/26] fix check for equal kernels --- test/test_knn.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/test_knn.py b/test/test_knn.py index 96aba5d..d24ed82 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -147,7 +147,9 @@ def test_knn_graph_fixed_bandwidth(): assert(isinstance(G2, graphtools.graphs.kNNGraph)) np.testing.assert_array_equal(G.N, G2.N) np.testing.assert_array_equal(G.d, G2.d) - np.testing.assert_array_equal((G.W != G2.W).nnz, 0) + np.testing.assert_allclose( + (G.W - G2.W).data, + np.zeros_like((G.W - G2.W).data), atol=1e-14) bandwidth = np.random.gamma(20, 0.5, len(data)) K = np.exp(-1 * (pdx.T / bandwidth).T**decay) K[K < thresh] = 0 @@ -163,7 +165,9 @@ def test_knn_graph_fixed_bandwidth(): assert(isinstance(G2, graphtools.graphs.kNNGraph)) np.testing.assert_array_equal(G.N, G2.N) np.testing.assert_array_equal(G.d, G2.d) - np.testing.assert_allclose(G.W.toarray(), G2.W.toarray(), atol=1e-4) + np.testing.assert_allclose( + (G.W - G2.W).data, + np.zeros_like((G.W - G2.W).data), atol=1e-14) @warns(UserWarning) From 56e16a42cb3a7ea448561b566d302beffd0d3bdd Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 18 Sep 2018 12:35:52 -0400 Subject: [PATCH 07/26] set random seed for swiss roll generation --- test/load_tests/__init__.py | 2 +- test/test_mnn.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py index 3f62c9d..f51e45a 100644 --- a/test/load_tests/__init__.py +++ b/test/load_tests/__init__.py @@ -38,7 +38,7 @@ def generate_swiss_roll(n_samples=1000, noise=0.5, seed=42): t = 1.5 * np.pi * (1 + 2 * generator.rand(1, n_samples)) x = t * np.cos(t) y = t * np.sin(t) - sample_idx = np.random.choice([0, 1], n_samples, replace=True) + sample_idx = generator.choice([0, 1], n_samples, replace=True) z = sample_idx t = np.squeeze(t) X = np.concatenate((x, y)) diff --git a/test/test_mnn.py b/test/test_mnn.py index 3ce8fce..b827470 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -148,7 +148,7 @@ def test_mnn_graph_float_theta(): distance=metric, sample_idx=sample_idx, thresh=0, use_pygsp=True) assert G.N == G2.N - assert np.all(G.d == G2.d) + np.testing.assert_array_equal(G.dw, G2.dw) assert (G.W != G2.W).nnz == 0 assert (G2.W != G.W).sum() == 0 assert isinstance(G2, graphtools.graphs.MNNGraph) @@ -202,7 +202,7 @@ def test_mnn_graph_matrix_theta(): distance=metric, sample_idx=sample_idx, thresh=0, use_pygsp=True) assert G.N == G2.N - assert np.all(G.d == G2.d) + np.testing.assert_array_equal(G.dw, G2.dw) assert 
(G.W != G2.W).nnz == 0 assert (G2.W != G.W).sum() == 0 assert isinstance(G2, graphtools.graphs.MNNGraph) From f5b19631a004986e07e7a951dc5a2f2c1fbd4d23 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 16 Oct 2018 11:18:44 -0400 Subject: [PATCH 08/26] add conda badge --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index d18d970..2a2a264 100644 --- a/README.rst +++ b/README.rst @@ -5,6 +5,9 @@ graphtools .. image:: https://img.shields.io/pypi/v/graphtools.svg :target: https://pypi.org/project/graphtools/ :alt: Latest PyPi version +.. image:: https://anaconda.org/conda-forge/tasklogger/badges/version.svg + :target: https://anaconda.org/conda-forge/tasklogger/ + :alt: Latest Conda version .. image:: https://api.travis-ci.com/KrishnaswamyLab/graphtools.svg?branch=master :target: https://travis-ci.com/KrishnaswamyLab/graphtools :alt: Travis CI Build From 5acc72429bb19bdb1ce0439e58dc60427e956c32 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 16 Oct 2018 11:19:30 -0400 Subject: [PATCH 09/26] expose landmarkgraph clusters --- graphtools/graphs.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 4e53353..2445711 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -360,8 +360,14 @@ class LandmarkGraph(DataGraph): transitions : array-like, shape=[n_samples, n_landmark] Transition probabilities between samples and landmarks. - _clusters : array-like, shape=[n_samples] + clusters : array-like, shape=[n_samples] Private attribute. Cluster assignments for each sample. + + Examples + -------- + >>> G = graphtools.Graph(data, n_landmark=1000) + >>> X_landmark = transform(G.landmark_op) + >>> X_full = G.interpolate(X_landmark) """ def __init__(self, data, n_landmark=2000, n_svd=100, **kwargs): @@ -456,6 +462,23 @@ def landmark_op(self): self.build_landmark_op() return self._landmark_op + @property + def clusters(self): + """Cluster assignments for each sample. + + Compute or return the cluster assignments + + Returns + ------- + clusters : list-like, shape=[n_samples] + Cluster assignments for each sample. 
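A short usage sketch of the newly exposed attribute (random data for illustration; shapes follow the docstrings in this patch)::

    import numpy as np
    import graphtools

    X = np.random.normal(size=(2000, 100))
    G = graphtools.Graph(X, n_landmark=500)

    L = G.landmark_op        # (500, 500) landmark diffusion operator
    clusters = G.clusters    # landmark (cluster) assignment per sample
    print(L.shape, len(clusters), len(np.unique(clusters)))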
+ """ + try: + return self._clusters + except AttributeError: + self.build_landmark_op() + return self._clusters + @property def transitions(self): """Transition matrix from samples to landmarks @@ -475,13 +498,13 @@ def transitions(self): return self._transitions def _landmarks_to_data(self): - landmarks = np.unique(self._clusters) + landmarks = np.unique(self.clusters) if sparse.issparse(self.kernel): pmn = sparse.vstack( - [sparse.csr_matrix(self.kernel[self._clusters == i, :].sum( + [sparse.csr_matrix(self.kernel[self.clusters == i, :].sum( axis=0)) for i in landmarks]) else: - pmn = np.array([np.sum(self.kernel[self._clusters == i, :], axis=0) + pmn = np.array([np.sum(self.kernel[self.clusters == i, :], axis=0) for i in landmarks]) return pmn @@ -557,12 +580,12 @@ def extend_to_data(self, data, **kwargs): kernel = self.build_kernel_to_data(data, **kwargs) if sparse.issparse(kernel): pnm = sparse.hstack( - [sparse.csr_matrix(kernel[:, self._clusters == i].sum( - axis=1)) for i in np.unique(self._clusters)]) + [sparse.csr_matrix(kernel[:, self.clusters == i].sum( + axis=1)) for i in np.unique(self.clusters)]) else: pnm = np.array([np.sum( - kernel[:, self._clusters == i], - axis=1).T for i in np.unique(self._clusters)]).transpose() + kernel[:, self.clusters == i], + axis=1).T for i in np.unique(self.clusters)]).transpose() pnm = normalize(pnm, norm='l1', axis=1) return pnm From 411b7bab403a7e408a4fa048640486648405c42e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 16 Oct 2018 11:20:04 -0400 Subject: [PATCH 10/26] test weighted degrees --- test/test_knn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_knn.py b/test/test_knn.py index d24ed82..80c3f11 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -164,7 +164,7 @@ def test_knn_graph_fixed_bandwidth(): use_pygsp=True) assert(isinstance(G2, graphtools.graphs.kNNGraph)) np.testing.assert_array_equal(G.N, G2.N) - np.testing.assert_array_equal(G.d, G2.d) + np.testing.assert_allclose(G.dw, G2.dw, atol=1e-14) np.testing.assert_allclose( (G.W - G2.W).data, np.zeros_like((G.W - G2.W).data), atol=1e-14) From d63007b59d44d5b24e6dd1b3324e9c47578b4096 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 20:06:50 -0500 Subject: [PATCH 11/26] add from_igraph and to_pygsp methods --- graphtools/__init__.py | 2 +- graphtools/api.py | 8 ++++++++ graphtools/base.py | 5 +++++ setup.py | 3 ++- test/test_api.py | 22 ++++++++++++++++++++++ 5 files changed, 38 insertions(+), 2 deletions(-) diff --git a/graphtools/__init__.py b/graphtools/__init__.py index 05d693d..8fc8a50 100644 --- a/graphtools/__init__.py +++ b/graphtools/__init__.py @@ -1,2 +1,2 @@ -from .api import Graph +from .api import Graph, from_igraph from .version import __version__ diff --git a/graphtools/api.py b/graphtools/api.py index 9a9af33..2181066 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -1,6 +1,7 @@ import numpy as np import warnings import tasklogger +from scipy import sparse from . import base from . import graphs @@ -222,3 +223,10 @@ def Graph(data, for key, value in params.items() if key != "data"]))) return Graph(**params) + + +def from_igraph(G, **kwargs): + if 'precomputed' in kwargs and kwargs['precomputed'] != 'adjacency': + raise ValueError("Cannot build graph from igraph with precomputed={}. 
" + "Use 'adjacency' instead.".format(kwargs['precomputed'])) + return Graph(sparse.coo_matrix(G.get_adjacency().data), precomputed='adjacency', **kwargs) diff --git a/graphtools/base.py b/graphtools/base.py index 53e8fb1..179b080 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -27,6 +27,7 @@ elementwise_maximum, set_diagonal) + class Base(object): """Class that deals with key-word arguments but is otherwise just an object. @@ -534,6 +535,10 @@ def build_kernel(self): """ raise NotImplementedError + def to_pygsp(self): + from . import api + return api.Graph(self.K, precomputed="affinity", use_pygsp=True) + class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)): """Interface between BaseGraph and PyGSP. diff --git a/setup.py b/setup.py index 1394bf8..cd27e15 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,8 @@ 'nose2', 'pandas', 'coverage', - 'coveralls' + 'coveralls', + 'python-igraph' ] if sys.version_info[0] == 3: diff --git a/test/test_api.py b/test/test_api.py index c099086..0ef9533 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -4,8 +4,30 @@ build_graph, raises, ) +import igraph +import numpy as np +import graphtools +def test_from_igraph(): + n = 100 + m = 500 + K = np.zeros((n, n)) + for _ in range(m): + e = np.random.choice(n, 2, replace=False) + K[e[0], e[1]] = K[e[1], e[0]] = 1 + g = igraph.Graph.Adjacency(K.tolist()) + G = graphtools.from_igraph(g) + G2 = graphtools.Graph(K, precomputed='adjacency') + assert np.all(G.K == G2.K) + + +def test_to_pygsp(): + G = build_graph(data) + G2 = G.to_pygsp() + assert isinstance(G2, graphtools.graphs.PyGSPGraph) + assert np.all(G2.K == G.K) + ##################################################### # Check parameters ##################################################### From 718d191989f19a0f9cde1350e2920e370048cde6 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 20:48:12 -0500 Subject: [PATCH 12/26] normalize between-batch affinities by rowwise magnitude of within-batch affinities --- graphtools/graphs.py | 27 ++++++++++++++++--------- test/test_mnn.py | 48 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 2445711..e556054 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -911,9 +911,9 @@ class MNNGraph(DataGraph): Batch index beta : `float`, optional (default: 1) - Downweight within-batch affinities by beta + Downweight between-batch affinities by beta - adaptive_k : {'min', 'mean', 'sqrt', `None`} (default: 'sqrt') + adaptive_k : {'min', 'mean', 'sqrt', `None`} (default: None) Weights MNN kernel adaptively using the number of cells in each sample according to the selected method. 
@@ -925,7 +925,7 @@ class MNNGraph(DataGraph): def __init__(self, data, sample_idx, knn=5, beta=1, n_pca=None, - adaptive_k='sqrt', + adaptive_k=None, decay=None, bandwidth=None, distance='euclidean', @@ -1116,7 +1116,7 @@ def build_kernel(self): verbose=self.verbose, random_state=self.random_state, n_jobs=self.n_jobs, - initialize=False) + initialize=True) self.subgraphs.append(graph) # append to list of subgraphs tasklogger.log_complete("subgraphs") @@ -1126,16 +1126,25 @@ def build_kernel(self): else: K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) for i, X in enumerate(self.subgraphs): + K = set_submatrix(K, self.sample_idx == self.samples[i], + self.sample_idx == self.samples[i], X.K) + within_batch_norm = np.array(np.sum(X.K, 1)).flatten() for j, Y in enumerate(self.subgraphs): + if i == j: + continue tasklogger.log_start( "kernel from sample {} to {}".format(self.samples[i], self.samples[j])) Kij = Y.build_kernel_to_data( X.data_nu, knn=self.weighted_knn[i]) - if i == j: - # downweight within-batch affinities by beta - Kij = Kij * self.beta + between_batch_norm = np.array(np.sum(Kij, 1)).flatten() + scale = np.minimum(1, within_batch_norm / + between_batch_norm) * self.beta + if sparse.issparse(Kij): + Kij = Kij.multiply(scale[:, None]) + else: + Kij = Kij * scale[:, None] K = set_submatrix(K, self.sample_idx == self.samples[i], self.sample_idx == self.samples[j], Kij) tasklogger.log_complete( @@ -1147,11 +1156,11 @@ def symmetrize_kernel(self, K): if self.kernel_symm == 'theta' and self.theta is not None and \ not isinstance(self.theta, numbers.Number): # matrix theta - # Gamma can be a matrix with specific values transitions for + # Theta can be a matrix with specific values transitions for # each batch. This allows for technical replicates and # experimental samples to be corrected simultaneously tasklogger.log_debug("Using theta symmetrization. 
" - "Gamma:\n{}".format(self.theta)) + "Theta:\n{}".format(self.theta)) for i, sample_i in enumerate(self.samples): for j, sample_j in enumerate(self.samples): if j < i: diff --git a/test/test_mnn.py b/test/test_mnn.py index b827470..dd6c936 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -12,6 +12,7 @@ raises, cdist, ) +from scipy.linalg import norm ##################################################### @@ -116,7 +117,7 @@ def test_mnn_graph_float_theta(): k = 10 a = 20 metric = 'euclidean' - beta = 0 + beta = 0.5 samples = np.unique(sample_idx) K = np.zeros((len(X), len(X))) @@ -133,17 +134,32 @@ def test_mnn_graph_float_theta(): pdxe_ij = pdx_ij / e_ij[:, np.newaxis] # normalize k_ij = np.exp(-1 * (pdxe_ij ** a)) # apply alpha-decaying kernel if si == sj: - K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \ - (1 - beta) # fill out values in K for NN on diagonal + K.iloc[sample_idx == si, sample_idx == sj] = ( + k_ij + k_ij.T) / 2 else: # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij - + Kn = K.copy() + for i in samples: + curr_K = K.iloc[sample_idx == i, sample_idx == i] + i_norm = norm(curr_K, 1, axis=1) + for j in samples: + if i == j: + continue + else: + curr_K = K.iloc[sample_idx == i, sample_idx == j] + curr_norm = norm(curr_K, 1, axis=1) + scale = np.minimum( + np.ones(len(curr_norm)), i_norm / curr_norm) * beta + Kn.iloc[sample_idx == i, sample_idx == j] = ( + curr_K.T * scale).T + + K = Kn W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) - G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta, + G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta, kernel_symm='theta', theta=theta, distance=metric, sample_idx=sample_idx, thresh=0, use_pygsp=True) @@ -179,11 +195,27 @@ def test_mnn_graph_matrix_theta(): pdxe_ij = pdx_ij / e_ij[:, np.newaxis] # normalize k_ij = np.exp(-1 * (pdxe_ij ** a)) # apply alpha-decaying kernel if si == sj: - K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \ - (1 - beta) # fill out values in K for NN on diagonal + K.iloc[sample_idx == si, sample_idx == sj] = ( + k_ij + k_ij.T) / 2 else: # fill out values in K for NN on diagonal K.iloc[sample_idx == si, sample_idx == sj] = k_ij + Kn = K.copy() + for i in samples: + curr_K = K.iloc[sample_idx == i, sample_idx == i] + i_norm = norm(curr_K, 1, axis=1) + for j in samples: + if i == j: + continue + else: + curr_K = K.iloc[sample_idx == i, sample_idx == j] + curr_norm = norm(curr_K, 1, axis=1) + scale = np.minimum( + np.ones(len(curr_norm)), i_norm / curr_norm) * beta + Kn.iloc[sample_idx == i, sample_idx == j] = ( + curr_K.T * scale).T + + K = Kn K = np.array(K) @@ -197,7 +229,7 @@ def test_mnn_graph_matrix_theta(): ((1 - matrix_theta) * np.maximum(K, K.T))) np.fill_diagonal(W, 0) G = pygsp.graphs.Graph(W) - G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta, + G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta, kernel_symm='theta', theta=theta, distance=metric, sample_idx=sample_idx, thresh=0, use_pygsp=True) From b95df52224ef09068a38c4475b6e12f0677a0417 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 20:50:26 -0500 Subject: [PATCH 13/26] ignore igraph warning --- test/load_tests/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py index f51e45a..f018957 100644 --- a/test/load_tests/__init__.py +++ b/test/load_tests/__init__.py @@ -16,6 +16,7 @@ def 
reset_warnings(): warnings.resetwarnings() warnings.simplefilter("error") ignore_numpy_warning() + ignore_igraph_warning() def ignore_numpy_warning(): @@ -25,6 +26,14 @@ def ignore_numpy_warning(): "matrices or deal with linear algebra ") +def ignore_igraph_warning(): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, + message="The SafeConfigParser class has been renamed to ConfigParser " + "in Python 3.2. This alias will be removed in future versions. Use " + "ConfigParser directly instead") + + reset_warnings() global digits From d2741c3a522ef8467f36171764cfaf0e15af7f0e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 21:33:58 -0500 Subject: [PATCH 14/26] resolve python2 division error --- graphtools/graphs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index e556054..e090dac 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -1,3 +1,4 @@ +from __future__ import division from builtins import super import numpy as np from sklearn.neighbors import NearestNeighbors From 324a54730a5c44b0c06a731544eb0cd1ac42cb84 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 21:36:24 -0500 Subject: [PATCH 15/26] update beta docstring --- graphtools/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index 2181066..1eaaa0b 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -10,7 +10,7 @@ def Graph(data, n_pca=None, sample_idx=None, - adaptive_k='sqrt', + adaptive_k=None, precomputed=None, knn=5, decay=10, @@ -90,12 +90,12 @@ def Graph(data, Only one of `precomputed` and `n_pca` can be set. beta: float, optional(default: 1) - Multiply within - batch connections by(1 - beta) + Multiply between - batch connections by beta sample_idx: array-like Batch index for MNN kernel - adaptive_k : `{'min', 'mean', 'sqrt', 'none'}` (default: 'sqrt') + adaptive_k : `{'min', 'mean', 'sqrt', 'none'}` (default: None) Weights MNN kernel adaptively using the number of cells in each sample according to the selected method. From a46804a9b4f04f5d994f850b690d7667f4020705 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 21:40:28 -0500 Subject: [PATCH 16/26] update bandwidth docstring --- graphtools/api.py | 5 +++++ graphtools/graphs.py | 4 ++-- test/test_api.py | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index 1eaaa0b..c083bf3 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -63,6 +63,11 @@ def Graph(data, decay : `int` or `None`, optional (default: 10) Rate of alpha decay to use. If `None`, alpha decay is not used. + bandwidth : `float`, list-like or `None`, optional (default: `None`) + Fixed bandwidth to use. If given, overrides `knn`. Can be a single + bandwidth or a list-like (shape=[n_samples]) of bandwidths for each + sample. + distance : `str`, optional (default: `'euclidean'`) Any metric from `scipy.spatial.distance` can be used distance metric for building kNN graph. diff --git a/graphtools/graphs.py b/graphtools/graphs.py index e090dac..f1c54aa 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -38,7 +38,7 @@ class kNNGraph(DataGraph): bandwidth : `float`, list-like or `None`, optional (default: `None`) Fixed bandwidth to use. If given, overrides `knn`. Can be a single - bandwidth or a list-like (shape=[n_samples]) or bandwidths for each + bandwidth or a list-like (shape=[n_samples]) of bandwidths for each sample. 
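The fixed bandwidth replaces the adaptive k-nearest-neighbor distance as the kernel scale. A dense NumPy sketch of the resulting kernel, mirroring the exact-graph tests earlier in this series (scalar or per-sample bandwidth)::

    import numpy as np
    from scipy.spatial.distance import pdist, squareform

    X = np.random.normal(size=(300, 20))
    pdx = squareform(pdist(X, metric='euclidean'))
    decay = 5

    # Scalar bandwidth: one scale for every sample
    K_scalar = np.exp(-1 * (pdx / 2.0) ** decay)

    # Per-sample bandwidth: row i is scaled by bandwidth[i]
    bandwidth = np.random.gamma(5, 0.5, len(X))
    K_vector = np.exp(-1 * (pdx.T / bandwidth).T ** decay)

    # Symmetrize as in the tests and zero the diagonal
    W = (K_vector + K_vector.T) / 2
    np.fill_diagonal(W, 0)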
TODO: implement `callable` bandwidth @@ -641,7 +641,7 @@ class TraditionalGraph(DataGraph): bandwidth : `float`, list-like or `None`, optional (default: `None`) Fixed bandwidth to use. If given, overrides `knn`. Can be a single - bandwidth or a list-like (shape=[n_samples]) or bandwidths for each + bandwidth or a list-like (shape=[n_samples]) of bandwidths for each sample. TODO: implement `callable` bandwidth diff --git a/test/test_api.py b/test/test_api.py index 0ef9533..e46a60d 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -4,6 +4,8 @@ build_graph, raises, ) +import warnings + import igraph import numpy as np import graphtools From 6252eb1a68a3a879ff07d98a80f016db743905f5 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 21:48:39 -0500 Subject: [PATCH 17/26] fix default params for mnngraph --- test/test_mnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_mnn.py b/test/test_mnn.py index dd6c936..827531e 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -268,7 +268,7 @@ def test_set_params(): 'kernel_symm': 'theta', 'theta': 0.5, 'beta': 1, - 'adaptive_k': 'sqrt', + 'adaptive_k': None, 'knn': 3, 'decay': 10, 'bandwidth': None, From fcc3b4350a4ea1f663c9860a3ba7178c4d771432 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 16 Nov 2018 22:07:51 -0500 Subject: [PATCH 18/26] document and test api functions --- graphtools/api.py | 29 +++++++++++++++++++++++++---- graphtools/base.py | 36 ++++++++++++++++++++++++++++++++++-- test/test_api.py | 25 +++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index c083bf3..9e5d31b 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -231,7 +231,28 @@ def Graph(data, def from_igraph(G, **kwargs): - if 'precomputed' in kwargs and kwargs['precomputed'] != 'adjacency': - raise ValueError("Cannot build graph from igraph with precomputed={}. " - "Use 'adjacency' instead.".format(kwargs['precomputed'])) - return Graph(sparse.coo_matrix(G.get_adjacency().data), precomputed='adjacency', **kwargs) + """Convert an igraph.Graph to a graphtools.Graph + + Creates a graphtools.graphs.TraditionalGraph with a + precomputed adjacency matrix + + Parameters + ---------- + G : igraph.Graph + Graph to be converted + kwargs + keyword arguments for graphtools.Graph + + Returns + ------- + G : graphtools.graphs.TraditionalGraph + """ + if 'precomputed' in kwargs: + if kwargs['precomputed'] != 'adjacency': + warnings.warn( + "Cannot build graph from igraph with precomputed={}. " + "Use 'adjacency' instead.".format(kwargs['precomputed']), + UserWarning) + del kwargs['precomputed'] + return Graph(sparse.coo_matrix(G.get_adjacency().data), + precomputed='adjacency', **kwargs) diff --git a/graphtools/base.py b/graphtools/base.py index 179b080..d08c741 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -535,9 +535,41 @@ def build_kernel(self): """ raise NotImplementedError - def to_pygsp(self): + def to_pygsp(self, **kwargs): + """Convert to a PyGSP graph + + For use only when the user means to create the graph using + the flag `use_pygsp=True`, and doesn't wish to recompute the kernel. + Creates a graphtools.graphs.TraditionalGraph with a precomputed + affinity matrix which also inherits from pygsp.graphs.Graph. + + Parameters + ---------- + kwargs + keyword arguments for graphtools.Graph + + Returns + ------- + G : graphtools.base.PyGSPGraph, graphtools.graphs.TraditionalGraph + """ from . 
import api - return api.Graph(self.K, precomputed="affinity", use_pygsp=True) + if 'precomputed' in kwargs: + if kwargs['precomputed'] != 'affinity': + warnings.warn( + "Cannot build PyGSPGraph with precomputed={}. " + "Using 'affinity' instead.".format(kwargs['precomputed']), + UserWarning) + del kwargs['precomputed'] + if 'use_pygsp' in kwargs: + if kwargs['use_pygsp'] is not True: + warnings.warn( + "Cannot build PyGSPGraph with use_pygsp={}. " + "Use True instead.".format(kwargs['use_pygsp']), + UserWarning) + del kwargs['use_pygsp'] + return api.Graph(self.K, + precomputed="affinity", use_pygsp=True, + **kwargs) class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)): diff --git a/test/test_api.py b/test/test_api.py index e46a60d..64b0f4f 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -3,6 +3,7 @@ data, build_graph, raises, + warns, ) import warnings @@ -24,12 +25,36 @@ def test_from_igraph(): assert np.all(G.K == G2.K) +@warns(UserWarning) +def test_from_igraph_invalid_precomputed(): + n = 100 + m = 500 + K = np.zeros((n, n)) + for _ in range(m): + e = np.random.choice(n, 2, replace=False) + K[e[0], e[1]] = K[e[1], e[0]] = 1 + g = igraph.Graph.Adjacency(K.tolist()) + G = graphtools.from_igraph(g, precomputed='affinity') + + def test_to_pygsp(): G = build_graph(data) G2 = G.to_pygsp() assert isinstance(G2, graphtools.graphs.PyGSPGraph) assert np.all(G2.K == G.K) + +@warns(UserWarning) +def test_to_pygsp_invalid_precomputed(): + G = build_graph(data) + G2 = G.to_pygsp(precomputed='adjacency') + + +@warns(UserWarning) +def test_to_pygsp_invalid_use_pygsp(): + G = build_graph(data) + G2 = G.to_pygsp(use_pygsp=False) + ##################################################### # Check parameters ##################################################### From 654cf467ccaaaf9d3c21e3560d726d1cb237dff1 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 21 Nov 2018 19:51:14 -0800 Subject: [PATCH 19/26] fix edc91ea0f01d8d49c8d153b54e9182454bf1fddb md rst issue --- README.rst | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 2a2a264..b343cbd 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,11 @@ Installation graphtools is available on `pip`. 
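As a brief sketch of the conversion helpers documented in the patch above (the random adjacency matrix is purely illustrative), an `igraph` graph can be wrapped with `graphtools.from_igraph` and later exported again with `to_pygsp`::

    import numpy as np
    import igraph
    import graphtools

    # hypothetical random symmetric 0/1 adjacency matrix
    n = 100
    A = np.zeros((n, n))
    for _ in range(300):
        i, j = np.random.choice(n, 2, replace=False)
        A[i, j] = A[j, i] = 1

    g = igraph.Graph.Adjacency(A.tolist())
    G = graphtools.from_igraph(g)   # TraditionalGraph with a precomputed adjacency matrix
    G2 = G.to_pygsp()               # graph that also inherits from pygsp.graphs.Graph
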
Install by running the following in a terminal:: - pip install --user graphtools + pip install --user graphtools + +Alternatively, graphtools can be installed using `Conda `_ (most easily obtained via the `Miniconda Python distribution `_):: + + conda install -c conda-forge graphtools Usage example ------------- @@ -40,14 +44,14 @@ The `graphtools.Graph` class provides an all-in-one interface for k-nearest neig Use it as follows:: - from sklearn import datasets - import graphtools - digits = datasets.load_digits() - G = graphtools.Graph(digits['data']) - K = G.kernel - P = G.diff_op - G = graphtools.Graph(digits['data'], n_landmark=300) - L = G.landmark_op + from sklearn import datasets + import graphtools + digits = datasets.load_digits() + G = graphtools.Graph(digits['data']) + K = G.kernel + P = G.diff_op + G = graphtools.Graph(digits['data'], n_landmark=300) + L = G.landmark_op Help ---- From 305a861fa0827aeb78e53af27d0a3d379d04a512 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 10:04:46 -0800 Subject: [PATCH 20/26] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 569b121..d3ec452 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "0.1.10" +__version__ = "0.2.0" From 73528d9dd090d4855025b13f3f75ddc126899ee8 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 10:14:18 -0800 Subject: [PATCH 21/26] test for gamma/theta parameter confusion --- graphtools/base.py | 9 +++++++++ test/test_mnn.py | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/graphtools/base.py b/graphtools/base.py index d08c741..5f1a2ab 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -338,7 +338,16 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)): def __init__(self, kernel_symm='+', theta=None, + gamma=None, initialize=True, **kwargs): + if gamma is not None: + warnings.warn("gamma is deprecated. " + "Setting theta={}".format(gamma), FutureWarning) + theta = gamma + if kernel_symm == 'gamma': + warnings.warn("kernel_symm='gamma' is deprecated. 
" + "Setting kernel_symm='theta'", FutureWarning) + kernel_symm = 'theta' self.kernel_symm = kernel_symm self.theta = theta self._check_symmetrization(kernel_symm, theta) diff --git a/test/test_mnn.py b/test/test_mnn.py index 827531e..898c721 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -10,6 +10,7 @@ generate_swiss_roll, assert_raises, raises, + warns, cdist, ) from scipy.linalg import norm @@ -74,6 +75,26 @@ def test_mnn_with_vector_theta(): theta=np.linspace(0, 1, n_sample - 1)) +@warns(FutureWarning) +def test_mnn_with_gamma(): + build_graph( + data, thresh=0, n_pca=20, + decay=10, knn=5, random_state=42, + sample_idx=digits['target'], + kernel_symm='theta', + gamma=0.9) + + +@warns(FutureWarning) +def test_mnn_with_kernel_symm_gamma(): + build_graph( + data, thresh=0, n_pca=20, + decay=10, knn=5, random_state=42, + sample_idx=digits['target'], + kernel_symm='gamma', + theta=0.9) + + def test_mnn_with_non_zero_indexed_sample_idx(): X, sample_idx = generate_swiss_roll() G = build_graph(X, sample_idx=sample_idx, From daba2c919abbd4a241aa7db61a23d29666cf964b Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 10:19:10 -0800 Subject: [PATCH 22/26] bump tasklogger version --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6fed4f1..08e1515 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ scipy>=1.1.0 pygsp>=>=0.5.1 scikit-learn>=0.19.1 future -tasklogger>=0.2.1 +tasklogger>=0.4.0 diff --git a/setup.py b/setup.py index cd27e15..f67b380 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ 'pygsp>=0.5.1', 'scikit-learn>=0.19.1', 'future', - 'tasklogger>=0.2.1', + 'tasklogger>=0.4.0', ] test_requires = [ From 67283697500a3ab3614aed2835884a9e9b730fff Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 10:30:52 -0800 Subject: [PATCH 23/26] cache packages --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index d4accf9..e346d3b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,8 @@ sudo: required + cache: packages + addons: apt: packages: From 5b95ff9c60af10ba4a820e243c6aa71e6b042cea Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 11:04:22 -0800 Subject: [PATCH 24/26] increase coverage --- test/test_exact.py | 8 ++++++++ test/test_knn.py | 19 +++++++++++++++++++ test/test_landmark.py | 11 +++++++++++ test/test_mnn.py | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/test/test_exact.py b/test/test_exact.py index 6a77ce4..43216ac 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -68,6 +68,13 @@ def test_precomputed_negative(): n_pca=None) +@raises(ValueError) +def test_precomputed_invalid(): + build_graph(np.random.uniform(0, 1, [200, 200]), + precomputed='invalid', + n_pca=None) + + @warns(RuntimeWarning) def test_duplicate_data(): build_graph(np.vstack([data, data[:10]]), @@ -400,6 +407,7 @@ def test_set_params(): assert_raises(ValueError, G.set_params, decay=15) assert_raises(ValueError, G.set_params, distance='manhattan') assert_raises(ValueError, G.set_params, precomputed='distance') + assert_raises(ValueError, G.set_params, bandwidth=5) G.set_params(knn=G.knn, decay=G.decay, distance=G.distance, diff --git a/test/test_knn.py b/test/test_knn.py index 80c3f11..c359ede 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -45,6 +45,24 @@ def test_duplicate_data(): thresh=1e-4) +@warns(UserWarning) +def 
test_balltree_cosine(): + build_graph(data, + n_pca=20, + decay=10, + distance='cosine', + thresh=1e-4) + + +@warns(UserWarning) +def test_k_too_large(): + build_graph(data, + n_pca=20, + decay=10, + knn=len(data) + 1, + thresh=1e-4) + + ##################################################### # Check kernel ##################################################### @@ -253,6 +271,7 @@ def test_set_params(): assert_raises(ValueError, G.set_params, thresh=1e-3) assert_raises(ValueError, G.set_params, theta=0.99) assert_raises(ValueError, G.set_params, kernel_symm='*') + assert_raises(ValueError, G.set_params, bandwidth=5) G.set_params(knn=G.knn, decay=G.decay, thresh=G.thresh, diff --git a/test/test_landmark.py b/test/test_landmark.py index 51a8740..57903ce 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -42,6 +42,15 @@ def test_landmark_exact_graph(): assert(G.landmark_op.shape == (n_landmark, n_landmark)) assert(isinstance(G, graphtools.graphs.TraditionalGraph)) assert(isinstance(G, graphtools.graphs.LandmarkGraph)) + assert(G.transitions.shape == (data.shape[0], n_landmark)) + assert(G.clusters.shape == data.shape[0]) + assert(len(np.unique(G.clusters)) <= n_landmark) + signal = np.random.normal(0, 1, [n_landmark, 10]) + interpolated_signal = G.interpolate(signal) + assert interpolated_signal.shape == (data.shape[0], signal.shape[1]) + G._reset_landmarks() + # no error on double delete + G._reset_landmarks() def test_landmark_knn_graph(): @@ -49,6 +58,7 @@ def test_landmark_knn_graph(): # knn graph G = build_graph(data, n_landmark=n_landmark, n_pca=20, decay=None, knn=5, random_state=42) + assert(G.transitions.shape == (data.shape[0], n_landmark)) assert(G.landmark_op.shape == (n_landmark, n_landmark)) assert(isinstance(G, graphtools.graphs.kNNGraph)) assert(isinstance(G, graphtools.graphs.LandmarkGraph)) @@ -62,6 +72,7 @@ def test_landmark_mnn_graph(): thresh=1e-5, n_pca=None, decay=10, knn=5, random_state=42, sample_idx=sample_idx) + assert(G.clusters.shape == data.shape[0]) assert(G.landmark_op.shape == (n_landmark, n_landmark)) assert(isinstance(G, graphtools.graphs.MNNGraph)) assert(isinstance(G, graphtools.graphs.LandmarkGraph)) diff --git a/test/test_mnn.py b/test/test_mnn.py index 898c721..9bf284a 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -75,6 +75,26 @@ def test_mnn_with_vector_theta(): theta=np.linspace(0, 1, n_sample - 1)) +@raises(ValueError) +def test_mnn_with_unbounded_theta(): + build_graph( + data, thresh=0, n_pca=20, + decay=10, knn=5, random_state=42, + sample_idx=digits['target'], + kernel_symm='theta', + theta=2) + + +@raises(ValueError) +def test_mnn_with_string_theta(): + build_graph( + data, thresh=0, n_pca=20, + decay=10, knn=5, random_state=42, + sample_idx=digits['target'], + kernel_symm='theta', + theta='invalid') + + @warns(FutureWarning) def test_mnn_with_gamma(): build_graph( @@ -95,6 +115,25 @@ def test_mnn_with_kernel_symm_gamma(): theta=0.9) +@warns(UserWarning) +def test_mnn_with_theta_and_kernel_symm_not_theta(): + build_graph( + data, thresh=0, n_pca=20, + decay=10, knn=5, random_state=42, + sample_idx=digits['target'], + kernel_symm='+', + theta=0.9) + + +@warns(UserWarning) +def test_mnn_with_kernel_symmm_theta_and_no_theta(): + build_graph( + data, thresh=0, n_pca=20, + decay=10, knn=5, random_state=42, + sample_idx=digits['target'], + kernel_symm='theta') + + def test_mnn_with_non_zero_indexed_sample_idx(): X, sample_idx = generate_swiss_roll() G = build_graph(X, sample_idx=sample_idx, From 
5104b74ed0b5c07d60764cfc8e5090bcf7b733f2 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 11:21:33 -0800 Subject: [PATCH 25/26] cache pip --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e346d3b..64d5223 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ sudo: required - cache: packages + cache: pip addons: apt: From 76ba4f0189985418ff189bcafa7e7e5d84e8c753 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 22 Nov 2018 11:21:54 -0800 Subject: [PATCH 26/26] make tests pass --- graphtools/graphs.py | 28 +++++++++++++++++----------- test/test_api.py | 1 + test/test_data.py | 1 + test/test_exact.py | 10 ++++++++++ test/test_knn.py | 1 + test/test_landmark.py | 5 +++-- test/test_mnn.py | 1 + 7 files changed, 34 insertions(+), 13 deletions(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index f1c54aa..a0810da 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -64,24 +64,25 @@ class kNNGraph(DataGraph): def __init__(self, data, knn=5, decay=None, bandwidth=None, distance='euclidean', thresh=1e-4, n_pca=None, **kwargs): - self.knn = knn - self.decay = decay - self.bandwidth = bandwidth - self.distance = distance - self.thresh = thresh if decay is not None and thresh <= 0: raise ValueError("Cannot instantiate a kNNGraph with `decay=None` " "and `thresh=0`. Use a TraditionalGraph instead.") if knn > data.shape[0]: warnings.warn("Cannot set knn ({k}) to be greater than " - "data.shape[0] ({n}). Setting knn={n}".format( + "n_samples ({n}). Setting knn={n}".format( k=knn, n=data.shape[0])) + knn = data.shape[0] if n_pca is None and data.shape[1] > 500: warnings.warn("Building a kNNGraph on data of shape {} is " "expensive. Consider setting n_pca.".format( data.shape), UserWarning) + self.knn = knn + self.decay = decay + self.bandwidth = bandwidth + self.distance = distance + self.thresh = thresh super().__init__(data, n_pca=n_pca, **kwargs) def get_params(self): @@ -232,7 +233,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None): bandwidth = self.bandwidth if knn > self.data.shape[0]: warnings.warn("Cannot set knn ({k}) to be greater than " - "data.shape[0] ({n}). Setting knn={n}".format( + "n_samples ({n}). Setting knn={n}".format( k=knn, n=self.data.shape[0])) Y = self._check_extension_shape(Y) @@ -675,15 +676,20 @@ def __init__(self, data, n_pca=None, thresh=1e-4, precomputed=None, **kwargs): + if decay is None and precomputed not in ['affinity', 'adjacency']: + # decay high enough is basically a binary kernel + raise ValueError("`decay` must be provided for a TraditionalGraph" + ". For kNN kernel, use kNNGraph.") if precomputed is not None and n_pca is not None: # the data itself is a matrix of distances / affinities n_pca = None warnings.warn("n_pca cannot be given on a precomputed graph." " Setting n_pca=None", RuntimeWarning) - if decay is None and precomputed not in ['affinity', 'adjacency']: - # decay high enough is basically a binary kernel - raise ValueError("`decay` must be provided for a TraditionalGraph" - ". For kNN kernel, use kNNGraph.") + if knn > data.shape[0]: + warnings.warn("Cannot set knn ({k}) to be greater than or equal to" + " n_samples ({n}). Setting knn={n}".format( + k=knn, n=data.shape[0] - 1)) + knn = data.shape[0] - 1 if precomputed is not None: if precomputed not in ["distance", "affinity", "adjacency"]: raise ValueError("Precomputed value {} not recognized. 
" diff --git a/test/test_api.py b/test/test_api.py index 64b0f4f..49d2126 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,3 +1,4 @@ +from __future__ import print_function from load_tests import ( nose2, data, diff --git a/test/test_data.py b/test/test_data.py index 39d7966..dfa0889 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -1,3 +1,4 @@ +from __future__ import print_function from load_tests import ( np, sp, diff --git a/test/test_exact.py b/test/test_exact.py index 43216ac..80d84f0 100644 --- a/test/test_exact.py +++ b/test/test_exact.py @@ -1,3 +1,4 @@ +from __future__ import print_function from load_tests import ( graphtools, np, @@ -83,6 +84,15 @@ def test_duplicate_data(): thresh=0) +@warns(UserWarning) +def test_k_too_large(): + build_graph(data, + n_pca=20, + decay=10, + knn=len(data) + 1, + thresh=0) + + ##################################################### # Check kernel ##################################################### diff --git a/test/test_knn.py b/test/test_knn.py index c359ede..7d15b0d 100644 --- a/test/test_knn.py +++ b/test/test_knn.py @@ -1,3 +1,4 @@ +from __future__ import print_function from load_tests import ( graphtools, np, diff --git a/test/test_landmark.py b/test/test_landmark.py index 57903ce..da0fbbc 100644 --- a/test/test_landmark.py +++ b/test/test_landmark.py @@ -1,3 +1,4 @@ +from __future__ import print_function from load_tests import ( graphtools, np, @@ -43,7 +44,7 @@ def test_landmark_exact_graph(): assert(isinstance(G, graphtools.graphs.TraditionalGraph)) assert(isinstance(G, graphtools.graphs.LandmarkGraph)) assert(G.transitions.shape == (data.shape[0], n_landmark)) - assert(G.clusters.shape == data.shape[0]) + assert(G.clusters.shape == (data.shape[0],)) assert(len(np.unique(G.clusters)) <= n_landmark) signal = np.random.normal(0, 1, [n_landmark, 10]) interpolated_signal = G.interpolate(signal) @@ -72,7 +73,7 @@ def test_landmark_mnn_graph(): thresh=1e-5, n_pca=None, decay=10, knn=5, random_state=42, sample_idx=sample_idx) - assert(G.clusters.shape == data.shape[0]) + assert(G.clusters.shape == (X.shape[0],)) assert(G.landmark_op.shape == (n_landmark, n_landmark)) assert(isinstance(G, graphtools.graphs.MNNGraph)) assert(isinstance(G, graphtools.graphs.LandmarkGraph)) diff --git a/test/test_mnn.py b/test/test_mnn.py index 9bf284a..be78437 100644 --- a/test/test_mnn.py +++ b/test/test_mnn.py @@ -1,3 +1,4 @@ +from __future__ import print_function from load_tests import ( graphtools, np,