From 55941379e46620c590574db656669ebb652dcd79 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 29 Nov 2018 23:25:02 -0500
Subject: [PATCH 1/9] test to_igraph when G.W does not exist

---
 test/test_api.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/test/test_api.py b/test/test_api.py
index e8134af..22e174e 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -74,6 +74,11 @@ def test_to_igraph():
     assert isinstance(G2, igraph.Graph)
     assert np.all(np.array(G2.get_adjacency(
         attribute="weight").data) == G.W)
+    G = build_graph(data, use_pygsp=False)
+    G2 = G.to_igraph()
+    assert isinstance(G2, igraph.Graph)
+    assert np.all(np.array(G2.get_adjacency(
+        attribute="weight").data) == G.W)
 
 
 @warns(UserWarning)

From f3dd12bf6f3134a03909cb0d147822ed881934b3 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Sat, 26 Jan 2019 18:08:14 -0500
Subject: [PATCH 2/9] setdiag should operate on coo, lil or dia matrix

---
 graphtools/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/graphtools/utils.py b/graphtools/utils.py
index e2d9290..4f1df04 100644
--- a/graphtools/utils.py
+++ b/graphtools/utils.py
@@ -33,8 +33,11 @@ def dense_set_diagonal(X, diag):
 
 
 def sparse_set_diagonal(X, diag):
+    cls = type(X)
+    if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)):
+        X = X.tocoo()
     X.setdiag(diag)
-    return X
+    return cls(X)
 
 
 def set_diagonal(X, diag):

From f9bb59ebf2f3e46740b99a5e9b6f1b6d5848fa77 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Sat, 26 Jan 2019 18:17:22 -0500
Subject: [PATCH 3/9] fix igraph test

---
 test/test_api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_api.py b/test/test_api.py
index 22e174e..5379cf5 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -74,8 +74,8 @@ def test_to_igraph():
     assert isinstance(G2, igraph.Graph)
     assert np.all(np.array(G2.get_adjacency(
         attribute="weight").data) == G.W)
-    G = build_graph(data, use_pygsp=False)
-    G2 = G.to_igraph()
+    G3 = build_graph(data, use_pygsp=False)
+    G2 = G3.to_igraph()
     assert isinstance(G2, igraph.Graph)
     assert np.all(np.array(G2.get_adjacency(
         attribute="weight").data) == G.W)

From 292cb429747c32562c8430b203f5f90ebb48ec74 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 28 Jan 2019 11:43:11 -0500
Subject: [PATCH 4/9] auto deploy

---
 .travis.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 64d5223..3c056a7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,5 +18,16 @@
     - python setup.py test
     - cd doc; make html; cd ..
 
+  deploy:
+    provider: pypi
+    user: scottgigante
+    password: ${PYPI_PASSWORD}
+    distributions: sdist bdist_wheel
+    skip_existing: true
+    skip_cleanup: true
+    on:
+      tags: true
+      branch: master
+
   after_success:
     - coveralls

From 85f0e22960a186bbafaa3a1eb3ebbf529f6a1f9f Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 4 Feb 2019 14:59:23 -0500
Subject: [PATCH 5/9] don't include self for knn

---
 graphtools/graphs.py  | 126 ++++++++++++++----------------------------
 test/test_exact.py    |  54 +++++++++---------
 test/test_knn.py      |  70 +++++++++++------------
 test/test_landmark.py |  12 ++--
 test/test_mnn.py      |   7 +--
 5 files changed, 112 insertions(+), 157 deletions(-)

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index 22a40cd..75e2870 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -85,11 +85,11 @@ def __init__(self, data, knn=5, decay=None,
         if decay is None and bandwidth is not None:
             warnings.warn("`bandwidth` is not used when `decay=None`.",
                           UserWarning)
-        if knn > data.shape[0]:
+        if knn > data.shape[0] - 2:
             warnings.warn("Cannot set knn ({k}) to be greater than "
                           "n_samples ({n}). Setting knn={n}".format(
-                              k=knn, n=data.shape[0]))
-            knn = data.shape[0]
+                              k=knn, n=data.shape[0] - 2))
+            knn = data.shape[0] - 2
         if n_pca is None and data.shape[1] > 500:
             warnings.warn("Building a kNNGraph on data of shape {} is "
                           "expensive. Consider setting n_pca.".format(
@@ -189,7 +189,7 @@ def knn_tree(self):
         except AttributeError:
             try:
                 self._knn_tree = NearestNeighbors(
-                    n_neighbors=self.knn,
+                    n_neighbors=self.knn + 1,
                     algorithm='ball_tree',
                     metric=self.distance,
                     n_jobs=self.n_jobs).fit(self.data_nu)
@@ -201,7 +201,7 @@ def knn_tree(self):
                         self.distance),
                     UserWarning)
                 self._knn_tree = NearestNeighbors(
-                    n_neighbors=self.knn,
+                    n_neighbors=self.knn + 1,
                     algorithm='auto',
                     metric=self.distance,
                     n_jobs=self.n_jobs).fit(self.data_nu)
@@ -219,9 +219,35 @@ def build_kernel(self):
             symmetric matrix with ones down the diagonal
             with no non-negative entries.
         """
-        K = self.build_kernel_to_data(self.data_nu)
+        K = self.build_kernel_to_data(self.data_nu, knn=self.knn + 1)
         return K
 
+    def _check_duplicates(self, distances, indices):
+        """Emit a RuntimeWarning for zero-distance (duplicate) neighbors."""
+        has_duplicates = distances[:, 1] == 0
+        if not np.any(has_duplicates):
+            return
+        n_zero = np.sum(distances[:, 1:] == 0)  # zero entries past column 0
+        if n_zero < 20:
+            idx = np.argwhere((distances == 0) & has_duplicates[:, None])
+            duplicate_ids = np.array(
+                [[indices[i[0], i[1]], i[0]]
+                 for i in idx if indices[i[0], i[1]] < i[0]])
+            duplicate_ids = duplicate_ids[np.argsort(duplicate_ids[:, 0])]
+            duplicate_names = ", ".join(["{} and {}".format(i[0], i[1])
+                                         for i in duplicate_ids])
+            warnings.warn(
+                "Detected zero distance between samples {}. "
+                "Consider removing duplicates to avoid errors in "
+                "downstream processing.".format(duplicate_names),
+                RuntimeWarning)
+        else:
+            warnings.warn(
+                "Detected zero distance between {} pairs of samples. "
+                "Consider removing duplicates to avoid errors in "
+                "downstream processing.".format(n_zero),
+                RuntimeWarning)
+
     def build_kernel_to_data(self, Y, knn=None, bandwidth=None,
                              bandwidth_scale=None):
         """Build a kernel from new input data `Y` to the `self.data`
@@ -281,30 +307,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None,
             search_knn = min(knn * 20, self.data_nu.shape[0])
             distances, indices = knn_tree.kneighbors(
                 Y, n_neighbors=search_knn)
-            if np.any(distances[:, 1] == 0):
-                has_duplicates = distances[:, 1] == 0
-                if np.sum(distances[:, 1:] == 0) < 20:
-                    idx = np.argwhere((distances == 0) &
-                                      has_duplicates[:, None])
-                    duplicate_ids = np.array(
-                        [[indices[i[0], i[1]], i[0]]
-                         for i in idx if indices[i[0], i[1]] < i[0]])
-                    duplicate_ids = duplicate_ids[
-                        np.argsort(duplicate_ids[:, 0])]
-                    duplicate_names = ", ".join(["{} and {}".format(i[0], i[1])
-                                                 for i in duplicate_ids])
-                    warnings.warn(
-                        "Detected zero distance between samples {}. "
-                        "Consider removing duplicates to avoid errors in "
-                        "downstream processing.".format(duplicate_names),
-                        RuntimeWarning)
-                else:
-                    warnings.warn(
-                        "Detected zero distance between {} pairs of samples. "
-                        "Consider removing duplicates to avoid errors in "
-                        "downstream processing.".format(
-                            np.sum(np.sum(distances[:, 1:]))),
-                        RuntimeWarning)
+            self._check_duplicates(distances, indices)
             tasklogger.log_complete("KNN search")
             tasklogger.log_start("affinities")
             if bandwidth is None:
@@ -338,7 +341,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None,
                     len(update_idx)))
             if search_knn > self.data_nu.shape[0] / 2:
                 knn_tree = NearestNeighbors(
-                    knn, algorithm='brute',
+                    search_knn, algorithm='brute',
                     n_jobs=self.n_jobs).fit(self.data_nu)
             if len(update_idx) > 0:
                 tasklogger.log_debug(
@@ -771,11 +774,11 @@ def __init__(self, data,
         if knn is None and bandwidth is None:
             raise ValueError(
                 "Either `knn` or `bandwidth` must be provided.")
-        if knn is not None and knn > data.shape[0]:
-            warnings.warn("Cannot set knn ({k}) to be greater than or equal to"
-                          " n_samples ({n}). Setting knn={n}".format(
-                              k=knn, n=data.shape[0] - 1))
-            knn = data.shape[0] - 1
+        if knn is not None and knn > data.shape[0] - 2:
+            warnings.warn("Cannot set knn ({k}) to be greater than "
+                          " n_samples - 2 ({n}). Setting knn={n}".format(
+                              k=knn, n=data.shape[0] - 2))
+            knn = data.shape[0] - 2
         if precomputed is not None:
             if precomputed not in ["distance", "affinity", "adjacency"]:
                 raise ValueError("Precomputed value {} not recognized. "
@@ -918,7 +921,8 @@ def build_kernel(self):
                     "Choose from ['affinity', 'adjacency', 'distance', "
                     "None]".format(self.precomputed))
             if self.bandwidth is None:
-                knn_dist = np.partition(pdx, self.knn, axis=1)[:, :self.knn]
+                knn_dist = np.partition(
+                    pdx, self.knn + 1, axis=1)[:, :self.knn + 1]
                 bandwidth = np.max(knn_dist, axis=1)
             elif callable(self.bandwidth):
                 bandwidth = self.bandwidth(pdx)
@@ -1300,8 +1304,6 @@ def build_kernel_to_data(self, Y, theta=None):
         transformation of the landmarks can be trivially applied to `Y` by
         performing
 
-        TODO: test this.
-
         `transform_Y = transitions.dot(transform)`
 
         Parameters
@@ -1323,52 +1325,6 @@ def build_kernel_to_data(self, Y, theta=None):
             Transition matrix from `Y` to `self.data`
         """
         raise NotImplementedError
-        tasklogger.log_warning("building MNN kernel to theta is experimental")
-        if not isinstance(self.theta, str) and \
-                not isinstance(self.theta, numbers.Number):
-            if theta is None:
-                raise ValueError(
-                    "self.theta is a matrix but theta is not provided.")
-            elif len(theta) != len(self.samples):
-                raise ValueError(
-                    "theta should have one value for every sample")
-
-        Y = self._check_extension_shape(Y)
-        kernel_xy = []
-        kernel_yx = []
-        # don't really need within Y kernel
-        Y_graph = kNNGraph(Y, n_pca=None, knn=0, **(self.knn_args))
-        y_knn = self._weight_knn(sample_size=Y.shape[0])
-        for i, X in enumerate(self.subgraphs):
-            kernel_xy.append(X.build_kernel_to_data(
-                Y, knn=self.weighted_knn[i]))  # kernel X -> Y
-            kernel_yx.append(Y_graph.build_kernel_to_data(
-                X.data_nu, knn=y_knn))  # kernel Y -> X
-        kernel_xy = sparse.hstack(kernel_xy)  # n_cells_y x n_cells_x
-        kernel_yx = sparse.vstack(kernel_yx)  # n_cells_x x n_cells_y
-
-        # symmetrize
-        if theta is not None:
-            # Gamma can be a vector with specific values transitions for
-            # each batch. This allows for technical replicates and
-            # experimental samples to be corrected simultaneously
-            K = np.empty_like(kernel_xy)
-            for i, sample in enumerate(self.samples):
-                sample_idx = self.sample_idx == sample
-                K[:, sample_idx] = theta[i] * \
-                    kernel_xy[:, sample_idx].minimum(
-                        kernel_yx[sample_idx, :].T) + \
-                    (1 - theta[i]) * \
-                    kernel_xy[:, sample_idx].maximum(
-                        kernel_yx[sample_idx, :].T)
-        if self.theta == "+":
-            K = (kernel_xy + kernel_yx.T) / 2
-        elif self.theta == "*":
-            K = kernel_xy.multiply(kernel_yx.T)
-        else:
-            K = self.theta * kernel_xy.minimum(kernel_yx.T) + \
-                (1 - self.theta) * kernel_xy.maximum(kernel_yx.T)
-        return K
 
 
 class kNNLandmarkGraph(kNNGraph, LandmarkGraph):
diff --git a/test/test_exact.py b/test/test_exact.py
index b46c2b8..5373bcd 100644
--- a/test/test_exact.py
+++ b/test/test_exact.py
@@ -103,7 +103,7 @@ def test_k_too_large():
     build_graph(data,
                 n_pca=20,
                 decay=10,
-                knn=len(data) + 1,
+                knn=len(data) - 1,
                 thresh=0)
 
 
@@ -131,7 +131,7 @@ def test_exact_graph():
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
     G2 = build_graph(data_small, thresh=0, n_pca=n_pca,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      bandwidth_scale=bandwidth_scale,
                      use_pygsp=True)
     assert(G.N == G2.N)
@@ -141,7 +141,7 @@ def test_exact_graph():
     assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
     G2 = build_graph(pdx, n_pca=None, precomputed='distance',
                      bandwidth_scale=bandwidth_scale,
-                     decay=a, knn=k, random_state=42, use_pygsp=True)
+                     decay=a, knn=k - 1, random_state=42, use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_equal(G.dw, G2.dw)
     assert((G.W != G2.W).nnz == 0)
@@ -195,7 +195,7 @@ def test_truncated_exact_graph():
     G2 = build_graph(data_small, thresh=thresh,
                      graphtype='exact',
                      n_pca=n_pca,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_equal(G.dw, G2.dw)
@@ -204,7 +204,7 @@ def test_truncated_exact_graph():
     assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
     G2 = build_graph(pdx, n_pca=None, precomputed='distance',
                      thresh=thresh,
-                     decay=a, knn=k, random_state=42, use_pygsp=True)
+                     decay=a, knn=k - 1, random_state=42, use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_equal(G.dw, G2.dw)
     assert((G.W != G2.W).nnz == 0)
@@ -252,14 +252,14 @@ def test_truncated_exact_graph_sparse():
     G2 = build_graph(sp.coo_matrix(data_small), thresh=thresh,
                      graphtype='exact',
                      n_pca=n_pca,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())
     assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
     G2 = build_graph(sp.bsr_matrix(pdx), n_pca=None, precomputed='distance',
                      thresh=thresh,
-                     decay=a, knn=k, random_state=42, use_pygsp=True)
+                     decay=a, knn=k - 1, random_state=42, use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_equal(G.dw, G2.dw)
     assert((G.W != G2.W).nnz == 0)
@@ -304,7 +304,7 @@ def test_truncated_exact_graph_no_pca():
     G2 = build_graph(data_small, thresh=thresh,
                      graphtype='exact',
                      n_pca=n_pca,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_equal(G.dw, G2.dw)
@@ -314,7 +314,7 @@ def test_truncated_exact_graph_no_pca():
     G2 = build_graph(sp.csr_matrix(data_small), thresh=thresh,
                      graphtype='exact',
                      n_pca=n_pca,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      use_pygsp=True)
     assert(G.N == G2.N)
     np.testing.assert_equal(G.dw, G2.dw)
@@ -379,7 +379,7 @@ def test_exact_graph_callable_bandwidth():
     W = np.divide(K, 2)
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
-    G2 = build_graph(data, n_pca=n_pca, knn=knn,
+    G2 = build_graph(data, n_pca=n_pca, knn=knn - 1,
                      decay=decay, bandwidth=bandwidth,
                      random_state=42,
                      thresh=thresh,
@@ -396,7 +396,7 @@ def test_exact_graph_callable_bandwidth():
     W = np.divide(K, 2)
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
-    G2 = build_graph(data, n_pca=n_pca, knn=knn,
+    G2 = build_graph(data, n_pca=n_pca, knn=knn - 1,
                      decay=decay, bandwidth=bandwidth,
                      random_state=42,
                      thresh=thresh,
@@ -432,7 +432,7 @@ def test_exact_graph_anisotropy():
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
     G2 = build_graph(data_small, thresh=0, n_pca=n_pca,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      use_pygsp=True, anisotropy=anisotropy)
     assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
     assert(G.N == G2.N)
@@ -441,15 +441,15 @@ def test_exact_graph_anisotropy():
     assert((G.W != G2.W).nnz == 0)
     assert_raises(ValueError, build_graph,
                   data_small, thresh=0, n_pca=n_pca,
-                  decay=a, knn=k, random_state=42,
+                  decay=a, knn=k - 1, random_state=42,
                   use_pygsp=True, anisotropy=-1)
     assert_raises(ValueError, build_graph,
                   data_small, thresh=0, n_pca=n_pca,
-                  decay=a, knn=k, random_state=42,
+                  decay=a, knn=k - 1, random_state=42,
                   use_pygsp=True, anisotropy=2)
     assert_raises(ValueError, build_graph,
                   data_small, thresh=0, n_pca=n_pca,
-                  decay=a, knn=k, random_state=42,
+                  decay=a, knn=k - 1, random_state=42,
                   use_pygsp=True, anisotropy='invalid')
 
 #####################################################
@@ -462,10 +462,10 @@ def test_build_dense_exact_kernel_to_data(**kwargs):
     n = G.data.shape[0]
     K = G.build_kernel_to_data(data[:n // 2, :])
     assert(K.shape == (n // 2, n))
-    K = G.build_kernel_to_data(G.data)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
-    K = G.build_kernel_to_data(G.data_nu)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
+    K = G.build_kernel_to_data(G.data, knn=G.knn + 1)
+    np.testing.assert_equal(G.kernel - (K + K.T) / 2, 0)
+    K = G.build_kernel_to_data(G.data_nu, knn=G.knn + 1)
+    np.testing.assert_equal(G.kernel - (K + K.T) / 2, 0)
 
 
 def test_build_dense_exact_callable_bw_kernel_to_data(**kwargs):
@@ -473,10 +473,10 @@ def test_build_dense_exact_callable_bw_kernel_to_data(**kwargs):
     n = G.data.shape[0]
     K = G.build_kernel_to_data(data[:n // 2, :])
     assert(K.shape == (n // 2, n))
-    K = G.build_kernel_to_data(G.data)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
-    K = G.build_kernel_to_data(G.data_nu)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
+    K = G.build_kernel_to_data(G.data, knn=G.knn + 1)
+    np.testing.assert_equal(G.kernel - (K + K.T) / 2, 0)
+    K = G.build_kernel_to_data(G.data_nu, knn=G.knn + 1)
+    np.testing.assert_equal(G.kernel - (K + K.T) / 2, 0)
 
 
 def test_build_sparse_exact_kernel_to_data(**kwargs):
@@ -484,10 +484,10 @@ def test_build_sparse_exact_kernel_to_data(**kwargs):
     n = G.data.shape[0]
     K = G.build_kernel_to_data(data[:n // 2, :])
     assert(K.shape == (n // 2, n))
-    K = G.build_kernel_to_data(G.data)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
-    K = G.build_kernel_to_data(G.data_nu)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
+    K = G.build_kernel_to_data(G.data, knn=G.knn + 1)
+    np.testing.assert_equal(G.kernel - (K + K.T) / 2, 0)
+    K = G.build_kernel_to_data(G.data_nu, knn=G.knn + 1)
+    np.testing.assert_equal(G.kernel - (K + K.T) / 2, 0)
 
 
 def test_exact_interpolate():
diff --git a/test/test_knn.py b/test/test_knn.py
index d4fda1e..998f635 100644
--- a/test/test_knn.py
+++ b/test/test_knn.py
@@ -69,7 +69,7 @@ def test_k_too_large():
     build_graph(data,
                 n_pca=20,
                 decay=10,
-                knn=len(data) + 1,
+                knn=len(data) - 1,
                 thresh=1e-4)
 
 
@@ -112,13 +112,13 @@ def test_knn_graph():
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
     G2 = build_graph(data, n_pca=n_pca,
-                     decay=None, knn=k, random_state=42,
+                     decay=None, knn=k - 1, random_state=42,
                      use_pygsp=True)
-    assert(G.N == G2.N)
-    assert(np.all(G.d == G2.d))
-    assert((G.W != G2.W).nnz == 0)
-    assert((G2.W != G.W).sum() == 0)
-    assert(isinstance(G2, graphtools.graphs.kNNGraph))
+    assert G.N == G2.N
+    np.testing.assert_equal(G.dw, G2.dw)
+    assert (G.W != G2.W).nnz == 0
+    assert (G2.W != G.W).sum() == 0
+    assert isinstance(G2, graphtools.graphs.kNNGraph)
 
 
 def test_knn_graph_sparse():
@@ -139,11 +139,11 @@ def test_knn_graph_sparse():
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
     G2 = build_graph(sp.coo_matrix(data), n_pca=n_pca,
-                     decay=None, knn=k, random_state=42,
+                     decay=None, knn=k - 1, random_state=42,
                      use_pygsp=True)
-    assert(G.N == G2.N)
+    assert G.N == G2.N
     np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())
-    assert(isinstance(G2, graphtools.graphs.kNNGraph))
+    assert isinstance(G2, graphtools.graphs.kNNGraph)
 
 
 def test_sparse_alpha_knn_graph():
@@ -162,12 +162,12 @@ def test_sparse_alpha_knn_graph():
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
     G2 = build_graph(data, n_pca=None,  # n_pca,
-                     decay=a, knn=k, thresh=thresh,
+                     decay=a, knn=k - 1, thresh=thresh,
                      bandwidth_scale=bandwidth_scale,
                      random_state=42, use_pygsp=True)
-    assert(np.abs(G.W - G2.W).max() < thresh)
-    assert(G.N == G2.N)
-    assert(isinstance(G2, graphtools.graphs.kNNGraph))
+    assert np.abs(G.W - G2.W).max() < thresh
+    assert G.N == G2.N
+    assert isinstance(G2, graphtools.graphs.kNNGraph)
 
 
 def test_knn_graph_fixed_bandwidth():
@@ -192,7 +192,7 @@ def test_knn_graph_fixed_bandwidth():
                      knn=k, random_state=42,
                      thresh=thresh,
                      use_pygsp=True)
-    assert(isinstance(G2, graphtools.graphs.kNNGraph))
+    assert isinstance(G2, graphtools.graphs.kNNGraph)
     np.testing.assert_array_equal(G.N, G2.N)
     np.testing.assert_array_equal(G.d, G2.d)
     np.testing.assert_allclose(
@@ -211,7 +211,7 @@ def test_knn_graph_fixed_bandwidth():
                      knn=k, random_state=42,
                      thresh=thresh,
                      use_pygsp=True)
-    assert(isinstance(G2, graphtools.graphs.kNNGraph))
+    assert isinstance(G2, graphtools.graphs.kNNGraph)
     np.testing.assert_array_equal(G.N, G2.N)
     np.testing.assert_allclose(G.dw, G2.dw, atol=1e-14)
     np.testing.assert_allclose(
@@ -226,7 +226,7 @@ def test_knn_graph_callable_bandwidth():
     bandwidth = lambda x: 2
     n_pca = 20
     thresh = 1e-4
-    build_graph(data, n_pca=n_pca, knn=k,
+    build_graph(data, n_pca=n_pca, knn=k - 1,
                 decay=decay, bandwidth=bandwidth,
                 random_state=42,
                 thresh=thresh, graphtype='knn')
@@ -267,11 +267,11 @@ def test_knn_graph_anisotropy():
     G = pygsp.graphs.Graph(W)
     G2 = build_graph(data_small, n_pca=n_pca,
                      thresh=thresh,
-                     decay=a, knn=k, random_state=42,
+                     decay=a, knn=k - 1, random_state=42,
                      use_pygsp=True, anisotropy=anisotropy)
-    assert(isinstance(G2, graphtools.graphs.kNNGraph))
-    assert(G.N == G2.N)
-    assert(np.all(G.d == G2.d))
+    assert isinstance(G2, graphtools.graphs.kNNGraph)
+    assert G.N == G2.N
+    np.testing.assert_allclose(G.dw, G2.dw, atol=1e-14, rtol=1e-14)
     np.testing.assert_allclose((G2.W - G.W).data, 0, atol=1e-14, rtol=1e-14)
 
 
@@ -283,23 +283,23 @@ def test_knn_graph_anisotropy():
 def test_build_dense_knn_kernel_to_data():
     G = build_graph(data, decay=None)
     n = G.data.shape[0]
-    K = G.build_kernel_to_data(data[:n // 2, :])
-    assert(K.shape == (n // 2, n))
-    K = G.build_kernel_to_data(G.data)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
-    K = G.build_kernel_to_data(G.data_nu)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
+    K = G.build_kernel_to_data(data[:n // 2, :], knn=G.knn + 1)
+    assert K.shape == (n // 2, n)
+    K = G.build_kernel_to_data(G.data, knn=G.knn + 1)
+    assert (G.kernel - (K + K.T) / 2).nnz == 0
+    K = G.build_kernel_to_data(G.data_nu, knn=G.knn + 1)
+    assert (G.kernel - (K + K.T) / 2).nnz == 0
 
 
 def test_build_sparse_knn_kernel_to_data():
     G = build_graph(data, decay=None, sparse=True)
     n = G.data.shape[0]
-    K = G.build_kernel_to_data(data[:n // 2, :])
-    assert(K.shape == (n // 2, n))
-    K = G.build_kernel_to_data(G.data)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
-    K = G.build_kernel_to_data(G.data_nu)
-    assert(np.sum(G.kernel != (K + K.T) / 2) == 0)
+    K = G.build_kernel_to_data(data[:n // 2, :], knn=G.knn + 1)
+    assert K.shape == (n // 2, n)
+    K = G.build_kernel_to_data(G.data, knn=G.knn + 1)
+    assert (G.kernel - (K + K.T) / 2).nnz == 0
+    K = G.build_kernel_to_data(G.data_nu, knn=G.knn + 1)
+    assert (G.kernel - (K + K.T) / 2).nnz == 0
 
 
 def test_knn_interpolate():
@@ -307,8 +307,8 @@ def test_knn_interpolate():
     assert_raises(ValueError, G.interpolate, data)
     pca_data = PCA(2).fit_transform(data)
     transitions = G.extend_to_data(data)
-    assert(np.all(G.interpolate(pca_data, Y=data) ==
-                  G.interpolate(pca_data, transitions=transitions)))
+    np.testing.assert_equal(G.interpolate(pca_data, Y=data), G.interpolate(
+        pca_data, transitions=transitions))
 
 
 #################################################
diff --git a/test/test_landmark.py b/test/test_landmark.py
index 95b7962..950af06 100644
--- a/test/test_landmark.py
+++ b/test/test_landmark.py
@@ -39,7 +39,7 @@ def test_landmark_exact_graph():
     # exact graph
     G = build_graph(data, n_landmark=n_landmark,
                     thresh=0, n_pca=20,
-                    decay=10, knn=5, random_state=42)
+                    decay=10, knn=5 - 1, random_state=42)
     assert(G.landmark_op.shape == (n_landmark, n_landmark))
     assert(isinstance(G, graphtools.graphs.TraditionalGraph))
     assert(isinstance(G, graphtools.graphs.LandmarkGraph))
@@ -58,7 +58,7 @@ def test_landmark_knn_graph():
     n_landmark = 500
     # knn graph
     G = build_graph(data, n_landmark=n_landmark, n_pca=20,
-                    decay=None, knn=5, random_state=42)
+                    decay=None, knn=5 - 1, random_state=42)
     assert(G.transitions.shape == (data.shape[0], n_landmark))
     assert(G.landmark_op.shape == (n_landmark, n_landmark))
     assert(isinstance(G, graphtools.graphs.kNNGraph))
@@ -71,7 +71,7 @@ def test_landmark_mnn_graph():
     # mnn graph
     G = build_graph(X, n_landmark=n_landmark,
                     thresh=1e-5, n_pca=None,
-                    decay=10, knn=5, random_state=42,
+                    decay=10, knn=5 - 1, random_state=42,
                     sample_idx=sample_idx)
     assert(G.clusters.shape == (X.shape[0],))
     assert(G.landmark_op.shape == (n_landmark, n_landmark))
@@ -89,7 +89,7 @@ def test_landmark_exact_pygsp_graph():
     # exact graph
     G = build_graph(data, n_landmark=n_landmark,
                     thresh=0, n_pca=10,
-                    decay=10, knn=3, random_state=42,
+                    decay=10, knn=3 - 1, random_state=42,
                     use_pygsp=True)
     assert(G.landmark_op.shape == (n_landmark, n_landmark))
     assert(isinstance(G, graphtools.graphs.TraditionalGraph))
@@ -101,7 +101,7 @@ def test_landmark_knn_pygsp_graph():
     n_landmark = 500
     # knn graph
     G = build_graph(data, n_landmark=n_landmark, n_pca=10,
-                    decay=None, knn=3, random_state=42,
+                    decay=None, knn=3 - 1, random_state=42,
                     use_pygsp=True)
     assert(G.landmark_op.shape == (n_landmark, n_landmark))
     assert(isinstance(G, graphtools.graphs.kNNGraph))
@@ -115,7 +115,7 @@ def test_landmark_mnn_pygsp_graph():
     # mnn graph
     G = build_graph(X, n_landmark=n_landmark,
                     thresh=1e-3, n_pca=None,
-                    decay=10, knn=3, random_state=42,
+                    decay=10, knn=3 - 1, random_state=42,
                     sample_idx=sample_idx, use_pygsp=True)
     assert(G.landmark_op.shape == (n_landmark, n_landmark))
     assert(isinstance(G, graphtools.graphs.MNNGraph))
diff --git a/test/test_mnn.py b/test/test_mnn.py
index 217913d..5344e25 100644
--- a/test/test_mnn.py
+++ b/test/test_mnn.py
@@ -220,14 +220,13 @@ def test_mnn_graph_float_theta():
                  ((1 - theta) * np.maximum(K, K.T)))
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
-    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta,
+    G2 = graphtools.Graph(X, knn=k, decay=a, beta=beta,
                           kernel_symm='theta', theta=theta,
                           distance=metric, sample_idx=sample_idx, thresh=0,
                           use_pygsp=True)
     assert G.N == G2.N
     np.testing.assert_array_equal(G.dw, G2.dw)
-    assert (G.W != G2.W).nnz == 0
-    assert (G2.W != G.W).sum() == 0
+    np.testing.assert_array_equal((G.W - G2.W).data, 0)
     assert isinstance(G2, graphtools.graphs.MNNGraph)
 
 
@@ -290,7 +289,7 @@ def test_mnn_graph_matrix_theta():
                  ((1 - matrix_theta) * np.maximum(K, K.T)))
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
-    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta,
+    G2 = graphtools.Graph(X, knn=k, decay=a, beta=beta,
                           kernel_symm='theta', theta=theta,
                           distance=metric, sample_idx=sample_idx, thresh=0,
                           use_pygsp=True)

From a1a3721681ae172ced5fc12fa291cdc2638b604e Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 6 Feb 2019 09:46:46 -0500
Subject: [PATCH 6/9] log mnn kernel creation

---
 graphtools/graphs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index 75e2870..29653a3 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -1233,6 +1233,7 @@ def build_kernel(self):
             self.subgraphs.append(graph)  # append to list of subgraphs
         tasklogger.log_complete("subgraphs")
 
+        tasklogger.log_start("MNN kernel")
         if self.thresh > 0 or self.decay is None:
             K = sparse.lil_matrix(
                 (self.data_nu.shape[0], self.data_nu.shape[0]))
@@ -1263,6 +1264,7 @@ def build_kernel(self):
                 tasklogger.log_complete(
                     "kernel from sample {} to {}".format(self.samples[i],
                                                          self.samples[j]))
+        tasklogger.log_complete("MNN kernel")
         return K
 
     def symmetrize_kernel(self, K):

From c2ad629ac73ff9a55c451e307c76b0b64ffdb17a Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 6 Feb 2019 09:46:54 -0500
Subject: [PATCH 7/9] bump to 1.0.0

---
 graphtools/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/version.py b/graphtools/version.py
index 3ced358..5becc17 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "0.2.1"
+__version__ = "1.0.0"

From a54cd4b5b1649b626b6ad991197b340010f6f9e3 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 6 Feb 2019 10:52:49 -0500
Subject: [PATCH 8/9] deprecate adaptive_k, remove matrix theta, fix tests

---
 graphtools/base.py   |  28 +++++++++++
 graphtools/graphs.py | 112 +++++--------------------------------------
 test/test_mnn.py     |  83 +++++++++++++++-----------------
 3 files changed, 79 insertions(+), 144 deletions(-)

diff --git a/graphtools/base.py b/graphtools/base.py
index 55d72c5..2dc8736 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -65,6 +65,9 @@ def _get_param_names(cls):
         return parameters
 
     def set_params(self, **kwargs):
+        # for k in kwargs:
+        #     raise TypeError("set_params() got an unexpected "
+        #                     "keyword argument '{}'".format(k))
         return self
 
 
@@ -866,3 +869,28 @@ def interpolate(self, transform, transitions=None, Y=None):
                 transitions = self.extend_to_data(Y)
         Y_transform = transitions.dot(transform)
         return Y_transform
+
+    def set_params(self, **params):
+        """Set parameters on this object
+
+        Safe setter method - attributes should not be modified directly as some
+        changes are not valid.
+        Valid parameters:
+        - n_jobs
+        - verbose
+
+        Parameters
+        ----------
+        params : key-value pairs of parameter name and new values
+
+        Returns
+        -------
+        self
+        """
+        if 'n_jobs' in params:
+            self.n_jobs = params['n_jobs']
+        if 'verbose' in params:
+            self.verbose = params['verbose']
+            tasklogger.set_level(self.verbose)
+        super().set_params(**params)
+        return self
diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index 29653a3..298bc7e 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -2,12 +2,12 @@
 from builtins import super
 import numpy as np
 from sklearn.neighbors import NearestNeighbors
-from scipy.spatial.distance import pdist, cdist
-from scipy.spatial.distance import squareform
 from sklearn.utils.extmath import randomized_svd
 from sklearn.preprocessing import normalize
 from sklearn.cluster import MiniBatchKMeans
 from sklearn.utils.graph import graph_shortest_path
+from scipy.spatial.distance import pdist, cdist
+from scipy.spatial.distance import squareform
 from scipy import sparse
 import numbers
 import warnings
@@ -1038,8 +1038,8 @@ class MNNGraph(DataGraph):
 
     def __init__(self, data, sample_idx,
                  knn=5, beta=1, n_pca=None,
-                 adaptive_k=None,
                  decay=None,
+                 adaptive_k=None,
                  bandwidth=None,
                  distance='euclidean',
                  thresh=1e-4,
@@ -1049,14 +1049,12 @@ def __init__(self, data, sample_idx,
         self.sample_idx = sample_idx
         self.samples, self.n_cells = np.unique(
             self.sample_idx, return_counts=True)
-        self.adaptive_k = adaptive_k
         self.knn = knn
         self.decay = decay
         self.distance = distance
         self.bandwidth = bandwidth
         self.thresh = thresh
         self.n_jobs = n_jobs
-        self.weighted_knn = self._weight_knn()
 
         if sample_idx is None:
             raise ValueError("sample_idx must be given. For a graph without"
@@ -1068,78 +1066,25 @@ def __init__(self, data, sample_idx,
         elif len(self.samples) == 1:
             raise ValueError(
                 "sample_idx must contain more than one unique value")
+        if adaptive_k is not None:
+            warnings.warn("`adaptive_k` has been deprecated. Using fixed knn.",
+                          DeprecationWarning)
 
         super().__init__(data, n_pca=n_pca, **kwargs)
 
     def _check_symmetrization(self, kernel_symm, theta):
         if kernel_symm == 'theta' and theta is not None and \
                 not isinstance(theta, numbers.Number):
-            # matrix theta
-            try:
-                theta.shape
-            except AttributeError:
-                raise ValueError("theta {} not recognized. "
-                                 "Expected a float between 0 and 1 "
-                                 "or a [n_batch,n_batch] matrix of "
-                                 "floats between 0 and 1".format(theta))
-            if not np.shape(theta) == (len(self.samples),
-                                       len(self.samples)):
-                raise ValueError(
-                    "Matrix theta must be of shape "
-                    "({}), got ({})".format(
-                        (len(self.samples),
-                         len(self.samples)), theta.shape))
-            elif np.max(theta) > 1 or np.min(theta) < 0:
-                raise ValueError(
-                    "Values in matrix theta must be between"
-                    " 0 and 1, got values between {} and {}".format(
-                        np.max(theta), np.min(theta)))
-            elif np.any(theta != theta.T):
-                raise ValueError("theta must be a symmetric matrix")
+            raise TypeError("Expected `theta` as a float. "
+                            "Got {}.".format(type(theta)))
         else:
             super()._check_symmetrization(kernel_symm, theta)
 
-    def _weight_knn(self, sample_size=None):
-        """Select adaptive values of knn
-
-        Parameters
-        ----------
-
-        sample_size : `int` or `None`
-            Number of cells in the sample in question. Used only for
-            out-of-sample extension. If `None`, calculates within-sample
-            knn values.
-
-        Returns
-        -------
-
-        knn : array-like or `int`, weighted knn values
-        """
-        if sample_size is None:
-            # calculate within sample knn values
-            sample_size = self.n_cells
-        if self.adaptive_k == 'min':
-            # the smallest sample has k
-            knn_weight = self.n_cells / np.min(self.n_cells)
-        elif self.adaptive_k == 'mean':
-            # the average sample has k
-            knn_weight = self.n_cells / np.mean(self.n_cells)
-        elif self.adaptive_k == 'sqrt':
-            # the samples are sqrt'd first, then smallest has k
-            knn_weight = np.sqrt(self.n_cells / np.min(self.n_cells))
-        elif self.adaptive_k is None:
-            knn_weight = np.repeat(1, len(self.n_cells))
-        weighted_knn = np.round(self.knn * knn_weight).astype(np.int32)
-        if len(weighted_knn) == 1:
-            weighted_knn = weighted_knn[0]
-        return weighted_knn
-
     def get_params(self):
         """Get parameters from this object
         """
         params = super().get_params()
         params.update({'beta': self.beta,
-                       'adaptive_k': self.adaptive_k,
                        'knn': self.knn,
                        'decay': self.decay,
                        'bandwidth': self.bandwidth,
@@ -1176,9 +1121,6 @@ def set_params(self, **params):
         # mnn specific arguments
         if 'beta' in params and params['beta'] != self.beta:
             raise ValueError("Cannot update beta. Please create a new graph")
-        if 'adaptive_k' in params and params['adaptive_k'] != self.adaptive_k:
-            raise ValueError(
-                "Cannot update adaptive_k. Please create a new graph")
 
         # knn arguments
         knn_kernel_args = ['knn', 'decay', 'distance', 'thresh', 'bandwidth']
@@ -1216,12 +1158,12 @@ def build_kernel(self):
             tasklogger.log_debug("subgraph {}: sample {}, "
                                  "n = {}, knn = {}".format(
                                      i, idx, np.sum(self.sample_idx == idx),
-                                     self.weighted_knn[i]))
+                                     self.knn))
             # select data for sample
             data = self.data_nu[self.sample_idx == idx]
             # build a kNN graph for cells within sample
             graph = Graph(data, n_pca=None,
-                          knn=self.weighted_knn[i],
+                          knn=self.knn,
                           decay=self.decay,
                           bandwidth=self.bandwidth,
                           distance=self.distance,
@@ -1229,6 +1171,7 @@ def build_kernel(self):
                           verbose=self.verbose,
                           random_state=self.random_state,
                           n_jobs=self.n_jobs,
+                          kernel_symm='+',
                           initialize=True)
             self.subgraphs.append(graph)  # append to list of subgraphs
         tasklogger.log_complete("subgraphs")
@@ -1251,7 +1194,7 @@ def build_kernel(self):
                                                          self.samples[j]))
                 Kij = Y.build_kernel_to_data(
                     X.data_nu,
-                    knn=self.weighted_knn[i])
+                    knn=self.knn)
                 between_batch_norm = np.array(np.sum(Kij, 1)).flatten()
                 scale = np.minimum(1, within_batch_norm /
                                    between_batch_norm) * self.beta
@@ -1267,37 +1210,6 @@ def build_kernel(self):
         tasklogger.log_complete("MNN kernel")
         return K
 
-    def symmetrize_kernel(self, K):
-        if self.kernel_symm == 'theta' and self.theta is not None and \
-                not isinstance(self.theta, numbers.Number):
-            # matrix theta
-            # Theta can be a matrix with specific values transitions for
-            # each batch. This allows for technical replicates and
-            # experimental samples to be corrected simultaneously
-            tasklogger.log_debug("Using theta symmetrization. "
-                                 "Theta:\n{}".format(self.theta))
-            for i, sample_i in enumerate(self.samples):
-                for j, sample_j in enumerate(self.samples):
-                    if j < i:
-                        continue
-                    Kij = K[np.ix_(self.sample_idx == sample_i,
-                                   self.sample_idx == sample_j)]
-                    Kji = K[np.ix_(self.sample_idx == sample_j,
-                                   self.sample_idx == sample_i)]
-                    Kij_symm = self.theta[i, j] * \
-                        elementwise_minimum(Kij, Kji.T) + \
-                        (1 - self.theta[i, j]) * \
-                        elementwise_maximum(Kij, Kji.T)
-                    K = set_submatrix(K, self.sample_idx == sample_i,
-                                      self.sample_idx == sample_j, Kij_symm)
-                    if not i == j:
-                        K = set_submatrix(K, self.sample_idx == sample_j,
-                                          self.sample_idx == sample_i,
-                                          Kij_symm.T)
-        else:
-            K = super().symmetrize_kernel(K)
-        return K
-
     def build_kernel_to_data(self, Y, theta=None):
         """Build transition matrix from new data to the graph
 
diff --git a/test/test_mnn.py b/test/test_mnn.py
index 5344e25..3cbdb2c 100644
--- a/test/test_mnn.py
+++ b/test/test_mnn.py
@@ -51,8 +51,8 @@ def test_build_mnn_with_precomputed():
     build_graph(data, n_pca=None, graphtype='mnn', precomputed='distance')
 
 
-@raises(ValueError)
-def test_mnn_with_square_theta_wrong_length():
+@raises(TypeError)
+def test_mnn_with_matrix_theta():
     n_sample = len(np.unique(digits['target']))
     # square matrix theta of the wrong size
     build_graph(
@@ -60,11 +60,11 @@ def test_mnn_with_square_theta_wrong_length():
         decay=10, knn=5, random_state=42,
         sample_idx=digits['target'],
         kernel_symm='theta',
-        theta=np.tile(np.linspace(0, 1, n_sample - 1),
-                      n_sample).reshape(n_sample - 1, n_sample))
+        theta=np.tile(np.linspace(0, 1, n_sample),
+                      n_sample).reshape(n_sample, n_sample))
 
 
-@raises(ValueError)
+@raises(TypeError)
 def test_mnn_with_vector_theta():
     n_sample = len(np.unique(digits['target']))
     # vector theta
@@ -86,7 +86,7 @@ def test_mnn_with_unbounded_theta():
         theta=2)
 
 
-@raises(ValueError)
+@raises(TypeError)
 def test_mnn_with_string_theta():
     build_graph(
         data, thresh=0, n_pca=20,
@@ -135,6 +135,16 @@ def test_mnn_with_kernel_symmm_theta_and_no_theta():
         kernel_symm='theta')
 
 
+@warns(DeprecationWarning)
+def test_mnn_adaptive_k():
+    build_graph(
+        data, thresh=0, n_pca=20,
+        decay=10, knn=5, random_state=42,
+        sample_idx=digits['target'],
+        kernel_symm='theta',
+        theta=0.9, adaptive_k='sqrt')
+
+
 def test_mnn_with_non_zero_indexed_sample_idx():
     X, sample_idx = generate_swiss_roll()
     G = build_graph(X, sample_idx=sample_idx,
@@ -171,14 +181,13 @@ def test_mnn_with_string_sample_idx():
 # Check kernel
 #####################################################
 
-
-def test_mnn_graph_float_theta():
+def test_mnn_graph_no_decay():
     X, sample_idx = generate_swiss_roll()
     theta = 0.9
     k = 10
-    a = 20
+    a = None
     metric = 'euclidean'
-    beta = 0.5
+    beta = 0.2
     samples = np.unique(sample_idx)
 
     K = np.zeros((len(X), len(X)))
@@ -188,18 +197,19 @@ def test_mnn_graph_float_theta():
     for si in samples:
         X_i = X[sample_idx == si]            # get observations in sample i
         for sj in samples:
+            batch_k = k + 1 if si == sj else k
             X_j = X[sample_idx == sj]        # get observation in sample j
             pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
             kdx_ij = np.sort(pdx_ij, axis=1)  # get kNN
-            e_ij = kdx_ij[:, k]             # dist to kNN
-            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
-            k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
+            e_ij = kdx_ij[:, batch_k - 1]             # dist to kNN
+            k_ij = np.where(pdx_ij <= e_ij[:, None], 1, 0)  # apply knn kernel
             if si == sj:
                 K.iloc[sample_idx == si, sample_idx == sj] = (
                     k_ij + k_ij.T) / 2
             else:
                 # fill out values in K for NN on diagonal
                 K.iloc[sample_idx == si, sample_idx == sj] = k_ij
+
     Kn = K.copy()
     for i in samples:
         curr_K = K.iloc[sample_idx == i, sample_idx == i]
@@ -210,10 +220,9 @@ def test_mnn_graph_float_theta():
             else:
                 curr_K = K.iloc[sample_idx == i, sample_idx == j]
                 curr_norm = norm(curr_K, 1, axis=1)
-                scale = np.minimum(
-                    np.ones(len(curr_norm)), i_norm / curr_norm) * beta
-                Kn.iloc[sample_idx == i, sample_idx == j] = (
-                    curr_K.T * scale).T
+                scale = np.minimum(1, i_norm / curr_norm) * beta
+                Kn.iloc[sample_idx == i,
+                        sample_idx == j] = curr_K.values * scale[:, None]
 
     K = Kn
     W = np.array((theta * np.minimum(K, K.T)) +
@@ -230,15 +239,13 @@ def test_mnn_graph_float_theta():
     assert isinstance(G2, graphtools.graphs.MNNGraph)
 
 
-def test_mnn_graph_matrix_theta():
+def test_mnn_graph_decay():
     X, sample_idx = generate_swiss_roll()
-    bs = 0.8
-    theta = np.array([[1, bs],  # 0
-                      [bs,  1]])  # 3
+    theta = 0.9
     k = 10
     a = 20
     metric = 'euclidean'
-    beta = 0
+    beta = 0.2
     samples = np.unique(sample_idx)
 
     K = np.zeros((len(X), len(X)))
@@ -248,10 +255,11 @@ def test_mnn_graph_matrix_theta():
     for si in samples:
         X_i = X[sample_idx == si]            # get observations in sample i
         for sj in samples:
+            batch_k = k if si == sj else k - 1
             X_j = X[sample_idx == sj]        # get observation in sample j
             pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
             kdx_ij = np.sort(pdx_ij, axis=1)  # get kNN
-            e_ij = kdx_ij[:, k]             # dist to kNN
+            e_ij = kdx_ij[:, batch_k]             # dist to kNN
             pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
             k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
             if si == sj:
@@ -260,6 +268,7 @@ def test_mnn_graph_matrix_theta():
             else:
                 # fill out values in K for NN on diagonal
                 K.iloc[sample_idx == si, sample_idx == sj] = k_ij
+
     Kn = K.copy()
     for i in samples:
         curr_K = K.iloc[sample_idx == i, sample_idx == i]
@@ -270,23 +279,13 @@ def test_mnn_graph_matrix_theta():
             else:
                 curr_K = K.iloc[sample_idx == i, sample_idx == j]
                 curr_norm = norm(curr_K, 1, axis=1)
-                scale = np.minimum(
-                    np.ones(len(curr_norm)), i_norm / curr_norm) * beta
-                Kn.iloc[sample_idx == i, sample_idx == j] = (
-                    curr_K.T * scale).T
+                scale = np.minimum(1, i_norm / curr_norm) * beta
+                Kn.iloc[sample_idx == i,
+                        sample_idx == j] = curr_K.values * scale[:, None]
 
     K = Kn
-
-    K = np.array(K)
-
-    matrix_theta = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
-    for ix, si in enumerate(set(sample_idx)):
-        for jx, sj in enumerate(set(sample_idx)):
-            matrix_theta.iloc[sample_idx == si,
-                              sample_idx == sj] = theta[ix, jx]
-
-    W = np.array((matrix_theta * np.minimum(K, K.T)) +
-                 ((1 - matrix_theta) * np.maximum(K, K.T)))
+    W = np.array((theta * np.minimum(K, K.T)) +
+                 ((1 - theta) * np.maximum(K, K.T)))
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
     G2 = graphtools.Graph(X, knn=k, decay=a, beta=beta,
@@ -295,8 +294,7 @@ def test_mnn_graph_matrix_theta():
                           use_pygsp=True)
     assert G.N == G2.N
     np.testing.assert_array_equal(G.dw, G2.dw)
-    assert (G.W != G2.W).nnz == 0
-    assert (G2.W != G.W).sum() == 0
+    np.testing.assert_array_equal((G.W - G2.W).data, 0)
     assert isinstance(G2, graphtools.graphs.MNNGraph)
 
 
@@ -329,7 +327,6 @@ def test_set_params():
         'theta': 0.5,
         'anisotropy': 0,
         'beta': 1,
-        'adaptive_k': None,
         'knn': 3,
         'decay': 10,
         'bandwidth': None,
@@ -356,10 +353,8 @@ def test_set_params():
     assert_raises(ValueError, G.set_params, distance='manhattan')
     assert_raises(ValueError, G.set_params, thresh=1e-3)
     assert_raises(ValueError, G.set_params, beta=0.2)
-    assert_raises(ValueError, G.set_params, adaptive_k='min')
     G.set_params(knn=G.knn,
                  decay=G.decay,
                  thresh=G.thresh,
                  distance=G.distance,
-                 beta=G.beta,
-                 adaptive_k=G.adaptive_k)
+                 beta=G.beta)

From dfd8321f774291bbad5b08575808f50c8fcc8ff7 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Wed, 6 Feb 2019 11:04:17 -0500
Subject: [PATCH 9/9] drop support for py2.7

---
 .travis.yml | 6 ++++--
 setup.py    | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3c056a7..7640295 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,12 +1,14 @@
   language: python
   python:
-    - "2.7"
     - "3.5"
     - "3.6"
+    - "3.7-dev"
 
   sudo: required
 
-  cache: pip
+  cache:
+    - pip
+    - apt
 
   addons:
     apt:
diff --git a/setup.py b/setup.py
index f995b52..fe3176f 100644
--- a/setup.py
+++ b/setup.py
@@ -28,8 +28,8 @@
     'sphinxcontrib-bibtex'
 ]
 
-if sys.version_info[:2] < (2, 7) or (3, 0) <= sys.version_info[:2] < (3, 5):
-    raise RuntimeError("Python version 2.7 or >=3.5 required.")
+if sys.version_info[:2] < (3, 5):
+    raise RuntimeError("Python version >=3.5 required.")
 
 version_py = os.path.join(os.path.dirname(
     __file__), 'graphtools', 'version.py')