From ba74c5f4e2963359bcab1f4374a69457b5456c94 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 12:10:51 -0400
Subject: [PATCH 1/7] bump version

---
 graphtools/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/version.py b/graphtools/version.py
index 0a8da88..02a5c48 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "0.1.6"
+__version__ = "0.1.7rc"

From e9e57c6b1de8e149b96f6b9d537478bd51ad8230 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 15:10:51 -0400
Subject: [PATCH 2/7] replace len with shape[0] for sparse matrix compatibility

---
 graphtools/graphs.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index 818d552..a2d0268 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -244,7 +244,7 @@ def build_kernel_to_data(self, Y, knn=None):
             if len(update_idx) > 0:
                 distances = [d for d in distances]
                 indices = [i for i in indices]
-            while len(update_idx) > len(Y) // 10 and \
+            while len(update_idx) > Y.shape[0] // 10 and \
                     search_knn < self.data_nu.shape[0] / 2:
                 # increase the knn search
                 search_knn = min(search_knn * 20, self.data_nu.shape[0])
@@ -829,9 +829,9 @@ def __init__(self, data, sample_idx,
         if sample_idx is None:
             raise ValueError("sample_idx must be given. For a graph without"
                              " batch correction, use kNNGraph.")
-        elif len(sample_idx) != len(data):
+        elif len(sample_idx) != data.shape[0]:
             raise ValueError("sample_idx ({}) must be the same length as "
-                             "data ({})".format(len(sample_idx), len(data)))
+                             "data ({})".format(len(sample_idx), data.shape[0]))
         elif len(self.samples) == 1:
             raise ValueError(
                 "sample_idx must contain more than one unique value")
@@ -1092,7 +1092,7 @@ def build_kernel_to_data(self, Y, gamma=None):
         kernel_yx = []
         # don't really need within Y kernel
         Y_graph = kNNGraph(Y, n_pca=None, knn=0, **(self.knn_args))
-        y_knn = self._weight_knn(sample_size=len(Y))
+        y_knn = self._weight_knn(sample_size=Y.shape[0])
         for i, X in enumerate(self.subgraphs):
             kernel_xy.append(X.build_kernel_to_data(
                 Y, knn=self.weighted_knn[i]))  # kernel X -> Y

From 5932b749931ceed3a92b0ac8d7eb2c1fb4d01000 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 15:11:17 -0400
Subject: [PATCH 3/7] allow data to be sparse

---
 graphtools/base.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/graphtools/base.py b/graphtools/base.py
index 33c532d..869435d 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -148,10 +148,13 @@ def _reduce_data(self):
             log_complete("PCA")
             return data_nu
         else:
-            data = self.data
-            if sparse.issparse(data):
-                data = data.toarray()
-            return data
+            data_nu = self.data
+            if sparse.issparse(data_nu) and not isinstance(
+                    data_nu, (sparse.csr_matrix,
+                              sparse.csc_matrix,
+                              sparse.bsr_matrix)):
+                data_nu = data_nu.tocsr()
+            return data_nu
 
     def get_params(self):
         """Get parameters from this object

From 7a1248c38c4847c0590ee689fc1e1bc4cd52809b Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 15:11:56 -0400
Subject: [PATCH 4/7] make landmark mnn test fast

---
 test/test_landmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_landmark.py b/test/test_landmark.py
index 18c5b0e..93926b0 100644
--- a/test/test_landmark.py
+++ b/test/test_landmark.py
@@ -54,7 +54,7 @@ def test_landmark_knn_graph():
 def test_landmark_mnn_graph():
     n_landmark = 500
     # mnn graph
-    select_idx = np.random.choice([True, False], len(data), replace=True)
+    select_idx = np.random.choice(len(data), len(data) // 5, replace=False)
     G = build_graph(data[select_idx], n_landmark=n_landmark,
                     thresh=1e-5, n_pca=20,
                     decay=10, knn=5, random_state=42,

From 357a235ca6732b0bda2b6c43985e08a107d6fa18 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 15:12:05 -0400
Subject: [PATCH 5/7] test sparse input

---
 test/load_tests/__init__.py |  2 +-
 test/test_exact.py          | 95 +++++++++++++++++++++++++++++++++++++
 test/test_knn.py            | 35 ++++++++++++++
 3 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/test/load_tests/__init__.py b/test/load_tests/__init__.py
index 4ae5e10..cbeb8e1 100644
--- a/test/load_tests/__init__.py
+++ b/test/load_tests/__init__.py
@@ -1,4 +1,4 @@
-from sklearn.decomposition import PCA
+from sklearn.decomposition import PCA, TruncatedSVD
 from sklearn import datasets
 from scipy.spatial.distance import pdist, cdist, squareform
 import pygsp
diff --git a/test/test_exact.py b/test/test_exact.py
index 6f9ed3b..5f86fbc 100644
--- a/test/test_exact.py
+++ b/test/test_exact.py
@@ -12,6 +12,7 @@
     squareform,
     pdist,
     PCA,
+    TruncatedSVD
 )
 
 #####################################################
@@ -186,6 +187,100 @@ def test_truncated_exact_graph():
     assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
 
 
+def test_truncated_exact_graph_sparse():  # sparse inputs vs dense reference
+    k = 3  # passed to build_graph as knn
+    a = 13  # passed as decay; also the exponent of the reference kernel
+    n_pca = 20
+    thresh = 1e-4  # reference affinities below this are set to zero
+    data_small = data[np.random.choice(
+        len(data), len(data) // 2, replace=False)]  # random half of the rows
+    pca = TruncatedSVD(n_pca,
+                       random_state=42).fit(data_small)
+    data_small_nu = pca.transform(data_small)
+    pdx = squareform(pdist(data_small_nu, metric='euclidean'))
+    knn_dist = np.partition(pdx, k, axis=1)[:, :k]  # k smallest per row
+    epsilon = np.max(knn_dist, axis=1)  # largest of those k, per row
+    weighted_pdx = (pdx.T / epsilon).T  # divide row i by epsilon[i]
+    K = np.exp(-1 * weighted_pdx**a)
+    K[K < thresh] = 0
+    W = K + K.T
+    W = np.divide(W, 2)  # average K with its transpose
+    np.fill_diagonal(W, 0)  # zero the diagonal
+    G = pygsp.graphs.Graph(W)  # reference graph for every check below
+    G2 = build_graph(sp.coo_matrix(data_small), thresh=thresh,  # COO data
+                     graphtype='exact',
+                     n_pca=n_pca,
+                     decay=a, knn=k, random_state=42,
+                     use_pygsp=True)
+    assert(G.N == G2.N)
+    np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())  # tolerance
+    assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+    G2 = build_graph(sp.bsr_matrix(pdx), n_pca=None, precomputed='distance',
+                     thresh=thresh,  # BSR matrix of precomputed distances
+                     decay=a, knn=k, random_state=42, use_pygsp=True)
+    assert(G.N == G2.N)
+    assert(np.all(G.d == G2.d))
+    assert((G.W != G2.W).nnz == 0)  # exact equality, checked both ways
+    assert((G2.W != G.W).sum() == 0)
+    assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+    G2 = build_graph(sp.lil_matrix(K), n_pca=None,  # LIL affinity matrix
+                     precomputed='affinity',
+                     thresh=thresh,
+                     random_state=42, use_pygsp=True)
+    assert(G.N == G2.N)
+    assert(np.all(G.d == G2.d))
+    assert((G.W != G2.W).nnz == 0)
+    assert((G2.W != G.W).sum() == 0)
+    assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+    G2 = build_graph(sp.dok_matrix(W), n_pca=None,  # DOK adjacency matrix
+                     precomputed='adjacency',
+                     random_state=42, use_pygsp=True)
+    assert(G.N == G2.N)
+    assert(np.all(G.d == G2.d))
+    assert((G.W != G2.W).nnz == 0)
+    assert((G2.W != G.W).sum() == 0)
+    assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+
+
+def test_truncated_exact_graph_no_pca():  # dense and CSR input, no PCA
+    k = 3  # passed to build_graph as knn
+    a = 13  # passed as decay; also the exponent of the reference kernel
+    n_pca = None  # distances are computed on data_small directly
+    thresh = 1e-4  # reference affinities below this are set to zero
+    data_small = data[np.random.choice(
+        len(data), len(data) // 10, replace=False)]  # random tenth of rows
+    pdx = squareform(pdist(data_small, metric='euclidean'))
+    knn_dist = np.partition(pdx, k, axis=1)[:, :k]  # k smallest per row
+    epsilon = np.max(knn_dist, axis=1)  # largest of those k, per row
+    weighted_pdx = (pdx.T / epsilon).T  # divide row i by epsilon[i]
+    K = np.exp(-1 * weighted_pdx**a)
+    K[K < thresh] = 0
+    W = K + K.T
+    W = np.divide(W, 2)  # average K with its transpose
+    np.fill_diagonal(W, 0)  # zero the diagonal
+    G = pygsp.graphs.Graph(W)  # reference graph for both checks below
+    G2 = build_graph(data_small, thresh=thresh,  # dense input
+                     graphtype='exact',
+                     n_pca=n_pca,
+                     decay=a, knn=k, random_state=42,
+                     use_pygsp=True)
+    assert(G.N == G2.N)
+    assert(np.all(G.d == G2.d))
+    assert((G.W != G2.W).nnz == 0)  # exact equality, checked both ways
+    assert((G2.W != G.W).sum() == 0)
+    assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+    G2 = build_graph(sp.csr_matrix(data_small), thresh=thresh,  # CSR input
+                     graphtype='exact',
+                     n_pca=n_pca,
+                     decay=a, knn=k, random_state=42,
+                     use_pygsp=True)
+    assert(G.N == G2.N)
+    assert(np.all(G.d == G2.d))
+    assert((G.W != G2.W).nnz == 0)
+    assert((G2.W != G.W).sum() == 0)
+    assert(isinstance(G2, graphtools.graphs.TraditionalGraph))
+
+
 #####################################################
 # Check interpolation
 #####################################################
diff --git a/test/test_knn.py b/test/test_knn.py
index d221934..09b90a4 100644
--- a/test/test_knn.py
+++ b/test/test_knn.py
@@ -1,16 +1,19 @@
 from load_tests import (
     graphtools,
     np,
+    sp,
     pygsp,
     nose2,
     data,
     datasets,
     build_graph,
     assert_raises,
+    warns,
     raises,
     squareform,
     pdist,
     PCA,
+    TruncatedSVD,
 )
 
 
@@ -66,6 +69,31 @@ def test_knn_graph():
     assert(isinstance(G2, graphtools.graphs.kNNGraph))
 
 
+def test_knn_graph_sparse():
+    """Check a kNN graph built from COO input against a dense reference."""
+    k = 3
+    n_pca = 20
+    pca = TruncatedSVD(n_pca, random_state=42).fit(data)
+    data_nu = pca.transform(data)
+    pdx = squareform(pdist(data_nu, metric='euclidean'))
+    knn_dist = np.partition(pdx, k, axis=1)[:, :k]
+    epsilon = np.max(knn_dist, axis=1)  # largest of the k smallest per row
+    # Binary kernel: 1 where pdx[i, j] <= epsilon[i], else 0.  Broadcasting
+    # sets every entry, so nothing is left uninitialised.
+    K = (pdx <= epsilon[:, np.newaxis]).astype(pdx.dtype)
+
+    K = K + K.T
+    W = np.divide(K, 2)  # average K with its transpose
+    np.fill_diagonal(W, 0)  # zero the diagonal
+    G = pygsp.graphs.Graph(W)  # reference graph
+    G2 = build_graph(sp.coo_matrix(data), n_pca=n_pca,
+                     decay=None, knn=k, random_state=42,
+                     use_pygsp=True)
+    assert(G.N == G2.N)
+    np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())
+    assert(isinstance(G2, graphtools.graphs.kNNGraph))
+
+
 def test_sparse_alpha_knn_graph():
     data = datasets.make_swiss_roll()[0]
     k = 5
@@ -88,6 +116,13 @@ def test_sparse_alpha_knn_graph():
     assert(isinstance(G2, graphtools.graphs.kNNGraph))
 
 
+@warns(UserWarning)  # test expects a UserWarning (see load_tests)
+def test_knn_graph_sparse_no_pca():
+    build_graph(sp.coo_matrix(data), n_pca=None,  # sparse input, PCA disabled
+                decay=10, knn=3, thresh=1e-4,
+                random_state=42, use_pygsp=True)
+
+
 #####################################################
 # Check interpolation
 #####################################################

From a86af43be2c4b33f813cab2f18f506851eb7ac98 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 15:12:37 -0400
Subject: [PATCH 6/7] bump version

---
 graphtools/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/version.py b/graphtools/version.py
index 02a5c48..f1380ee 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "0.1.7rc"
+__version__ = "0.1.7"

From d23c72f05adf7b4c4e0c5bb3882e5e7c3b33a91d Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Jun 2018 15:18:05 -0400
Subject: [PATCH 7/7] make faster mnn landmark test pass

---
 test/test_landmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_landmark.py b/test/test_landmark.py
index 93926b0..896388d 100644
--- a/test/test_landmark.py
+++ b/test/test_landmark.py
@@ -52,7 +52,7 @@ def test_landmark_knn_graph():
 
 
 def test_landmark_mnn_graph():
-    n_landmark = 500
+    n_landmark = 150
     # mnn graph
     select_idx = np.random.choice(len(data), len(data) // 5, replace=False)
     G = build_graph(data[select_idx], n_landmark=n_landmark,