Skip to content

Commit 172bf22

Browse files
committed
Some kind of documentation
1 parent 1c8f490 commit 172bf22

File tree

3 files changed

+99
-91
lines changed

3 files changed

+99
-91
lines changed

use_cases/hand_written_digits.py

+35-47
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
from datafold.pcfold.kernels import PCManifoldKernel
1515

1616
import full_matrix
17-
from plot_embedding import plot_embedding
17+
from utils import plot_embedding
18+
from utils import sort_eigen_pairs
1819

1920
# Source code taken and adapted from https://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html
2021

22+
"""Input variables required for the instantiation of FullMatrix"""
2123
executable = "./test_gofmm"
2224
problem_size = 1024
2325
max_leaf_node_size = 512
@@ -30,19 +32,17 @@
3032
matrix_type = "dense"
3133
kernel_type = "gaussian"
3234

35+
"""Loading the hand written digits from sklearn"""
3336
digits = datasets.load_digits(n_class=6)
3437
X = digits.data[0:problem_size,:]
3538
y = digits.target[0:problem_size]
3639
images = digits.images[0:problem_size,:,:]
37-
print("X",X)
38-
print("images",images)
39-
print("images shape",images.shape)
40-
print("target",y)
4140

4241
X_train, X_test, y_train, y_test, images_train, images_test = train_test_split(
4342
X, y, images, train_size=2 / 3, test_size=1 / 3
4443
)
4544

45+
""""Instantiation of point cloud data and find the manifold using DiffusionMaps"""
4646
X_pcm = pfold.PCManifold(X)
4747
X_pcm.optimize_parameters(result_scaling=2)
4848

@@ -59,38 +59,22 @@
5959
dmap = dmap.set_target_coords([1, 2])
6060
X_dmap = dmap.transform(X_pcm)
6161

62-
# Mapping of diffusion maps
63-
plot_embedding(
64-
X_dmap,
65-
y,
66-
images,
67-
title="Diffusion map embedding of the digits (time %.2fs)" % (time.time() - t0),
68-
)
69-
7062
dmap = DiffusionMaps(
7163
kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
7264
n_eigenpairs=6,
7365
dist_kwargs=dict(cut_off=X_pcm.cut_off),
7466
)
7567
dmap = dmap.fit(X_pcm)
76-
plot_pairwise_eigenvector(
77-
eigenvectors=dmap.eigenvectors_[:, 1:],
78-
n=0,
79-
idx_start=1,
80-
fig_params=dict(figsize=(10, 10)),
81-
scatter_params=dict(c=y),
82-
)
83-
84-
plt.savefig('hr_digits_dmap.png')
8568

69+
"""Compute the same kernel matrix with the same optimized datafold parameters, for the instantiation of FullMatrix"""
8670
pcm = pfold.PCManifold(X,
8771
kernel=pfold.DmapKernelFixed(internal_kernel=pfold.GaussianKernel(epsilon=378.0533464967807), is_stochastic=True, alpha=1, symmetrize_kernel=True),
8872
dist_kwargs=dict(cut_off=83.45058418010026, kmin=0, backend= "guess_optimal"))
8973

9074
kernel_output = pcm.compute_kernel_matrix()
9175
( kernel_matrix, cdist_kwargs, ret_extra, ) = PCManifoldKernel.read_kernel_output(kernel_output=kernel_output)
9276

93-
77+
"""Convert the kernel matrix to dense matrix type"""
9478
kernel_matrix_sparse = kernel_matrix.copy()
9579
kernel_matrix_sparse = kernel_matrix_sparse.asfptype()
9680
kernel_matrix = kernel_matrix.todense()
@@ -99,11 +83,10 @@
9983
weights = np.ones((problem_size, num_rhs))
10084

10185

86+
"""Instantiation of FullMatrix"""
10287
kernel_matrix_OP = full_matrix.FullMatrix( executable, problem_size, max_leaf_node_size,
10388
num_of_neighbors, max_off_diagonal_ranks, num_rhs, user_tolerance, computation_budget,
10489
distance_type, matrix_type, kernel_type, kernel_matrix, weights, dtype=np.float32 )
105-
print("weights shape",weights.shape)
106-
print("K shape",kernel_matrix.shape)
10790

10891
n_eigenpairs = 6
10992
solver_kwargs = {
@@ -116,53 +99,58 @@
11699
}
117100

118101
basis_change_matrix = ret_extra['basis_change_matrix']
119-
inv_basis_change_matrix = scipy.sparse.diags(np.reciprocal(basis_change_matrix.data.ravel()))
120102

121103
evals_all, evecs_all = scipy.sparse.linalg.eigsh(kernel_matrix_sparse, **solver_kwargs)
122104
evals_large, evecs_large = scipy.sparse.linalg.eigsh(kernel_matrix_OP, **solver_kwargs)
123105

124-
sort_scipy = np.argsort( evals_all )
125-
sort_scipy = sort_scipy[::-1]
126-
sorted_scipy_evals = evals_all[sort_scipy]
127-
sorted_scipy_evecs = evecs_all[:,sort_scipy]
128-
129-
sort_gofmm = np.argsort( evals_large )
130-
sort_gofmm = sort_gofmm[::-1]
131-
sorted_gofmm_evals = evals_large[sort_gofmm]
132-
sorted_gofmm_evecs = evecs_large[:,sort_gofmm]
133-
134-
sorted_gofmm_evecs = basis_change_matrix @ sorted_gofmm_evecs
135-
sorted_gofmm_evecs /= np.linalg.norm(sorted_gofmm_evecs, axis=0)[np.newaxis, :]
136-
137-
sorted_scipy_evecs = basis_change_matrix @ sorted_scipy_evecs
138-
sorted_scipy_evecs /= np.linalg.norm(sorted_scipy_evecs, axis=0)[np.newaxis, :]
106+
sort_eigen_pairs( evals_all, evecs_all, basis_change_matrix )
107+
sort_eigen_pairs( evals_large, evecs_large, basis_change_matrix )
139108

109+
"""Print eigen pairs and plot hand written digits, eigen vector comparisons"""
140110
print("eigenvalues of gofmm")
141-
print(sorted_gofmm_evals)
111+
print(evals_large)
142112
print("eigenvectors of gofmm sorted")
143-
print(sorted_gofmm_evecs)
113+
print(evecs_large)
144114
print("eigenvalues of scipy")
145-
print(sorted_scipy_evals)
115+
print(evals_all)
146116
print("eigenvectors of scipy")
147-
print(sorted_scipy_evecs)
117+
print(evecs_all)
148118
print("eigenvalues of datafold")
149119
print(dmap.eigenvalues_)
150120
print("eigenvectors of datafold")
151121
print(dmap.eigenvectors_)
152122

153123
plot_pairwise_eigenvector(
154-
eigenvectors=sorted_scipy_evecs[:, 1:],
124+
eigenvectors=evecs_all[:, 1:],
155125
n=0,
156126
idx_start=1,
157127
fig_params=dict(figsize=(10, 10)),
158128
scatter_params=dict(c=y),
159129
)
160130
plt.savefig('hr_digits_scipy.png')
161131
plot_pairwise_eigenvector(
162-
eigenvectors=sorted_gofmm_evecs[:, 1:],
132+
eigenvectors=evecs_large[:, 1:],
163133
n=0,
164134
idx_start=1,
165135
fig_params=dict(figsize=(10, 10)),
166136
scatter_params=dict(c=y),
167137
)
168138
plt.savefig('hr_digits_gofmm.png')
139+
140+
plot_pairwise_eigenvector(
141+
eigenvectors=dmap.eigenvectors_[:, 1:],
142+
n=0,
143+
idx_start=1,
144+
fig_params=dict(figsize=(10, 10)),
145+
scatter_params=dict(c=y),
146+
)
147+
plt.savefig('hr_digits_dmap.png')
148+
149+
plot_embedding(
150+
X_dmap,
151+
y,
152+
images,
153+
title="Diffusion map embedding of the digits (time %.2fs)" % (time.time() - t0),
154+
)
155+
156+

use_cases/mnist.py

+32-43
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020
from tensorflow.keras import layers
2121

2222
import full_matrix
23-
from plot_embedding import plot_embedding
23+
from utils import plot_embedding
24+
from utils import sort_eigen_pairs
2425

26+
"""Input variables required for the instantiation of FullMatrix"""
2527
executable = "./test_gofmm"
2628
problem_size = 1024
2729
max_leaf_node_size = 512
@@ -34,6 +36,7 @@
3436
matrix_type = "dense"
3537
kernel_type = "gaussian"
3638

39+
"""Loading the hand written digits from MNISt"""
3740
num_classes = 10
3841
input_shape = (28, 28, 1)
3942

@@ -46,7 +49,7 @@
4649
X = images.reshape(problem_size, 784)
4750

4851

49-
#DATAFOLD stuff
52+
""""Instantiation of point cloud data and find the manifold using DiffusionMaps"""
5053
X_pcm = pfold.PCManifold(X)
5154
X_pcm.optimize_parameters(result_scaling=2)
5255

@@ -63,51 +66,33 @@
6366
dmap = dmap.set_target_coords([1, 2])
6467
X_dmap = dmap.transform(X_pcm)
6568

66-
# Mapping of diffusion maps
67-
plot_embedding(
68-
X_dmap,
69-
y,
70-
images,
71-
title="Diffusion map embedding of the digits (time %.2fs)" % (time.time() - t0),
72-
)
73-
7469
dmap = DiffusionMaps(
7570
kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
7671
n_eigenpairs=6,
7772
dist_kwargs=dict(cut_off=X_pcm.cut_off),
7873
)
7974
dmap = dmap.fit(X_pcm)
80-
plot_pairwise_eigenvector(
81-
eigenvectors=dmap.eigenvectors_[:, 1:],
82-
n=0,
83-
idx_start=1,
84-
fig_params=dict(figsize=(10, 10)),
85-
scatter_params=dict(c=y),
86-
)
87-
88-
plt.savefig('mnist_digits_dmap.png')
8975

76+
"""Compute the same kernel matrix with the same optimized datafold parameters, for the instantiation of FullMatrix"""
9077
pcm = pfold.PCManifold(X,
9178
kernel=pfold.DmapKernelFixed(internal_kernel=pfold.GaussianKernel(epsilon=24.44322087308319), is_stochastic=True, alpha=1, symmetrize_kernel=True),
9279
dist_kwargs=dict(cut_off=21.219348907470703, kmin=0, backend= "guess_optimal"))
9380

9481
kernel_output = pcm.compute_kernel_matrix()
9582
( kernel_matrix, cdist_kwargs, ret_extra, ) = PCManifoldKernel.read_kernel_output(kernel_output=kernel_output)
9683

97-
84+
"""Convert the kernel matrix to dense matrix type"""
9885
kernel_matrix_sparse = kernel_matrix.copy()
9986
kernel_matrix_sparse = kernel_matrix_sparse.asfptype()
10087
kernel_matrix = kernel_matrix.todense()
10188
kernel_matrix = kernel_matrix.astype("float32")
10289
#kernel_matrix.tofile("KernelMatrix_32768.bin")
10390
weights = np.ones((problem_size, num_rhs))
10491

105-
#GOFMM stuff
92+
"""Instantiation of FullMatrix"""
10693
kernel_matrix_OP = full_matrix.FullMatrix( executable, problem_size, max_leaf_node_size,
10794
num_of_neighbors, max_off_diagonal_ranks, num_rhs, user_tolerance, computation_budget,
10895
distance_type, matrix_type, kernel_type, kernel_matrix, weights, dtype=np.float32 )
109-
print("weights shape",weights.shape)
110-
print("K shape",kernel_matrix.shape)
11196

11297
n_eigenpairs = 6
11398
solver_kwargs = {
@@ -120,35 +105,22 @@
120105
}
121106

122107
basis_change_matrix = ret_extra['basis_change_matrix']
123-
inv_basis_change_matrix = scipy.sparse.diags(np.reciprocal(basis_change_matrix.data.ravel()))
124108

125109
evals_all, evecs_all = scipy.sparse.linalg.eigsh(kernel_matrix_sparse, **solver_kwargs)
126110
evals_large, evecs_large = scipy.sparse.linalg.eigsh(kernel_matrix_OP, **solver_kwargs)
127111

128-
sort_scipy = np.argsort( evals_all )
129-
sort_scipy = sort_scipy[::-1]
130-
sorted_scipy_evals = evals_all[sort_scipy]
131-
sorted_scipy_evecs = evecs_all[:,sort_scipy]
132-
133-
sort_gofmm = np.argsort( evals_large )
134-
sort_gofmm = sort_gofmm[::-1]
135-
sorted_gofmm_evals = evals_large[sort_gofmm]
136-
sorted_gofmm_evecs = evecs_large[:,sort_gofmm]
137-
138-
sorted_gofmm_evecs = basis_change_matrix @ sorted_gofmm_evecs
139-
sorted_gofmm_evecs /= np.linalg.norm(sorted_gofmm_evecs, axis=0)[np.newaxis, :]
140-
141-
sorted_scipy_evecs = basis_change_matrix @ sorted_scipy_evecs
142-
sorted_scipy_evecs /= np.linalg.norm(sorted_scipy_evecs, axis=0)[np.newaxis, :]
112+
sort_eigen_pairs( evals_all, evecs_all, basis_change_matrix )
113+
sort_eigen_pairs( evals_large, evecs_large, basis_change_matrix )
143114

115+
"""Print eigen pairs and plot hand written digits, eigen vector comparisons"""
144116
print("eigenvalues of gofmm")
145-
print(sorted_gofmm_evals)
117+
print(evals_large)
146118
print("eigenvectors of gofmm sorted")
147-
print(sorted_gofmm_evecs)
119+
print(evecs_large)
148120
print("eigenvalues of scipy")
149-
print(sorted_scipy_evals)
121+
print(evals_all)
150122
print("eigenvectors of scipy")
151-
print(sorted_scipy_evecs)
123+
print(evecs_all)
152124
print("eigenvalues of datafold")
153125
print(dmap.eigenvalues_)
154126
print("eigenvectors of datafold")
@@ -170,3 +142,20 @@
170142
scatter_params=dict(c=y),
171143
)
172144
plt.savefig('mnist_digits_gofmm.png')
145+
146+
plot_pairwise_eigenvector(
147+
eigenvectors=dmap.eigenvectors_[:, 1:],
148+
n=0,
149+
idx_start=1,
150+
fig_params=dict(figsize=(10, 10)),
151+
scatter_params=dict(c=y),
152+
)
153+
154+
plt.savefig('mnist_digits_dmap.png')
155+
156+
plot_embedding(
157+
X_dmap,
158+
y,
159+
images,
160+
title="Diffusion map embedding of the digits (time %.2fs)" % (time.time() - t0),
161+
)

use_cases/plot_embedding.py use_cases/utils.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,16 @@
33
from matplotlib import image, offsetbox
44

55
def plot_embedding(X, y, digits, title=None):
6-
"""Scale and visualize the embedding vectors"""
6+
"""Scale and visualize the embedding vectors.
7+
8+
X
9+
Embedding coordinates to scale and plot, one row per sample (the use-case scripts pass the diffusion-map embedding X_dmap)
10+
y
11+
Target labels of shape ( problem_size, )
12+
title
13+
Title of the plot
14+
15+
"""
716
x_min, x_max = np.min(X, 0), np.max(X, 0)
817
X = (X - x_min) / (x_max - x_min)
918

@@ -37,3 +46,25 @@ def plot_embedding(X, y, digits, title=None):
3746
if title is not None:
3847
plt.title(title)
3948
plt.savefig("digits.png")
49+
50+
def sort_eigen_pairs( evals, evecs, basis_change_matrix ):
    """Sort eigenpairs by descending eigenvalue and rescale the eigenvectors.

    The eigenvalues are reordered from largest to smallest and the columns
    of ``evecs`` are permuted to match. Every eigenvector is then multiplied
    from the left by ``basis_change_matrix`` and normalised to unit
    Euclidean length.

    Both arrays are modified in place, so callers that ignore the return
    value keep working; the same two arrays are also returned.

    evals
        Writable 1-D array of eigenvalues, one per column of ``evecs``
    evecs
        Writable 2-D array whose columns are the eigenvectors
    basis_change_matrix
        Matrix applied from the left to the sorted eigenvectors (the
        use-case scripts pass ``ret_extra['basis_change_matrix']``)

    Returns the tuple ``(evals, evecs)``.
    """
    # argsort is ascending; reverse it so the largest eigenvalue comes first.
    sort_order = np.argsort( evals )[::-1]
    # Fancy indexing copies, so assigning back into the same array is safe.
    evals[:] = evals[sort_order]
    evecs[:,:] = evecs[:,sort_order]

    evecs[:,:] = basis_change_matrix @ evecs

    norms = np.linalg.norm(evecs, axis=0)
    # Leave all-zero columns as they are instead of dividing by zero (NaN).
    norms[norms == 0] = 1
    evecs[:,:] /= norms[np.newaxis, :]
    return evals, evecs
70+

0 commit comments

Comments
 (0)