|
14 | 14 | from datafold.pcfold.kernels import PCManifoldKernel
|
15 | 15 |
|
16 | 16 | import full_matrix
|
17 |
| -from plot_embedding import plot_embedding |
| 17 | +from utils import plot_embedding |
| 18 | +from utils import sort_eigen_pairs |
18 | 19 |
|
19 | 20 | # Source code taken and adapted from https://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html
|
20 | 21 |
|
| 22 | +"""Input variables required for the instantiation of FullMatrix""" |
21 | 23 | executable = "./test_gofmm"
|
22 | 24 | problem_size = 1024
|
23 | 25 | max_leaf_node_size = 512
|
|
30 | 32 | matrix_type = "dense"
|
31 | 33 | kernel_type = "gaussian"
|
32 | 34 |
|
| 35 | +"""Load the handwritten digits dataset from sklearn""" |
33 | 36 | digits = datasets.load_digits(n_class=6)
|
34 | 37 | X = digits.data[0:problem_size,:]
|
35 | 38 | y = digits.target[0:problem_size]
|
36 | 39 | images = digits.images[0:problem_size,:,:]
|
37 |
| -print("X",X) |
38 |
| -print("images",images) |
39 |
| -print("images shape",images.shape) |
40 |
| -print("target",y) |
41 | 40 |
|
42 | 41 | X_train, X_test, y_train, y_test, images_train, images_test = train_test_split(
|
43 | 42 | X, y, images, train_size=2 / 3, test_size=1 / 3
|
44 | 43 | )
|
45 | 44 |
|
| 45 | +"""Instantiate the point cloud data and find the manifold using DiffusionMaps""" |
46 | 46 | X_pcm = pfold.PCManifold(X)
|
47 | 47 | X_pcm.optimize_parameters(result_scaling=2)
|
48 | 48 |
|
|
59 | 59 | dmap = dmap.set_target_coords([1, 2])
|
60 | 60 | X_dmap = dmap.transform(X_pcm)
|
61 | 61 |
|
62 |
| -# Mapping of diffusion maps |
63 |
| -plot_embedding( |
64 |
| - X_dmap, |
65 |
| - y, |
66 |
| - images, |
67 |
| - title="Diffusion map embedding of the digits (time %.2fs)" % (time.time() - t0), |
68 |
| -) |
69 |
| - |
70 | 62 | dmap = DiffusionMaps(
|
71 | 63 | kernel=pfold.GaussianKernel(epsilon=X_pcm.kernel.epsilon),
|
72 | 64 | n_eigenpairs=6,
|
73 | 65 | dist_kwargs=dict(cut_off=X_pcm.cut_off),
|
74 | 66 | )
|
75 | 67 | dmap = dmap.fit(X_pcm)
|
76 |
| -plot_pairwise_eigenvector( |
77 |
| - eigenvectors=dmap.eigenvectors_[:, 1:], |
78 |
| - n=0, |
79 |
| - idx_start=1, |
80 |
| - fig_params=dict(figsize=(10, 10)), |
81 |
| - scatter_params=dict(c=y), |
82 |
| -) |
83 |
| - |
84 |
| -plt.savefig('hr_digits_dmap.png') |
85 | 68 |
|
| 69 | +"""Compute the same kernel matrix with the same optimized datafold parameters, for the instantiation of FullMatrix""" |
86 | 70 | pcm = pfold.PCManifold(X,
|
87 | 71 | kernel=pfold.DmapKernelFixed(internal_kernel=pfold.GaussianKernel(epsilon=378.0533464967807), is_stochastic=True, alpha=1, symmetrize_kernel=True),
|
88 | 72 | dist_kwargs=dict(cut_off=83.45058418010026, kmin=0, backend= "guess_optimal"))
|
89 | 73 |
|
90 | 74 | kernel_output = pcm.compute_kernel_matrix()
|
91 | 75 | ( kernel_matrix, cdist_kwargs, ret_extra, ) = PCManifoldKernel.read_kernel_output(kernel_output=kernel_output)
|
92 | 76 |
|
93 |
| - |
| 77 | +"""Convert the kernel matrix to dense matrix type""" |
94 | 78 | kernel_matrix_sparse = kernel_matrix.copy()
|
95 | 79 | kernel_matrix_sparse = kernel_matrix_sparse.asfptype()
|
96 | 80 | kernel_matrix = kernel_matrix.todense()
|
|
99 | 83 | weights = np.ones((problem_size, num_rhs))
|
100 | 84 |
|
101 | 85 |
|
| 86 | +"""Instantiation of FullMatrix""" |
102 | 87 | kernel_matrix_OP = full_matrix.FullMatrix( executable, problem_size, max_leaf_node_size,
|
103 | 88 | num_of_neighbors, max_off_diagonal_ranks, num_rhs, user_tolerance, computation_budget,
|
104 | 89 | distance_type, matrix_type, kernel_type, kernel_matrix, weights, dtype=np.float32 )
|
105 |
| -print("weights shape",weights.shape) |
106 |
| -print("K shape",kernel_matrix.shape) |
107 | 90 |
|
108 | 91 | n_eigenpairs = 6
|
109 | 92 | solver_kwargs = {
|
|
116 | 99 | }
|
117 | 100 |
|
118 | 101 | basis_change_matrix = ret_extra['basis_change_matrix']
|
119 |
| -inv_basis_change_matrix = scipy.sparse.diags(np.reciprocal(basis_change_matrix.data.ravel())) |
120 | 102 |
|
121 | 103 | evals_all, evecs_all = scipy.sparse.linalg.eigsh(kernel_matrix_sparse, **solver_kwargs)
|
122 | 104 | evals_large, evecs_large = scipy.sparse.linalg.eigsh(kernel_matrix_OP, **solver_kwargs)
|
123 | 105 |
|
124 |
| -sort_scipy = np.argsort( evals_all ) |
125 |
| -sort_scipy = sort_scipy[::-1] |
126 |
| -sorted_scipy_evals = evals_all[sort_scipy] |
127 |
| -sorted_scipy_evecs = evecs_all[:,sort_scipy] |
128 |
| - |
129 |
| -sort_gofmm = np.argsort( evals_large ) |
130 |
| -sort_gofmm = sort_gofmm[::-1] |
131 |
| -sorted_gofmm_evals = evals_large[sort_gofmm] |
132 |
| -sorted_gofmm_evecs = evecs_large[:,sort_gofmm] |
133 |
| - |
134 |
| -sorted_gofmm_evecs = basis_change_matrix @ sorted_gofmm_evecs |
135 |
| -sorted_gofmm_evecs /= np.linalg.norm(sorted_gofmm_evecs, axis=0)[np.newaxis, :] |
136 |
| - |
137 |
| -sorted_scipy_evecs = basis_change_matrix @ sorted_scipy_evecs |
138 |
| -sorted_scipy_evecs /= np.linalg.norm(sorted_scipy_evecs, axis=0)[np.newaxis, :] |
| 106 | +sort_eigen_pairs( evals_all, evecs_all, basis_change_matrix ) |
| 107 | +sort_eigen_pairs( evals_large, evecs_large, basis_change_matrix ) |
139 | 108 |
|
| 109 | +"""Print the eigenpairs and plot the handwritten digits and the eigenvector comparisons""" |
140 | 110 | print("eigenvalues of gofmm")
|
141 |
| -print(sorted_gofmm_evals) |
| 111 | +print(evals_large) |
142 | 112 | print("eigenvectors of gofmm sorted")
|
143 |
| -print(sorted_gofmm_evecs) |
| 113 | +print(evecs_large) |
144 | 114 | print("eigenvalues of scipy")
|
145 |
| -print(sorted_scipy_evals) |
| 115 | +print(evals_all) |
146 | 116 | print("eigenvectors of scipy")
|
147 |
| -print(sorted_scipy_evecs) |
| 117 | +print(evecs_all) |
148 | 118 | print("eigenvalues of datafold")
|
149 | 119 | print(dmap.eigenvalues_)
|
150 | 120 | print("eigenvectors of datafold")
|
151 | 121 | print(dmap.eigenvectors_)
|
152 | 122 |
|
153 | 123 | plot_pairwise_eigenvector(
|
154 |
| - eigenvectors=sorted_scipy_evecs[:, 1:], |
| 124 | + eigenvectors=evecs_all[:, 1:], |
155 | 125 | n=0,
|
156 | 126 | idx_start=1,
|
157 | 127 | fig_params=dict(figsize=(10, 10)),
|
158 | 128 | scatter_params=dict(c=y),
|
159 | 129 | )
|
160 | 130 | plt.savefig('hr_digits_scipy.png')
|
161 | 131 | plot_pairwise_eigenvector(
|
162 |
| - eigenvectors=sorted_gofmm_evecs[:, 1:], |
| 132 | + eigenvectors=evecs_large[:, 1:], |
163 | 133 | n=0,
|
164 | 134 | idx_start=1,
|
165 | 135 | fig_params=dict(figsize=(10, 10)),
|
166 | 136 | scatter_params=dict(c=y),
|
167 | 137 | )
|
168 | 138 | plt.savefig('hr_digits_gofmm.png')
|
| 139 | + |
| 140 | +plot_pairwise_eigenvector( |
| 141 | + eigenvectors=dmap.eigenvectors_[:, 1:], |
| 142 | + n=0, |
| 143 | + idx_start=1, |
| 144 | + fig_params=dict(figsize=(10, 10)), |
| 145 | + scatter_params=dict(c=y), |
| 146 | +) |
| 147 | +plt.savefig('hr_digits_dmap.png') |
| 148 | + |
| 149 | +plot_embedding( |
| 150 | + X_dmap, |
| 151 | + y, |
| 152 | + images, |
| 153 | + title="Diffusion map embedding of the digits (time %.2fs)" % (time.time() - t0), |
| 154 | +) |
| 155 | + |
| 156 | + |
0 commit comments