%% Machine Learning Online Class
% Exercise 7 | Principal Component Analysis and K-Means Clustering
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the
% exercise. You will need to complete the following functions:
%
% pca.m
% projectData.m
% recoverData.m
% computeCentroids.m
% findClosestCentroids.m
% kMeansInitCentroids.m
%
% For this exercise, you will not need to change any code in this file,
% or any other files other than those mentioned above.
%
%% Initialization
clear ; close all; clc
%% ================== Part 1: Load Example Dataset ===================
% We start this exercise by using a small dataset that is easy to
% visualize
%
fprintf('Visualizing example dataset for PCA.\n\n');
% The following command loads the dataset. You should now have the
% variable X in your environment
load ('ex7data1.mat');
% Visualize the example dataset
plot(X(:, 1), X(:, 2), 'bo');
axis([0.5 6.5 2 8]); axis square;
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =============== Part 2: Principal Component Analysis ===============
% You should now implement PCA, a dimension reduction technique. You
% should complete the code in pca.m
%
fprintf('\nRunning PCA on example dataset.\n\n');
% Before running PCA, it is important to first normalize X
[X_norm, mu, sigma] = featureNormalize(X);
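% For reference, featureNormalize (provided with the exercise) standardizes
% each feature; a minimal sketch of what it computes, assuming zero-mean,
% unit-variance scaling (bsxfun is used for compatibility with older
% Octave/MATLAB versions that lack implicit expansion):
%
%   mu = mean(X);
%   sigma = std(X);
%   X_norm = bsxfun(@rdivide, bsxfun(@minus, X, mu), sigma);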
% Run PCA
[U, S] = pca(X_norm);
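% A minimal sketch of what pca.m is expected to compute (assuming X has
% already been normalized): the covariance matrix of the data and its
% singular value decomposition, whose left singular vectors are the
% principal directions.
%
%   function [U, S] = pca(X)
%     [m, n] = size(X);
%     Sigma = (1 / m) * (X' * X);   % n x n covariance matrix
%     [U, S, V] = svd(Sigma);       % columns of U: principal directions
%   end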
% mu (the mean of each feature) was computed by featureNormalize above
% Draw the eigenvectors centered at the mean of the data. These lines show
% the directions of maximum variation in the dataset.
hold on;
drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2);
drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2);
hold off;
fprintf('Top eigenvector: \n');
fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1));
fprintf('\n(you should expect to see -0.707107 -0.707107)\n');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =================== Part 3: Dimension Reduction ===================
% You should now implement the projection step to map the data onto the
% first k eigenvectors. The code will then plot the data in this reduced
% dimensional space. This will show you what the data looks like when
% using only the corresponding eigenvectors to reconstruct it.
%
% You should complete the code in projectData.m
%
fprintf('\nDimension reduction on example dataset.\n\n');
% Plot the normalized dataset (returned from featureNormalize)
plot(X_norm(:, 1), X_norm(:, 2), 'bo');
axis([-4 3 -4 3]); axis square
% Project the data onto K = 1 dimension
K = 1;
Z = projectData(X_norm, U, K);
fprintf('Projection of the first example: %f\n', Z(1));
fprintf('\n(this value should be about 1.481274)\n\n');
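% A minimal sketch of what projectData.m is expected to do (assuming the
% columns of U are the principal directions returned by pca): project each
% example onto the top K directions.
%
%   function Z = projectData(X, U, K)
%     U_reduce = U(:, 1:K);   % n x K matrix of the top K eigenvectors
%     Z = X * U_reduce;       % m x K matrix of projections
%   end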
X_rec = recoverData(Z, U, K);
fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2));
fprintf('\n(this value should be about -1.047419 -1.047419)\n\n');
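% A minimal sketch of what recoverData.m is expected to do: approximate
% each example in the original space from its K-dimensional projection.
%
%   function X_rec = recoverData(Z, U, K)
%     X_rec = Z * U(:, 1:K)';   % m x n approximate reconstruction
%   end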
% Draw lines connecting the projected points to the original points
hold on;
plot(X_rec(:, 1), X_rec(:, 2), 'ro');
for i = 1:size(X_norm, 1)
drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1);
end
hold off
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =============== Part 4: Loading and Visualizing Face Data =============
% We start the exercise by first loading and visualizing the dataset.
% The following code will load the dataset into your environment
%
fprintf('\nLoading face dataset.\n\n');
% Load Face dataset
load ('ex7faces.mat')
% Display the first 100 faces in the dataset
displayData(X(1:100, :));
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =========== Part 5: PCA on Face Data: Eigenfaces ===================
% Run PCA and visualize the eigenvectors, which in this case are eigenfaces.
% We display the first 36 eigenfaces.
%
fprintf(['\nRunning PCA on face dataset.\n' ...
'(this might take a minute or two ...)\n\n']);
% Before running PCA, it is important to first normalize X by subtracting
% the mean value from each feature
[X_norm, mu, sigma] = featureNormalize(X);
% Run PCA
[U, S] = pca(X_norm);
% Visualize the top 36 eigenvectors found
displayData(U(:, 1:36)');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ============= Part 6: Dimension Reduction for Faces =================
% Project images to the eigenspace using the top K eigenvectors.
% If you are applying a machine learning algorithm to the images, you can
% work with this lower-dimensional representation instead of the raw pixels.
fprintf('\nDimension reduction for face dataset.\n\n');
K = 100;
Z = projectData(X_norm, U, K);
fprintf('The projected data Z has a size of: ')
fprintf('%d ', size(Z));
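% An optional sanity check (a small sketch, assuming the diagonal of S
% returned by pca holds the eigenvalues of the covariance matrix in
% decreasing order): the fraction of variance retained by the top K
% components.
s = diag(S);
fprintf('\nVariance retained by the top %d components: %.2f%%\n', ...
        K, 100 * sum(s(1:K)) / sum(s));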
fprintf('\n\nProgram paused. Press enter to continue.\n');
pause;
%% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
% Project images to the eigenspace using the top K eigenvectors and
% visualize them using only those K dimensions.
% Compare to the original input, which is also displayed.
fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n');
K = 100;
X_rec = recoverData(Z, U, K);
% Display normalized data
subplot(1, 2, 1);
displayData(X_norm(1:100,:));
title('Original faces');
axis square;
% Display reconstructed data using only the top K eigenfaces
subplot(1, 2, 2);
displayData(X_rec(1:100,:));
title('Recovered faces');
axis square;
fprintf('Program paused. Press enter to continue.\n');
pause;
%% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
% One useful application of PCA is to use it to visualize high-dimensional
% data. In the last K-Means exercise you ran K-Means on 3-dimensional
% pixel colors of an image. We first visualize this output in 3D, and then
% apply PCA to obtain a visualization in 2D.
close all; clc
% Reload the image from the previous exercise and run K-Means on it
% For this to work, you need to complete the K-Means assignment first
A = double(imread('bird_small.png'));
% If imread does not work for you, you can try instead
% load ('bird_small.mat');
A = A / 255;
img_size = size(A);
X = reshape(A, img_size(1) * img_size(2), 3);
K = 16;
max_iters = 10;
initial_centroids = kMeansInitCentroids(X, K);
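% A minimal sketch of what kMeansInitCentroids.m is expected to do
% (assuming the standard strategy of picking K distinct random examples as
% the initial centroids):
%
%   function centroids = kMeansInitCentroids(X, K)
%     randidx = randperm(size(X, 1));   % shuffle the example indices
%     centroids = X(randidx(1:K), :);   % take the first K shuffled examples
%   end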
[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
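% runkMeans (provided) alternates between findClosestCentroids.m and
% computeCentroids.m; minimal sketches of both, assuming the usual K-Means
% update rules (assignment to the nearest centroid, then recomputing each
% centroid as the mean of its assigned points):
%
%   function idx = findClosestCentroids(X, centroids)
%     idx = zeros(size(X, 1), 1);
%     for i = 1:size(X, 1)
%       dists = sum(bsxfun(@minus, centroids, X(i, :)) .^ 2, 2);  % squared distances
%       [~, idx(i)] = min(dists);                                 % nearest centroid
%     end
%   end
%
%   function centroids = computeCentroids(X, idx, K)
%     centroids = zeros(K, size(X, 2));
%     for k = 1:K
%       centroids(k, :) = mean(X(idx == k, :), 1);   % mean of assigned points
%     end
%   end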
% Sample 1000 random indexes (since working with all the data is too
% expensive). If you have a fast computer, you may increase this.
sel = floor(rand(1000, 1) * size(X, 1)) + 1;
% Setup Color Palette
palette = hsv(K);
colors = palette(idx(sel), :);
% Visualize the data and centroid memberships in 3D
figure;
scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors);
title('Pixel dataset plotted in 3D. Color shows centroid memberships');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
% Use PCA to project this cloud to 2D for visualization
% Subtract the mean to use PCA
[X_norm, mu, sigma] = featureNormalize(X);
% PCA and project the data to 2D
[U, S] = pca(X_norm);
Z = projectData(X_norm, U, 2);
% Plot in 2D
figure;
plotDataPoints(Z(sel, :), idx(sel), K);
title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction');
fprintf('Program paused. Press enter to continue.\n');
pause;