# pca_numpy.py — step-by-step PCA demo on synthetic 3-D Gaussian data.
# (GitHub page chrome and line-number gutter removed from this scrape.)
import numpy as np

# Fixed seed so the "random" samples are reproducible run to run.
np.random.seed(1)

# Class 1: 20 samples from a 3-D Gaussian centered at the origin with
# identity covariance. Transposed so columns are samples -> shape (3, 20).
mu_vec1 = np.array([0, 0, 0])
cov_mat1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20).T
# print() with a single argument is valid on both Python 2 and Python 3
# (the original bare `print` statements were Python-2-only syntax).
print(class1_sample)
assert class1_sample.shape == (3, 20), "The matrix has not the dimensions 3x20"

# Class 2: same covariance, mean shifted to (1, 1, 1).
mu_vec2 = np.array([1, 1, 1])
cov_mat2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20).T
print(class2_sample)
assert class2_sample.shape == (3, 20), "The matrix has not the dimensions 3x20"
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d

# 3-D scatter of both classes: blue circles for class 1, red triangles
# for class 2, semi-transparent so overlapping points stay visible.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
for sample, marker, colour, label in (
        (class1_sample, 'o', 'blue', 'class1'),
        (class2_sample, '^', 'red', 'class2')):
    ax.plot(sample[0, :], sample[1, :], sample[2, :], marker,
            markersize=8, color=colour, alpha=0.5, label=label)
plt.title("Samples")
plt.show()
# PCA is unsupervised, so we don't need the class labels:
# merge the two sample sets into one (3, 40) data matrix.
all_samples = np.concatenate((class1_sample, class2_sample), axis=1)

# Mean vector of the merged data: one mean per dimension, shape (3, 1).
mean_x = np.mean(all_samples[0, :])
mean_y = np.mean(all_samples[1, :])
mean_z = np.mean(all_samples[2, :])
mean_vector = np.array([[mean_x, ], [mean_y], [mean_z]])
# print() calls work on Python 2 and 3 (bare `print` was Python-2-only).
print("mean vector: \n%s" % mean_vector)

# 3x3 covariance matrix of the merged data.
cov_mat = np.cov([all_samples[0, :], all_samples[1, :], all_samples[2, :]])
print("covariance matrix : \n%s" % cov_mat)

# Eigendecomposition of the covariance matrix: the eigenvectors are the
# principal directions, the eigenvalues the variance along each of them.
eig_values_cov, eig_vectors_cov = np.linalg.eig(cov_mat)
#plot eigenvectors centered at the sample mean
from matplotlib.patches import FancyArrowPatch
class Arrow3D(FancyArrowPatch):
    """A FancyArrowPatch drawn between two points in 3-D axes.

    Stores the 3-D endpoints and projects them to 2-D display
    coordinates at draw time, so the arrow follows axis rotation.
    """

    def __init__(self, xs, ys, zs, *args, **kwargs):
        # Start as a dummy 2-D arrow; the real positions are set when
        # the figure is rendered and the projection matrix is known.
        FancyArrowPatch.__init__(self, (0, 0), (0, 0), *args, **kwargs)
        self._verts3d = xs, ys, zs

    def do_3d_projection(self, renderer=None):
        # Hook called by matplotlib >= 3.5, where draw(renderer) no longer
        # provides the projection matrix; return min(z) for z-ordering.
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, self.axes.M)
        self.set_positions((xs[0], ys[0]), (xs[1], ys[1]))
        return np.min(zs)

    def draw(self, renderer):
        # Legacy matplotlib path: project the stored 3-D segment with the
        # renderer's projection matrix, then draw as a plain 2-D arrow.
        xs3d, ys3d, zs3d = self._verts3d
        xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
        self.set_positions((xs[0], ys[0]), (xs[1], ys[1]))
        FancyArrowPatch.draw(self, renderer)
# Plot the eigenvectors anchored at the sample mean: each arrow runs
# from the mean to mean + eigenvector.
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, projection='3d')
ax.plot(all_samples[0, :], all_samples[1, :],
        all_samples[2, :], 'o', markersize=8, color='green', alpha=0.2)
ax.plot([mean_x], [mean_y], [mean_z], 'o',
        markersize=10, color='red', alpha=0.5)
for v in eig_vectors_cov.T:
    # BUGFIX: the arrow tip is mean + v. The original drew the arrow to
    # the raw eigenvector coordinates v, so despite the stated intent the
    # arrows were not anchored at the sample mean.
    a = Arrow3D([mean_x, mean_x + v[0]], [mean_y, mean_y + v[1]],
                [mean_z, mean_z + v[2]], mutation_scale=20, lw=3,
                arrowstyle="-|>", color="r")
    ax.add_artist(a)
ax.set_xlabel('x_values')
ax.set_ylabel('y_values')
ax.set_zlabel('z_values')
plt.title('Eigenvectors')
plt.show()
# Sort the (eigenvalue, eigenvector) pairs by decreasing eigenvalue so we
# keep only the directions that carry the most variance.
eig_pairs = [(np.abs(eig_values_cov[i]), eig_vectors_cov[:, i])
             for i in range(len(eig_values_cov))]
# Sort on the eigenvalue only. The original plain tuple sort compares the
# ndarray eigenvectors whenever two eigenvalues tie, which raises a
# "truth value of an array is ambiguous" error on Python 3.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)

# Visually confirm the ordering by printing the eigenvalues.
print("eigenvalues")
for pair in eig_pairs:
    print(pair[0])

# Build the projection matrix W from the two leading eigenvectors:
# its columns are the principal components we keep (3-D -> 2-D).
matrix_w = np.hstack((eig_pairs[0][1].reshape(3, 1),
                      eig_pairs[1][1].reshape(3, 1)))
print("Matrix W :\n%s" % matrix_w)
# Project the samples onto the 2-D principal subspace: y = W^T * x.
# NOTE(review): the samples are not mean-centered before projecting —
# textbook PCA would subtract mean_vector first; confirm this is intended.
transformed = matrix_w.T.dot(all_samples)

# Columns 0..19 are class 1 and 20..39 are class 2 (the merge order).
for start, stop, marker, colour, label in (
        (0, 20, 'o', 'blue', 'class1'),
        (20, 40, '^', 'red', 'class2')):
    plt.plot(transformed[0, start:stop], transformed[1, start:stop],
             marker, markersize=7, color=colour, alpha=0.5, label=label)
plt.xlim([-4, 4])
plt.ylim([-4, 4])
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.legend()
plt.title('Transformed samples with class labels')
plt.show()
# Repeat the analysis the way matplotlib.mlab.PCA did it.
# mlab.PCA was removed in matplotlib 3.1, so replicate its algorithm with
# numpy: center the data, scale each variable to unit variance, then SVD.
data = all_samples.T                         # rows = observations, cols = variables
centered = data - data.mean(axis=0)
standardized = centered / data.std(axis=0)   # mlab scaled by the (ddof=0) std
# Rows of Wt are the principal axes (mlab's .Wt attribute).
U, s, Wt = np.linalg.svd(standardized, full_matrices=False)
Y = standardized.dot(Wt.T)                   # projected data (mlab's .Y)
print('PC axes in terms of the measurement axes'
      ' scaled by the standard deviations:\n%s' % Wt)
plt.plot(Y[0:20, 0], Y[0:20, 1], 'o', markersize=7,
         color='blue', alpha=0.5, label='class1')
plt.plot(Y[20:40, 0], Y[20:40, 1], '^', markersize=7,
         color='red', alpha=0.5, label='class2')
plt.xlabel('x_values')
plt.ylabel('y_values')
plt.xlim([-4, 4])
plt.ylim([-4, 4])
plt.legend()
plt.title('Transformed samples with class labels from matplotlib.mlab.PCA()')
plt.show()
#If you look at these graphs you'll see that they do not look identical. This is because the mlab PCA implementation scales the
#variables to unit variance prior to computing the covariance matrix, which can lead to different variances along the axes.
#You should only scale when the variables are measured in different units (say one in inches and another in newtons).
#For this example we assumed the data was all in the same units, so we skipped the scaling step.