-
Notifications
You must be signed in to change notification settings - Fork 3
/
cluster.py
69 lines (45 loc) · 1.65 KB
/
cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import sys
import argparse
from numpy import array, zeros, mean, std, subtract, divide, dot, sqrt
from scipy.cluster.vq import vq, kmeans, whiten
# Command-line interface. -f, -o and -k are mandatory because the pipeline
# cannot run without them; declaring that here makes argparse print a usage
# error instead of letting a None value fail later with a TypeError.
parser = argparse.ArgumentParser(
    description='K-means clustering util for image feature processing.')
parser.add_argument('-f', required=True,
                    help='The file of observations (image features).')
parser.add_argument('-n', default='y',
                    help='The flag for normalization.')
parser.add_argument('-o', required=True,
                    help='The output file.')
# type=int rejects a non-numeric cluster count at parse time.
parser.add_argument('-k', required=True, type=int,
                    help='The number of clusters.')
args = parser.parse_args()
def load_features(filename):
    """Load feature vectors from a text file and scale each to unit length.

    Every non-blank line of ``filename`` is split on whitespace, parsed as
    floats and divided by its Euclidean norm.

    Args:
        filename: Path of the text file to read, one observation per line.

    Returns:
        A numpy array built from the list of normalized vectors, one per
        non-blank input line.

    Raises:
        ValueError: If a token on a line cannot be parsed as a float.
    """
    content = []
    # The context manager closes the file even when parsing a line fails.
    with open(filename, 'r') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # A blank line carries no observation; skipping it avoids
                # appending an empty vector to the result.
                continue
            # Materialize the floats: on Python 3 ``map`` returns a lazy
            # iterator, which ``dot`` cannot consume.
            vec = array([float(tok) for tok in tokens])
            norm_vec = sqrt(dot(vec, vec))
            if norm_vec > 0:
                vec = divide(vec, norm_vec)
            # An all-zero vector has no direction; it is kept as zeros
            # rather than being turned into NaNs by a division by zero.
            content.append(vec)
    return array(content)
def normalize(output_filename, features):
    """Standardize each feature column and record the statistics used.

    The per-column mean and standard deviation are computed along axis 0 and
    written to ``output_filename + ".meta"`` as two space-separated lines
    (means first, then scales). Each observation is then transformed to
    ``(observation - mean) / scale``.

    Args:
        output_filename: Base path; the statistics go to this path with
            ``.meta`` appended.
        features: Observations, one per row.

    Returns:
        A numpy array of the standardized observations.
    """
    mean_vec = mean(features, axis=0)
    # Work on a private float copy so the zero entries can be patched.
    std_vec = array(std(features, axis=0), dtype=float)
    # A constant column has zero deviation; dividing by it would yield
    # NaN/inf. Use a scale of 1 so such columns simply become 0. The patched
    # scale is what gets written to the .meta file, so the file describes
    # the transform that was actually applied.
    std_vec[std_vec == 0] = 1.0

    # The context manager closes the file even if a write fails.
    with open(output_filename + ".meta", 'w') as f:
        mean_vec.tofile(f, sep=" ")
        f.write("\n")
        std_vec.tofile(f, sep=" ")

    # Broadcasting applies the per-column statistics to every row at once.
    return divide(subtract(features, mean_vec), std_vec)
def cluster(features, k, output_filename):
    """Run k-means on the observations and save the resulting centroids.

    The centroids are printed to standard output and written to
    ``output_filename``, one centroid per line with space-separated
    components.

    Args:
        features: Observations, one per row, passed to scipy's ``kmeans``.
        k: Number of clusters requested, passed to ``kmeans``.
        output_filename: Path of the text file that receives the centroids.

    Returns:
        The centroid array returned by ``kmeans``.
    """
    # kmeans also returns the distortion, which this script does not use.
    centroids, _ = kmeans(features, k)
    # The context manager closes the file even if writing a centroid fails.
    with open(output_filename, 'w') as f:
        print(centroids)
        for vec in centroids:
            vec.tofile(f, sep=" ")
            f.write("\n")
    return centroids
# Pipeline: load the observations, optionally standardize them, then cluster.
features = load_features(args.f)
# Honor the -n flag, which was previously parsed but never consulted.
# Normalization stays on by default ('y') and is skipped only for an explicit
# negative value, so existing invocations behave as before.
if str(args.n).lower() not in ('n', 'no', 'false', '0'):
    features = normalize(args.o, features)
centroids = cluster(features, int(args.k), args.o)