-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKMeansClustering.cpp
104 lines (88 loc) · 2.17 KB
/
KMeansClustering.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include "stdafx.h"
#include "KMeansClustering.h"
#include <cstdlib>
using namespace std;
int KMeansClustering::K() const
{
return _k;
}
void KMeansClustering::K(int k)
{
if (k <= 0)
{
throw string("incorrect number of clusters");
}
_k = k;
}
const DataVector & KMeansClustering::Data() const
{
return *_data;
}
void KMeansClustering::Data(DataVector *data)
{
_data = data;
}
const DataVector& KMeansClustering::Centroids() const
{
return _centroids;
}
const IntVector& KMeansClustering::Clusters() const
{
return *_clusters;
}
void KMeansClustering::Clusters(IntVector *clusters)
{
_clusters = clusters;
}
bool KMeansClustering::NextStep()
{
_oldClusters = *_clusters;
determineCentroids();
assignToClusters();
return _oldClusters != *_clusters;
}
void KMeansClustering::determineCentroids()
{
_centroids = getCleanCentroids();
IntVector sumElements(_k, 0);
for (int i = 0; i < _data->size(); ++i)
{
int centroidIndex = (*_clusters)[i];
auto & centroid = _centroids[centroidIndex];
double newX = centroid.X() + (*_data)[i].X();
double newY = centroid.Y() + (*_data)[i].Y();
centroid = DataPoint(newX, newY);
++sumElements[centroidIndex];
}
for (int i = 0; i < _k; ++i)
{
auto & centroid = _centroids[i];
int z = sumElements[i];
if (z)
{
centroid.X(centroid.X() / z);
centroid.Y(centroid.Y() / z);
}
}
}
DataVector KMeansClustering::getCleanCentroids() const
{
return DataVector(_k, DataPoint(0, 0));
}
void KMeansClustering::assignToClusters()
{
// for every point, assign it
for (int i = 0; i < (*_data).size(); ++i)
{
// for every centroid calculate distance
vector<double> distances(_k);
for (int j = 0; j < _k; ++j)
{
double dx = (*_data)[i].X() - _centroids[j].X();
double dy = (*_data)[i].Y() - _centroids[j].Y();
distances[j] = dx*dx + dy*dy;
}
auto minDistance = min_element(distances.begin(), distances.end());
(*_clusters)[i] = distance(distances.begin(), minDistance);
}
}