forked from UUDeCART/DeCART_ML_2019
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmeansplots.py
126 lines (93 loc) · 3.93 KB
/
kmeansplots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# https://gist.githubusercontent.com/clintval/e9afc246e77f6488cda79f86e4d37148/raw/23ea7565e74fd0b3c38afc50d25e1e6c609d68fd/kmeansplots.py
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter
__license__ = 'MIT'
__author__ = 'clintval'
def darken_rgb(rgb, p):
    """
    Darken an RGB color by the proportion ``p``.

    Every channel is scaled towards 0 by a factor of ``(1 - p)`` and then
    truncated with ``int``. Because of that truncation the channels are
    expected on the 0-255 scale; channels given as 0-1 floats would all
    collapse to 0.

    Parameters
    ----------
    rgb : iterable of numbers
        The color channels.
    p : float
        Proportion in ``[0, 1]``; 0 leaves the color unchanged and 1
        yields all zeros.

    Returns
    -------
    list of int
        The darkened channels, in the same order as ``rgb``.

    Raises
    ------
    AssertionError
        If ``p`` is outside ``[0, 1]``.
    """
    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is kept so callers see
    # exactly what the original `assert` produced.
    if not 0 <= p <= 1:
        raise AssertionError("Proportion must be [0, 1]")
    return [int(x * (1 - p)) for x in rgb]
def lighten_rgb(rgb, p):
    """
    Lighten an RGB color by the proportion ``p``.

    Every channel is moved towards 255 by ``p`` of its remaining distance
    (``(255 - x) * p + x``) and then truncated with ``int``, so the channels
    are expected on the 0-255 scale.

    Parameters
    ----------
    rgb : iterable of numbers
        The color channels.
    p : float
        Proportion in ``[0, 1]``; 0 leaves the color unchanged and 1
        yields 255 for every channel.

    Returns
    -------
    list of int
        The lightened channels, in the same order as ``rgb``.

    Raises
    ------
    AssertionError
        If ``p`` is outside ``[0, 1]``.
    """
    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type is kept so callers see
    # exactly what the original `assert` produced.
    if not 0 <= p <= 1:
        raise AssertionError("Proportion must be [0, 1]")
    return [int((255 - x) * p + x) for x in rgb]
def is_luminous(rgb):
    """
    Report whether the weighted luminance of ``rgb`` is below 0.179.

    Each channel is first linearized: values up to 0.03928 are divided by
    12.92, larger values go through ``((c + 0.055) / 1.055) ** 2.4``. The
    linear channels are then combined with the weights 0.2126, 0.7152 and
    0.0722.

    NOTE(review): the constants look like the WCAG relative-luminance
    formula, which would mean channels are expected as 0-1 floats - confirm.

    NOTE: despite the name, the result is ``True`` when the luminance is
    *low* (below 0.179) and ``False`` otherwise.

    Parameters
    ----------
    rgb : iterable of numbers
        The color channels; only the first three are weighted.

    Returns
    -------
    bool
    """
    weights = (0.2126, 0.7152, 0.0722)
    linear = [
        channel / 12.92
        if channel <= 0.03928
        else ((channel + 0.055) / 1.055) ** 2.4
        for channel in rgb
    ]
    luminance = sum(weight * value for weight, value in zip(weights, linear))
    # Coerce to a builtin bool so identity checks such as `is False` on the
    # result keep working whatever numeric type the channels have.
    return bool(luminance < 0.179)
def kmeans_plot(X, y, cluster_centers, ax=None):
    """
    Scatter-plot clustered samples and number each cluster center.

    Samples are colored by their cluster label. Every cluster center gets a
    text label (``1``-based index) placed slightly to the right of the
    center, drawn in the cluster's color with a white outline.

    Parameters
    ----------
    X : sequence of samples
        The columns of ``X`` are unpacked as the positional arguments of
        ``ax.scatter``, so two features per sample are expected.
    y : array-like
        Cluster label of every sample in ``X``.
    cluster_centers : sequence of numpy arrays
        One center per cluster; each center is reshaped with ``.reshape``.
    ax : matplotlib axes, optional
        Axes to draw on; defaults to ``plt.gca()``.

    Returns
    -------
    The axes that were drawn on.
    """
    import matplotlib.patheffects as path_effects
    from sklearn.metrics.pairwise import pairwise_distances_argmin_min

    if ax is None:
        ax = plt.gca()

    # `cm.spectral` no longer exists in current matplotlib; `nipy_spectral`
    # is the same colormap under its current name.
    labels = np.asarray(y, dtype=float)
    colors = cm.nipy_spectral(labels / len(cluster_centers))

    ax.scatter(*zip(*X), lw=0, c=colors, s=30)

    # Shift the labels right by 20% of the largest center x-coordinate so
    # they do not sit on top of the points.
    offset = max(center[0] for center in cluster_centers) * 0.2

    for i, cluster in enumerate(cluster_centers):
        # Reuse the color of the sample closest to this center.
        index, _ = pairwise_distances_argmin_min(cluster.reshape(1, -1), Y=X)
        cluster_color = colorConverter.to_rgb(colors[index[0]])

        # is_luminous() returns True for low-luminance colors, so this
        # branch darkens the colors that are NOT already dark.
        if not is_luminous(cluster_color):
            # darken_rgb() truncates to int and therefore needs 0-255
            # channels; feeding it matplotlib's 0-1 floats directly turned
            # every color into pure black.
            darkened = darken_rgb([c * 255 for c in cluster_color], 0.35)
            cluster_color = tuple(channel / 255 for channel in darkened)

        label = ax.text(x=cluster[0] + offset,
                        y=cluster[1],
                        s='{:d}'.format(i + 1),
                        color=cluster_color)
        # `linewidth` is the documented keyword for Stroke; the `lw`
        # abbreviation is likely rejected as an unknown property at draw time.
        label.set_path_effects([path_effects.Stroke(linewidth=2,
                                                    foreground='white'),
                                path_effects.Normal()])

    # Use one shared upper limit for both axes. The original read the
    # x-limits twice, so the y range was never taken into account.
    limit = max(*ax.get_xlim(), *ax.get_ylim())
    ax.set_xlim(0, limit)
    ax.set_ylim(0, limit)

    ax.set_xlabel("Feature space for the 1st feature")
    ax.set_ylabel("Feature space for the 2nd feature")
    return ax
def silhouette_plot(X, y, n_clusters, ax=None):
    """
    Draw a silhouette plot for a clustering.

    For every cluster the per-sample silhouette coefficients are sorted and
    drawn as a filled horizontal band; bands are stacked vertically with a
    small gap and labelled with the ``1``-based cluster number. A red
    vertical line marks the average silhouette score over all samples.

    Parameters
    ----------
    X : array-like
        The samples, passed to ``silhouette_score`` / ``silhouette_samples``.
    y : array-like
        Cluster label of every sample. Clusters are selected with
        ``y == i`` for ``i`` in ``range(n_clusters)``, so labels are
        expected to be ``0 .. n_clusters - 1``.
    n_clusters : int
        Number of clusters to draw.
    ax : matplotlib axes, optional
        Axes to draw on; defaults to ``plt.gca()``.

    Returns
    -------
    The axes that were drawn on.
    """
    from sklearn.metrics import silhouette_samples, silhouette_score

    if ax is None:
        ax = plt.gca()

    # `y == i` below must be an element-wise mask; a plain list would
    # compare as a whole and yield a single bool.
    y = np.asarray(y)

    # Compute the silhouette scores for each sample
    silhouette_avg = silhouette_score(X, y)
    sample_silhouette_values = silhouette_samples(X, y)

    y_lower = padding = 2
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = sample_silhouette_values[y == i]
        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        # `cm.spectral` no longer exists in current matplotlib;
        # `nipy_spectral` is the same colormap under its current name.
        color = cm.nipy_spectral(float(i) / n_clusters)
        ax.fill_betweenx(np.arange(y_lower, y_upper),
                         0,
                         ith_cluster_silhouette_values,
                         facecolor=color,
                         edgecolor=color,
                         alpha=0.7)

        # Label the silhouette plots with their cluster numbers at the middle
        ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i + 1))

        # Compute the new y_lower for next plot
        y_lower = y_upper + padding

    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")

    # The vertical line for average silhouette score of all the values
    ax.axvline(x=silhouette_avg, c='r', alpha=0.8, lw=0.8, ls='-')

    # No arrowprops are given, so only the text is drawn, at `xytext`.
    ax.annotate('Average',
                xytext=(silhouette_avg, y_lower * 1.025),
                xy=(0, 0),
                ha='center',
                alpha=0.8,
                color='r')

    ax.set_yticks([])  # Clear the yaxis labels / ticks
    ax.set_xticks([0, 0.2, 0.4, 0.6, 0.8, 1])
    ax.set_ylim(0, y_upper + 1)
    ax.set_xlim(-0.075, 1.0)
    return ax