# tsne.py (forked from rushter/MLAlgorithms)

import logging

import numpy as np
from six.moves import range

from mla.base import BaseEstimator
from mla.metrics.distance import l2_distance

np.random.seed(999)

"""
References:
    https://lvdmaaten.github.io/tsne/

Based on:
    https://lvdmaaten.github.io/tsne/code/tsne_python.zip
"""


class TSNE(BaseEstimator):
    y_required = False

    def __init__(self, n_components=2, perplexity=30.0, max_iter=200, learning_rate=500):
        """A t-Distributed Stochastic Neighbor Embedding implementation.

        Parameters
        ----------
        n_components : int, default 2
        perplexity : float, default 30.0
        max_iter : int, default 200
        learning_rate : float, default 500
        """
        self.max_iter = max_iter
        self.perplexity = perplexity
        self.n_components = n_components
        self.initial_momentum = 0.5
        self.final_momentum = 0.8
        self.min_gain = 0.01
        self.lr = learning_rate
        self.tol = 1e-5
        self.perplexity_tries = 50

    def fit_transform(self, X, y=None):
        self._setup_input(X, y)

        Y = np.random.randn(self.n_samples, self.n_components)
        velocity = np.zeros_like(Y)
        gains = np.ones_like(Y)

        P = self._get_pairwise_affinities(X)

        iter_num = 0
        while iter_num < self.max_iter:
            iter_num += 1

            D = l2_distance(Y)
            Q = self._q_distribution(D)

            # Normalize the Q distribution
            Q_n = Q / np.sum(Q)

            # Early exaggeration and momentum schedule
            pmul = 4.0 if iter_num < 100 else 1.0
            momentum = self.initial_momentum if iter_num < 20 else self.final_momentum

            # Perform gradient step
            grads = np.zeros(Y.shape)
            for i in range(self.n_samples):
                grad = 4 * np.dot((pmul * P[i] - Q_n[i]) * Q[i], Y[i] - Y)
                grads[i] = grad
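
            # The loop above implements the t-SNE gradient
            #     dC/dY_i = 4 * sum_j (pmul * p_ij - q_ij) * (1 + |Y_i - Y_j|^2)^-1 * (Y_i - Y_j)
            # where the (1 + |Y_i - Y_j|^2)^-1 factor is the unnormalized Q
            # returned by _q_distribution.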

            # Adaptive per-parameter gains: grow the gain where the gradient
            # sign disagrees with the current velocity, decay it otherwise
            gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0))
            gains = gains.clip(min=self.min_gain)

            velocity = momentum * velocity - self.lr * (gains * grads)
            Y += velocity
            # Re-center the embedding at the origin
            Y = Y - np.mean(Y, 0)

            error = np.sum(P * np.log(P / Q_n))
            logging.info("Iteration %s, error %s", iter_num, error)
        return Y

    def _get_pairwise_affinities(self, X):
        """Computes pairwise affinities."""
        affines = np.zeros((self.n_samples, self.n_samples), dtype=np.float32)
        target_entropy = np.log(self.perplexity)
        distances = l2_distance(X)

        for i in range(self.n_samples):
            affines[i, :] = self._binary_search(distances[i], target_entropy)

        # Fill the diagonal with a near-zero value (a point is not its own neighbor)
        np.fill_diagonal(affines, 1.0e-12)
        affines = affines.clip(min=1e-100)

        # Symmetrize and normalize into a joint probability distribution
        affines = (affines + affines.T) / (2 * self.n_samples)
        return affines
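
    # The symmetrization above forms the joint distribution
    #     p_ij = (p_{j|i} + p_{i|j}) / (2 * n_samples),
    # so low-density points still contribute to the cost even when their
    # conditional distributions are very peaked.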

    def _binary_search(self, dist, target_entropy):
        """Performs binary search to find a precision matching the target entropy."""
        precision_min = 0
        precision_max = 1.0e15
        precision = 1.0e5

        for _ in range(self.perplexity_tries):
            denom = np.sum(np.exp(-dist[dist > 0.0] / precision))
            beta = np.exp(-dist / precision) / denom

            # Exclude zeros before taking the log
            g_beta = beta[beta > 0.0]
            # Use the natural log so the entropy is directly comparable to
            # target_entropy = np.log(self.perplexity)
            entropy = -np.sum(g_beta * np.log(g_beta))

            error = entropy - target_entropy
            if error > 0:
                # Entropy too high: decrease precision
                precision_max = precision
                precision = (precision + precision_min) / 2.0
            else:
                # Entropy too low: increase precision
                precision_min = precision
                precision = (precision + precision_max) / 2.0

            if np.abs(error) < self.tol:
                break

        return beta
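
    # For example, with the default perplexity of 30.0 the target entropy is
    # np.log(30.0), roughly 3.401; the search halves the bracketing interval
    # up to perplexity_tries times until the row's entropy lands within tol
    # of that target.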

    def _q_distribution(self, D):
        """Computes the unnormalized Student t-distribution (one degree of freedom)."""
        Q = 1.0 / (1.0 + D)
        np.fill_diagonal(Q, 0.0)
        Q = Q.clip(min=1e-100)
        return Q
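

# A minimal usage sketch, not part of the original module. It assumes
# scikit-learn is installed purely to generate demo data; any numeric
# (n_samples, n_features) array works as input.
if __name__ == "__main__":
    from sklearn.datasets import make_blobs

    X, _ = make_blobs(n_samples=100, n_features=10, centers=3, random_state=42)
    embedding = TSNE(n_components=2, perplexity=20.0, max_iter=200).fit_transform(X)
    print(embedding.shape)  # expected: (100, 2)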