-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathmmd.py
159 lines (119 loc) · 5.18 KB
/
mmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
'''
MMD functions implemented in tensorflow.
'''
from __future__ import division
import tensorflow as tf
_eps = 1e-8
def ard_mmd2_and_ratio(X, Y, bws, biased=False, min_var_est=1e-5):
K_XX, K_XY, K_YY, const_diagonal = _ard_kernel(X, Y, bws)
mmd2, var_est = _mmd2_and_variance(
K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)
ratio = mmd2 / tf.sqrt(tf.maximum(var_est, min_var_est))
return mmd2, ratio, var_est
def _ard_kernel(X, Y, bws):
X = X / bws
Y = Y / bws
XX = tf.matmul(X, X, transpose_b=True)
XY = tf.matmul(X, Y, transpose_b=True)
YY = tf.matmul(Y, Y, transpose_b=True)
X_sqnorms = tf.diag_part(XX)
Y_sqnorms = tf.diag_part(YY)
r = lambda x: tf.expand_dims(x, 0)
c = lambda x: tf.expand_dims(x, 1)
XXsqnorm = tf.maximum(-2 * XX + c(X_sqnorms) + r(X_sqnorms), 0.)
XYsqnorm = tf.maximum(-2 * XY + c(X_sqnorms) + r(Y_sqnorms), 0.)
YYsqnorm = tf.maximum(-2 * YY + c(Y_sqnorms) + r(Y_sqnorms), 0.)
K_XY = tf.exp(-.5 * XYsqnorm)
K_XX = tf.exp(-.5 * XXsqnorm)
K_YY = tf.exp(-.5 * YYsqnorm)
return K_XX, K_XY, K_YY, 1
def _mix_rbf_kernel(X, Y, sigmas, wts=None):
if wts is None:
wts = [1] * len(sigmas)
XX = tf.matmul(X, X, transpose_b=True)
XY = tf.matmul(X, Y, transpose_b=True)
YY = tf.matmul(Y, Y, transpose_b=True)
X_sqnorms = tf.diag_part(XX)
Y_sqnorms = tf.diag_part(YY)
r = lambda x: tf.expand_dims(x, 0)
c = lambda x: tf.expand_dims(x, 1)
K_XX, K_XY, K_YY = 0, 0, 0
for sigma, wt in zip(sigmas, wts):
gamma = 1 / (2 * sigma**2)
K_XX += wt * tf.exp(-gamma * (-2 * XX + c(X_sqnorms) + r(X_sqnorms)))
K_XY += wt * tf.exp(-gamma * (-2 * XY + c(X_sqnorms) + r(Y_sqnorms)))
K_YY += wt * tf.exp(-gamma * (-2 * YY + c(Y_sqnorms) + r(Y_sqnorms)))
return K_XX, K_XY, K_YY, tf.reduce_sum(wts)
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
m = tf.cast(K_XX.get_shape()[0], tf.float32) # Assumes X, Y are same shape
# Get the various sums of kernels that we'll use
# Kts drop the diagonal, but we don't need to compute them explicitly
if const_diagonal is not False:
const_diagonal = tf.cast(const_diagonal, tf.float32)
diag_X = diag_Y = const_diagonal
sum_diag_X = sum_diag_Y = m * const_diagonal
sum_diag2_X = sum_diag2_Y = m * const_diagonal**2
else:
diag_X = tf.diag_part(K_XX)
diag_Y = tf.diag_part(K_YY)
sum_diag_X = tf.reduce_sum(diag_X)
sum_diag_Y = tf.reduce_sum(diag_Y)
sum_diag2_X = sq_sum(diag_X)
sum_diag2_Y = sq_sum(diag_Y)
Kt_XX_sums = tf.reduce_sum(K_XX, 1) - diag_X
Kt_YY_sums = tf.reduce_sum(K_YY, 1) - diag_Y
K_XY_sums_0 = tf.reduce_sum(K_XY, 0)
K_XY_sums_1 = tf.reduce_sum(K_XY, 1)
Kt_XX_sum = tf.reduce_sum(Kt_XX_sums)
Kt_YY_sum = tf.reduce_sum(Kt_YY_sums)
K_XY_sum = tf.reduce_sum(K_XY_sums_0)
Kt_XX_2_sum = sq_sum(K_XX) - sum_diag2_X
Kt_YY_2_sum = sq_sum(K_YY) - sum_diag2_Y
K_XY_2_sum = sq_sum(K_XY)
if biased:
mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m)
+ (Kt_YY_sum + sum_diag_Y) / (m * m)
- 2 * K_XY_sum / (m * m))
else:
mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * (m-1))
+ (Kt_YY_sum + sum_diag_Y) / (m * (m-1))
- 2 * K_XY_sum / (m * m))
var_est = (
2 / (m**2 * (m-1)**2) * (
2 * sq_sum(Kt_XX_sums) - Kt_XX_2_sum
+ 2 * sq_sum(Kt_YY_sums) - Kt_YY_2_sum)
- (4*m-6) / (m**3 * (m-1)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2)
+ 4*(m-2) / (m**3 * (m-1)**2) * (
sq_sum(K_XY_sums_1) + sq_sum(K_XY_sums_0))
- 4 * (m-3) / (m**3 * (m-1)**2) * K_XY_2_sum
- (8*m - 12) / (m**5 * (m-1)) * K_XY_sum**2
+ 8 / (m**3 * (m-1)) * (
1/m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum
- dot(Kt_XX_sums, K_XY_sums_1)
- dot(Kt_YY_sums, K_XY_sums_0))
)
return mmd2, var_est
def dot(x, y, name=None):
"The dot product of two vectors x and y."
with tf.name_scope(name, "Dot", [x, y]):
x = tf.convert_to_tensor(x, name='x')
y = tf.convert_to_tensor(y, name='y')
x.get_shape().assert_has_rank(1)
y.get_shape().assert_has_rank(1)
return tf.squeeze(tf.matmul(tf.expand_dims(x, 0), tf.expand_dims(y, 1)))
def sq_sum(t, name=None):
"The squared Frobenius-type norm of a tensor, sum(t ** 2)."
with tf.name_scope(name, "SqSum", [t]):
t = tf.convert_to_tensor(t, name='t')
return 2 * tf.nn.l2_loss(t)
def rbf_mmd2_and_ratio(X, Y, sigma=1, biased=True):
return mix_rbf_mmd2_and_ratio(X, Y, sigmas=[sigma], biased=biased)
def mix_rbf_mmd2_and_ratio(X, Y, sigmas=(1,), wts=None, biased=True):
K_XX, K_XY, K_YY, d = _mix_rbf_kernel(X, Y, sigmas, wts)
return _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=d, biased=biased)
def _mmd2_and_ratio(K_XX, K_XY, K_YY, const_diagonal=False, biased=False,
min_var_est=_eps):
mmd2, var_est = _mmd2_and_variance(
K_XX, K_XY, K_YY, const_diagonal=const_diagonal, biased=biased)
ratio = mmd2 / tf.sqrt(tf.maximum(var_est, min_var_est))
return mmd2, ratio