-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPEMRF.py
executable file
·415 lines (333 loc) · 14.5 KB
/
PEMRF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
#!/usr/bin/python
import numpy as np
from numpy import linalg as la
import sys
from collections import OrderedDict
from decimal import Decimal
import gzip
import CFN
import simplejson as json
from json import encoder
def get(dim,i):
if (1+i >= len(dim)):
return int(dim[-1])
else:
return int(dim[1+i])
def isDiag(M):
i, j = M.shape
assert i == j
test = M.reshape(-1)[:-1].reshape(i-1, j+1)
return ~np.any(test[:, 1:])
def CFNcount(mat, Theta, m, dim, List_indices, assign = False):
num_nodes = np.sum(m) - 1
cfn_number = 0
tolerance = pow(10, -6)
is_sudoku = True
for i in range(num_nodes-1):
for j in range(i+1, num_nodes):
ETable = np.asarray(Block_Extract(mat, i+1, j+1, List_indices), dtype=np.longdouble)
if (np.max(abs(ETable.reshape(get(dim,i) * get(dim,j)))) >= tolerance): # else useless zero function
cfn_number += 1
is_sudoku &= (get(dim,i) == get(dim,j) and isDiag(ETable) and (np.min(np.diag(ETable)) > tolerance))
return cfn_number, is_sudoku
def toCFN(Z, Theta, m, dim, List_indices, btLimit = None, assign = None, weight = None, twoSol = False, min_arity = 2, max_arity = 2, harden = False, varnames = None, domains = None, configuration = False):
myCFN = CFN.CFN(twoSol,btLimit,configuration)
num_nodes = np.sum(m) - 1
tolerance = pow(10, -6)
# Constant term if desired
if (min_arity <= 0 and max_arity >= 0):
ETable = np.asarray((Block_Extract(Z, 0, 0, List_indices)))
if (np.max(abs(ETable)) >= tolerance):
myCFN.AddFunction([], ETable)
# Variables
for i in range(num_nodes):
if varnames:
var_name = varnames[i]
if domains:
values = ['v' + str(e) for e in domains[var_name]]
else:
values = [str(z) for z in range(1, get(dim, i) + 1)]
else:
var_name = "x"+str(i)
values = [str(z) for z in range(1, get(dim,i)+1)]
myCFN.AddVariable(var_name, values)
ETable = np.full(get(dim,i),0.0,dtype=np.longdouble)
# Unary terms if desired
if (min_arity <= 1 and max_arity >= 1):
ETable += np.asarray((Block_Extract(Z, 0, i+1, List_indices) +
np.transpose(Block_Extract(Z, i+1, 0, List_indices)) +
np.diag(Block_Extract(Z, i+1, i+1, List_indices)))[0], dtype=np.longdouble)
# Value assignment if desired
if (assign):
val = int(assign[i]) if (i<len(assign)) else -1
if (val >= 0 and val < get(dim,i)):
ATable = np.full(get(dim,i), 1000.0, dtype=np.longdouble)
ATable[val] = 0.0
ETable += ATable
# Unary weight bias if desired
if (weight):
if (weight[i]):
ETable += weight[i]
if (np.max(abs(ETable)) >= tolerance):
myCFN.AddFunction([i], ETable)
# Binary terms
if (min_arity <= 2 and max_arity >= 2):
for i in range(num_nodes-1):
for j in range(i+1, num_nodes):
ETable = np.asarray(Block_Extract(
Z, i+1, j+1, List_indices).reshape(get(dim,i) * get(dim,j)), dtype=np.longdouble)
if (harden):
ETable[ ETable >= harden ] = 1000.0
if (np.max(abs(ETable)) >= tolerance):
myCFN.AddFunction([i, j], ETable)
return myCFN
def write_cfn_gzip(cfn, output_filename, comment="", indent=0):
cfn_str = json.dumps(cfn, indent=indent,
separators=(',', ':'), use_decimal=True)
cfn_bytes = cfn_str.encode('utf-8')
with gzip.GzipFile(output_filename, 'w') as fout:
if (comment):
fout.write(('# '+comment+'\n').encode('utf-8'))
fout.write(cfn_bytes)
def dump_mat(mat, m, dim, precision, List_indices):
"""
dump the learnt MRF in to a JSON/CFN
mat is the matrix (theta or Z)
"""
energyformat = "{:."+str(precision)+"f}"
tolerance = pow(10, -precision)
num_nodes = np.sum(m) - 1
ub = 0.0
varsDict = OrderedDict()
funsDict = OrderedDict()
for i in range(num_nodes):
varname = "x_"+str(i)
varsDict[varname] = get(dim,i)
funDict = OrderedDict()
funDict['scope'] = [varname]
ETable1 = [Decimal(energyformat.format(e)) for e in (Block_Extract(
mat, 0, i+1, List_indices) + np.transpose(Block_Extract(mat, i+1, 0, List_indices)) + np.diag(Block_Extract(mat, i+1, i+1, List_indices)))[0]]
ub += float(max(ETable1))
funDict['costs'] = ETable1
funsDict['E_'+str(i)] = funDict
for j in range(i+1, num_nodes):
ovarname = "x_"+str(j)
temp = Block_Extract(mat, i+1, j+1, List_indices)
if (np.max(abs(temp)) >= tolerance): # else useless zero function
funDict = OrderedDict()
funDict['scope'] = [varname, ovarname]
ETable2 = [Decimal(energyformat.format(e))
for e in Block_Extract(mat, i+1, j+1, List_indices).reshape(get(dim,i) * get(dim,j))]
ub += float(max(ETable2))
funDict['costs'] = ETable2
funsDict['E_'+str(i)+"_"+str(j)] = funDict
header = OrderedDict()
header['name'] = "ADMM_MRF"
header['mustbe'] = '<'+energyformat.format(ub)
cfn = OrderedDict()
cfn["problem"] = header
cfn["variables"] = varsDict
cfn["functions"] = funsDict
return cfn
def Amatrix(data, m_vec, dim_vec, balance, c):
"""
data: the sufficient statistic data
m_vec: array containing the number of nodes for each type of distribution (plus initial value 1)
dim_vec: array containing the dimension for each type of distribution (plus initial value 1)
balance = [1, balance1, balance2, balance3, balance4]
c: binary vector that indicates the nodes that are discrete random variables
"""
num_sam, d_num = np.shape(data)
balance_vec = np.repeat(balance, m_vec*dim_vec)
balance_mat = np.tile(balance_vec, (num_sam, 1))
data_2 = np.concatenate((np.ones([num_sam, 1]), data), axis=1)
new_data = np.divide(data_2, balance_mat)
M = np.dot(new_data.T, new_data)/num_sam
A = M + np.diag(c)
return A
def Block_Extract(Z, row_ind, col_ind, List_indices):
"""
extracts the submatrix of Z for the row row_ind and the colonne col_ind
"""
inter_row = List_indices[row_ind]
inter_col = List_indices[col_ind]
num_row = len(inter_row)
num_col = len(inter_col)
inter_row_2 = inter_row.reshape(num_row, 1)
inter_col_2 = inter_col.reshape(num_col, 1)
return Z[inter_row_2, inter_col_2.T]
def Zupdate(theta, U, Eta, m, dim, List_indices, norm_type):
"""
Update each subblock of Z
"""
num_nodes = np.sum(m)-1
temp_matrix = theta + U
B = [[] for i in range(num_nodes+1)]
for i in range(num_nodes+1): # iterate from 0 to the number of nodes
for j in range(num_nodes+1):
Block_ij = Block_Extract(temp_matrix, i, j, List_indices)
if i == 0 or j == 0 or i == j:
B[i].append(Block_ij)
else:
if norm_type == 'l1_l2':
# Proximal of the l1/l2 norm
gamma = la.norm(Block_ij, 'fro')
# Block-wise soft-thresholding
B[i].append((1 - Eta[i, j]/gamma).clip(0) * Block_ij)
elif norm_type == 'l1':
# Proximal of the l1 norm
# Soft-thresholding
B[i].append((Block_ij - Eta[i, j]).clip(0) -
(-Block_ij - Eta[i, j]).clip(0))
elif norm_type == 'l2':
B[i].append(1/(1+2*Eta[i, j]) * Block_ij)
# now B is [ [B0,0 , B0,1 , ... , B0,31], [B1,0 , B1,1 , ... , B1,31], ..., [B31,0, ..., B31,31]]
Z_new = np.block(B)
Z_new = (Z_new + Z_new.T)/2
return Z_new
def createEmatrix(theta, m, dim, tolerance, List_indices):
"""
Convert the learnt theta parameters to an adjacency matrix
"""
num_nodes = np.sum(m)-1
E = np.zeros([num_nodes+1, num_nodes+1])
for i in range(num_nodes+1):
for j in range(num_nodes+1):
temp = la.norm(Block_Extract(theta, i, j, List_indices), 'fro')
if np.absolute(temp) >= tolerance:
E[i, j] = np.absolute(temp)
else:
E[i, j] = 0
return E[1:, 1:]
def information_criterion(Theta, A, Ematrix, num_sample):
"""
Computes the AIC and BIC criteria
"""
num_param = np.sum(Ematrix != 0)
log_likelihood = - np.trace(np.dot(Theta, A)) + np.log(la.det(Theta))
AIC = -2*log_likelihood + 2*num_param
BIC = -2*log_likelihood + np.log(num_sample)*num_param
return AIC, BIC, log_likelihood
def ADMM(A, Z_init, U_init, lamb, num_sam, m, dim, norm_type='l1_l2', save=0, precision=6, K=1000, epsilon_abs=1e-3, epsilon_rel=1e-3):
"""
ADMM algorithm for PE-MRF
A: matrix obtained using the function Amatrix
Z_init, U_init: initial values for Z and U
lamb: regularization parameter
num_sam: number of samples
m: array containing the number of nodes for each type of distribution (plus initial value 1)
dim: array containing the dimension for each type of distribution (plus initial value 1)
norm_type: 'l1_l2' (group lasso), 'l1' (sparse group lasso) or 'l2' (ridge).
save: save as CFN file False/0: none, True/1: theta, 2: Z, 3: both, 4: return data to build a CFN
K: maximum number of iterations
epsilon_abs/epsilon_rel: absolute/relative tolerances used in the termination criteria
"""
# Build the matrix W containing the weights used in the penalty
Mvec = np.array(np.repeat(dim, m))[np.newaxis]
W = np.sqrt(np.transpose(Mvec)@Mvec)
# Initialization
d = np.sum(np.multiply(m, dim))-1
Z = np.copy(Z_init)
U = np.copy(U_init)
rho = 4 # initial step size
# Construct a list that gives the indices corresponding to each node
Mvec = np.array(np.repeat(dim, m))[np.newaxis]
List_indices = np.split(np.arange(d+1), np.cumsum(Mvec))
del List_indices[-1]
# Iterations
for k in range(K):
print(k,end='\r')
eta1 = rho/num_sam # define eta for theta update
eta2 = (lamb*W)/rho # define eta2 for Z update
# Book-keeping previous Z for computing the dual residual
prev_Z = np.copy(Z)
# Update Theta
temp = eta1*(prev_Z-U) - A
temp = (temp + temp.T)/2
lambvec, Q = la.eigh(temp) # eigenvalue decomposition
D_lamb = np.diag(
lambvec + np.sqrt(np.power(lambvec, 2) + 4*eta1 * np.ones(len(lambvec))))
Theta = Q @ D_lamb @ (Q.T)
Theta = (1/(2*eta1)) * Theta
Theta = (Theta + Theta.T)/2
# Update Z
Z = Zupdate(Theta, U, eta2, m, dim, List_indices, norm_type)
# Update U
U += Theta - Z
# Check convergence
R_primal = la.norm(Theta - Z, 'fro') # primal residual
R_dual = rho * la.norm(Z - prev_Z, 'fro') # dual residual
epsilon_dual = (d+1) * epsilon_abs + epsilon_rel * \
rho * la.norm(U, 'fro')
epsilon_pri = (d+1) * epsilon_abs + epsilon_rel * \
max(la.norm(Theta, 'fro'), la.norm(Z, 'fro'))
if R_primal <= epsilon_pri and R_dual <= epsilon_dual:
print('Primal and dual met after ', end='')
break
# Varying step size
if R_primal > 10*R_dual:
rho *= 2
U /= 2
elif R_dual > 10*R_primal:
rho /= 2
U *= 2
# END of Iteration
print(k, 'iterations')
if (save & 1):
write_cfn_gzip(dump_mat(Theta, m, dim, precision,
List_indices), 'Theta_' + norm_type + '_'+str(lamb) + '.cfn.gz')
if (save & 2):
write_cfn_gzip(dump_mat(Z, m, dim, precision,
List_indices), 'Z_' + norm_type+'_' + str(lamb) + '.cfn.gz')
if (save & 4):
return (Z, Theta, m, dim, List_indices)
# Convert the learnt parameters Z to obtain the predicted adjacency matrix
E = createEmatrix(Z, m, dim, 1e-18, List_indices)
# normalization
E = E / np.sqrt(np.tensordot(np.diag(E), np.diag(E), axes=0))
return Theta, Z, U, E
def regul_path(lamb, A, num_sam, m, dim, norm_type):
"""
Computes the regularization path using warm restart
lamb: vector of lambda by increasing values
"""
Z_init = np.zeros(A.shape)
U_init = np.zeros(A.shape)
Adj_matrix = []
for p in range(len(lamb)):
optimal_theta, optimal_Z, optimal_U, Ematrix = ADMM(A, Z_init, U_init, lamb[p], num_sam, m, dim, norm_type)
# Warm restart is used to initialize Z and U for the next lambda
Z_init = np.copy(optimal_Z)
U_init = np.copy(optimal_U)
Adj_matrix.append(Ematrix)
return Adj_matrix
def stars_parameter_selection(lamb, data, m, dim, c, balance, norm_type='l1', beta=0.05, N=50):
"""
Implementation of the StARS (Stability Approach to Regularization Selection) algorithm [Liu et al., 2010]
that selects the regularization parameter for graph inference problems based on the stability
lamb: vector of lambda by increasing values (for example lamb = np.logspace(-3,-1,100))
data: sufficient statistics
beta: cut point value
N: number of bootstraps
"""
num_nodes = np.sum(m)-1
num_sample = data.shape[0]
Adj_matrix = [np.zeros([num_nodes, num_nodes]) for j in range(len(lamb))]
for ind_boot in range(N):
boot_index = np.random.choice(num_sample, int(10*np.sqrt(num_sample)), replace=False) # Random subsample
A_b = Amatrix(data[boot_index,:], m, dim, balance, c)
path = regul_path(lamb, A_b, num_sample, m, dim, norm_type) # run PE-MRF for all the lambda values
for j in range(len(lamb)):
# for each edge, add 1 if it has been predicted as present in the network
Adj_matrix[j] += 1*(path[j] > 0)
# Compute the total instability for each lambda
v = np.zeros(len(lamb))
iu = np.triu_indices(num_nodes, 1)
for j in range(len(lamb)):
Adj_matrix[j] = Adj_matrix[j] / N
D = 2*Adj_matrix[j]*(1-Adj_matrix[j]) # instability of the edges across subsamples
v[j] = np.mean(D[iu]) # total instability
# Determine the optimal lambda
vcum = [np.max(v[rho:]) for rho in range(len(lamb))]
lambda_opt = np.min(lamb[np.array(vcum) <= beta])
return v, lambda_opt, Adj_matrix