# eval.py
"""(Semantic-Aware) Robustness evaluation module. """
from collections import Counter
from typing import Any, Dict, Optional, Union
import numpy as np
import scipy.stats
import torch
import torch.nn as nn
from tqdm import tqdm
from src.attacks import create_attack
from src.graph_models import GRAPH_MODEL_TYPE
from src.models import LP
from src.utils import accuracy
def evaluate_robustness(model: Optional[nn.Module],
label_prop: Optional[LP],
graph_model: GRAPH_MODEL_TYPE,
X_np: np.ndarray,
A_np: np.ndarray,
y_np: np.ndarray,
inductive_samples: int,
attack_params: Dict[str, Any],
surrogate_model: Optional[nn.Module],
device: Union[torch.device, str]) -> Dict[str, Any]:
"""Evaluate the robustness of a given model on a synthetic graph.
Evaluates different robustness metrics of a node-classifier trained on a
given graph generated from a specific generative graph model. Robustness
metrics are calculated over repeated sampling of an additional node in an
inductive manner and include:
- General Robustness to edge insertions / deletions
- Robustness w.r.t. Bayes Classifier
Args:
model (Optional[nn.Model]): Node-classifier to investigation. If not
provided, label propagation has to be enabled.
label_prop (Optional[nn.Module]): Label Propagation Module applied on
top of model-predictions. Can be disabled by setting to None or
used as stand-alone if model is set to None.
graph_model (GRAPH_MODEL_TYPE): Generative graph model.
X_np (np.ndarray, [n x d]): Feature matrix (assumed to be known during
training).
A_np (np.ndarray, [n x n]): Adjacency matrix (assumed to be known
during training).
y_np (np.ndarray, [n, ]): Labels of nodes (assumed to be known)
inductive_samples (int): How often an additional node should be
inductively sampled.
attack_params (Dict[str, Any]): Used to create attack.
surrogate_model: (Optional[nn.Module]): If nettack is used, surrogate
model to attack to get perturbed adjacency matrix.
device: Calculation device for predictions.
Returns:
Dict[str, Any]: Robustness statistics. This dict has two subdicts:
"predictions_statistics":
Contains counts how often bayes classfier or gnn could classify
a node correctly as well as separability information of node
w.r.t. only features / structure.
"robustness_statistics":
Includes general (degree-dependent) robustness of bayes
classifier and of GNN when they correctly classified a node.
Then, for the case that they both correctly classify a node, it
collects the following statistics:
- Degree Dependent Robustness of Bayes Classifier
- Degree Dependent Robustness of GNN
- Degree Dependent Robustness of GNN w.r.t. Bayes Classifier.
These three dicts store the robustness of a node of degree deg
in a list accessed by the degree as key. Important: if e.g. a
node is in position 2 in the list of nodes of degree 4, it is
the same node in all three dicts. I.e. the position uniquely
identifies the node.
Out of convenience, also degree-dependent avg/median/std/max
robustness are returned (all calculated using the above
raw-data dicts)
"""
    assert model is not None or label_prop is not None
    # Statistics regarding Bayes & GNN predictions
    c_acc_bayes = 0  # Count nodes correctly classified by the Bayes classifier
    c_acc_bayes_deg = Counter()  # As above, but per degree
    c_acc_bayes_structure = 0  # Count nodes separable by structure alone
    c_acc_bayes_structure_deg = Counter()  # As above, but per degree
    c_acc_bayes_feature = 0  # Count nodes separable by features alone (a
                             # degree-dependent count makes no sense, as the
                             # features are independent of the connections)
    c_acc_bayes_not_gnn = 0  # Decisions where the BC is correct but the GNN is wrong
    c_acc_bayes_not_gnn_deg = Counter()  # As above, but per degree
    c_acc_gnn = 0  # Count nodes correctly classified by the GNN
    c_acc_gnn_deg = Counter()  # As above, but per degree
    c_acc_gnn_not_bayes = 0  # Decisions where the GNN is correct even though
                             # the BC is not
    c_acc_gnn_not_bayes_deg = Counter()  # As above, but per degree
    c_acc_bayes_gnn = 0  # Count nodes correctly classified by both BC & GNN
    c_acc_bayes_gnn_deg = Counter()  # As above, but per degree
    c_degree_total = Counter()  # Count degrees of all generated nodes
    # Statistics regarding Bayes & GNN robustness
    c_bayes_robust = dict()  # Degree-dependent robustness of the BC
    c_gnn_robust = dict()  # Degree-dependent robustness of the GNN
    c_gnn_wrt_bayes_robust = dict()  # Degree-dependent robustness of the GNN
                                     # w.r.t. the BC
    c_bayes_robust_when_both = dict()  # Degree-dependent robustness of the BC
                                       # on nodes separable by both GNN and BC
    c_gnn_robust_when_both = dict()  # Degree-dependent robustness of the GNN
                                     # on nodes separable by both GNN and BC
    c_bayes_higher_robust = 0  # Number of times the BC is more robust than the GNN
    c_gnn_higher_robust = 0  # Number of times the GNN is "overly robust"
    c_bayes_gnn_equal_robust = 0  # Number of times the GNN has perfect
                                  # robustness w.r.t. the BC
    n = y_np.size
    if "max_robustness" in attack_params:
        max_robustness = attack_params["max_robustness"]
    else:
        # Nettack (or any other attack) is given the possibility to remove all
        # same-class and add all different-class edges
        max_robustness = y_np.size
    if model is not None:
        model.eval()
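    # For every inductively sampled node: check its separability by features /
    # structure / likelihood, compute the GNN prediction, and then apply
    # adversarial edge flips one at a time until both the Bayes classifier and
    # the GNN misclassify the node (or max_robustness is reached), recording
    # how many flips each classifier withstood.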
    for i in tqdm(range(inductive_samples)):
        X, A, y = graph_model.sample_conditional(1, X_np, A_np, y_np)
        deg_n = str(np.sum(A[:, n]))
        c_degree_total[deg_n] += 1
        # Statistics Bayes classifier
        feature_separable, _ = graph_model.feature_separability(X, y, [n])
        structure_separable, _ = graph_model.structure_separability(A, y, [n])
        bayes_separable, _ = graph_model.likelihood_separability(X, A, y, [n])
        if bayes_separable:
            c_acc_bayes += 1
            c_acc_bayes_deg[deg_n] += 1
        if structure_separable:
            c_acc_bayes_structure += 1
            c_acc_bayes_structure_deg[deg_n] += 1
        if feature_separable:
            c_acc_bayes_feature += 1
        # Calculate GNN prediction
        X_gpu = torch.tensor(X, dtype=torch.float32, device=device)
        A_gpu = torch.tensor(A, dtype=torch.float32, device=device)
        y_gpu = torch.tensor(y, device=device)
        if model is not None:
            logits = model(X_gpu, A_gpu)
            normalize = True
        else:
            logits = None
            normalize = False
        if label_prop is not None:
            logits = label_prop.smooth(logits, y_gpu[:n], [i for i in range(n)],
                                       A_gpu, normalize)
        gnn_separable = round(accuracy(logits, y_gpu, n))
        # Prediction statistics
        if gnn_separable:
            c_acc_gnn += 1
            c_acc_gnn_deg[deg_n] += 1
            if bayes_separable:
                c_acc_bayes_gnn += 1
                c_acc_bayes_gnn_deg[deg_n] += 1
            else:
                c_acc_gnn_not_bayes += 1
                c_acc_gnn_not_bayes_deg[deg_n] += 1
        elif bayes_separable:
            c_acc_bayes_not_gnn += 1
            c_acc_bayes_not_gnn_deg[deg_n] += 1
        # Investigate robustness
        c_robustness = 0  # Counts changes to the local neighbourhood
        bayes_separable_new = 0
        gnn_separable_new = 0
        # Is counting the GNN's robustness w.r.t. the Bayes classifier possible?
        gnn_wrt_bayes_setting = False
        if bayes_separable and gnn_separable:
            gnn_wrt_bayes_setting = True
        attack = create_attack(n, X, A, y, attack_params, surrogate_model,
                               model, label_prop, device)
        while bayes_separable or gnn_separable:
            adv_edge = attack.create_adversarial_pert()
            if c_robustness >= max_robustness:
                adv_edge = None
            if adv_edge is not None:
                u, v = adv_edge
                if A_gpu[u, v] == 1:
                    A_gpu[u, v] = 0
                    A_gpu[v, u] = 0
                    A[u, v] = 0  # Simple attacks update A themselves, nettack does not
                    A[v, u] = 0
                else:
                    A_gpu[u, v] = 1
                    A_gpu[v, u] = 1
                    A[u, v] = 1
                    A[v, u] = 1
            # Robustness of the BC
            if bayes_separable:
                bayes_separable_new, _ = graph_model.likelihood_separability(
                    X, A, y, [n]
                )
                if not bayes_separable_new or adv_edge is None:
                    if deg_n not in c_bayes_robust:
                        c_bayes_robust[deg_n] = []
                    c_bayes_robust[deg_n].append(c_robustness)
                    if gnn_wrt_bayes_setting:
                        if deg_n not in c_bayes_robust_when_both:
                            c_bayes_robust_when_both[deg_n] = []
                        c_bayes_robust_when_both[deg_n].append(c_robustness)
            # Robustness of the GNN
            if gnn_separable:
                if model is not None:
                    logits = model(X_gpu, A_gpu)
                    normalize = True
                else:
                    logits = None
                    normalize = False
                if label_prop is not None:
                    logits = label_prop.smooth(logits, y_gpu[:n],
                                               [i for i in range(n)],
                                               A_gpu, normalize)
                gnn_separable_new = round(accuracy(logits, y_gpu, n))
                if not gnn_separable_new or adv_edge is None:
                    if deg_n not in c_gnn_robust:
                        c_gnn_robust[deg_n] = []
                    c_gnn_robust[deg_n].append(c_robustness)
                    if gnn_wrt_bayes_setting:
                        if deg_n not in c_gnn_robust_when_both:
                            c_gnn_robust_when_both[deg_n] = []
                        c_gnn_robust_when_both[deg_n].append(c_robustness)
            # Robustness of the GNN w.r.t. the BC
            if bayes_separable and gnn_separable:
                if deg_n not in c_gnn_wrt_bayes_robust:
                    c_gnn_wrt_bayes_robust[deg_n] = []
                if not bayes_separable_new and not gnn_separable_new:
                    c_bayes_gnn_equal_robust += 1
                    c_gnn_wrt_bayes_robust[deg_n].append(c_robustness)
                elif bayes_separable_new and not gnn_separable_new:
                    c_bayes_higher_robust += 1
                    c_gnn_wrt_bayes_robust[deg_n].append(c_robustness)
                elif not bayes_separable_new and gnn_separable_new:
                    c_gnn_higher_robust += 1
                    c_gnn_wrt_bayes_robust[deg_n].append(c_robustness)
                elif adv_edge is None:
                    c_bayes_gnn_equal_robust += 1
                    c_gnn_wrt_bayes_robust[deg_n].append(c_robustness)
                else:
                    pass
            bayes_separable = bayes_separable_new if adv_edge is not None else False
            gnn_separable = gnn_separable_new if adv_edge is not None else False
            c_robustness += 1
    # Postprocess robustness counts to averages
    avg_bayes_robust = {}
    med_bayes_robust = {}
    std_bayes_robust = {}
    sem_bayes_robust = {}
    max_bayes_robust = {}
    for degree in c_acc_bayes_deg:
        avg_bayes_robust[f"{degree}"] = float(np.mean(c_bayes_robust[degree]))
        med_bayes_robust[f"{degree}"] = float(np.median(c_bayes_robust[degree]))
        std_bayes_robust[f"{degree}"] = float(np.std(c_bayes_robust[degree]))
        sem_bayes_robust[f"{degree}"] = float(scipy.stats.sem(c_bayes_robust[degree], ddof=0))
        max_bayes_robust[f"{degree}"] = float(np.max(c_bayes_robust[degree]))
    avg_gnn_robust = {}
    med_gnn_robust = {}
    std_gnn_robust = {}
    sem_gnn_robust = {}
    max_gnn_robust = {}
    for degree in c_acc_gnn_deg:
        avg_gnn_robust[f"{degree}"] = float(np.mean(c_gnn_robust[degree]))
        med_gnn_robust[f"{degree}"] = float(np.median(c_gnn_robust[degree]))
        std_gnn_robust[f"{degree}"] = float(np.std(c_gnn_robust[degree]))
        sem_gnn_robust[f"{degree}"] = float(scipy.stats.sem(c_gnn_robust[degree], ddof=0))
        max_gnn_robust[f"{degree}"] = float(np.max(c_gnn_robust[degree]))
    avg_gnn_wrt_bayes_robust = {}
    med_gnn_wrt_bayes_robust = {}
    std_gnn_wrt_bayes_robust = {}
    sem_gnn_wrt_bayes_robust = {}
    max_gnn_wrt_bayes_robust = {}
    # Robustness of the GNN w.r.t. the Bayes classifier
    for degree in c_acc_bayes_gnn_deg:
        avg_gnn_wrt_bayes_robust[f"{degree}"] = float(np.mean(c_gnn_wrt_bayes_robust[degree]))
        med_gnn_wrt_bayes_robust[f"{degree}"] = float(np.median(c_gnn_wrt_bayes_robust[degree]))
        std_gnn_wrt_bayes_robust[f"{degree}"] = float(np.std(c_gnn_wrt_bayes_robust[degree]))
        sem_gnn_wrt_bayes_robust[f"{degree}"] = float(scipy.stats.sem(c_gnn_wrt_bayes_robust[degree], ddof=0))
        max_gnn_wrt_bayes_robust[f"{degree}"] = float(np.max(c_gnn_wrt_bayes_robust[degree]))
    avg_bayes_robust_when_both = {}
    med_bayes_robust_when_both = {}
    std_bayes_robust_when_both = {}
    sem_bayes_robust_when_both = {}
    max_bayes_robust_when_both = {}
    for degree in c_acc_bayes_gnn_deg:
        avg_bayes_robust_when_both[f"{degree}"] = float(np.mean(c_bayes_robust_when_both[degree]))
        med_bayes_robust_when_both[f"{degree}"] = float(np.median(c_bayes_robust_when_both[degree]))
        std_bayes_robust_when_both[f"{degree}"] = float(np.std(c_bayes_robust_when_both[degree]))
        sem_bayes_robust_when_both[f"{degree}"] = float(scipy.stats.sem(c_bayes_robust_when_both[degree], ddof=0))
        max_bayes_robust_when_both[f"{degree}"] = float(np.max(c_bayes_robust_when_both[degree]))
    avg_gnn_robust_when_both = {}
    med_gnn_robust_when_both = {}
    std_gnn_robust_when_both = {}
    sem_gnn_robust_when_both = {}
    max_gnn_robust_when_both = {}
    for degree in c_acc_bayes_gnn_deg:
        avg_gnn_robust_when_both[f"{degree}"] = float(np.mean(c_gnn_robust_when_both[degree]))
        med_gnn_robust_when_both[f"{degree}"] = float(np.median(c_gnn_robust_when_both[degree]))
        std_gnn_robust_when_both[f"{degree}"] = float(np.std(c_gnn_robust_when_both[degree]))
        sem_gnn_robust_when_both[f"{degree}"] = float(scipy.stats.sem(c_gnn_robust_when_both[degree], ddof=0))
        max_gnn_robust_when_both[f"{degree}"] = float(np.max(c_gnn_robust_when_both[degree]))
    return dict(
        prediction_statistics=dict(
            c_acc_bayes=c_acc_bayes,
            c_acc_gnn=c_acc_gnn,
            c_acc_bayes_structure=c_acc_bayes_structure,
            c_acc_bayes_feature=c_acc_bayes_feature,
            c_acc_bayes_gnn=c_acc_bayes_gnn,
            c_acc_bayes_not_gnn=c_acc_bayes_not_gnn,
            c_acc_gnn_not_bayes=c_acc_gnn_not_bayes
        ),
        robustness_statistics=dict(
            # General robustness statistics
            c_bayes_higher_robust=c_bayes_higher_robust,
            c_bayes_gnn_equal_robust=c_bayes_gnn_equal_robust,
            c_gnn_higher_robust=c_gnn_higher_robust,
            # Statistics calculated from the degree-dependent robustness data
            avg_bayes_robust=avg_bayes_robust,
            med_bayes_robust=med_bayes_robust,
            std_bayes_robust=std_bayes_robust,
            sem_bayes_robust=sem_bayes_robust,
            max_bayes_robust=max_bayes_robust,
            avg_gnn_robust=avg_gnn_robust,
            med_gnn_robust=med_gnn_robust,
            std_gnn_robust=std_gnn_robust,
            sem_gnn_robust=sem_gnn_robust,
            max_gnn_robust=max_gnn_robust,
            avg_gnn_wrt_bayes_robust=avg_gnn_wrt_bayes_robust,
            med_gnn_wrt_bayes_robust=med_gnn_wrt_bayes_robust,
            std_gnn_wrt_bayes_robust=std_gnn_wrt_bayes_robust,
            sem_gnn_wrt_bayes_robust=sem_gnn_wrt_bayes_robust,
            max_gnn_wrt_bayes_robust=max_gnn_wrt_bayes_robust,
            avg_bayes_robust_when_both=avg_bayes_robust_when_both,
            med_bayes_robust_when_both=med_bayes_robust_when_both,
            std_bayes_robust_when_both=std_bayes_robust_when_both,
            sem_bayes_robust_when_both=sem_bayes_robust_when_both,
            max_bayes_robust_when_both=max_bayes_robust_when_both,
            avg_gnn_robust_when_both=avg_gnn_robust_when_both,
            med_gnn_robust_when_both=med_gnn_robust_when_both,
            std_gnn_robust_when_both=std_gnn_robust_when_both,
            sem_gnn_robust_when_both=sem_gnn_robust_when_both,
            max_gnn_robust_when_both=max_gnn_robust_when_both,
            # Raw degree-dependent robustness data (one entry per node)
            c_bayes_robust=c_bayes_robust,  # Robustness counts of g w.r.t. y
            c_gnn_robust=c_gnn_robust,  # Robustness counts of f w.r.t. y
            c_gnn_wrt_bayes_robust=c_gnn_wrt_bayes_robust,  # Robustness counts of f w.r.t. g
            c_bayes_robust_when_both=c_bayes_robust_when_both,  # Robustness counts of g w.r.t. y when both g & f correctly classified the node
            c_gnn_robust_when_both=c_gnn_robust_when_both,  # Robustness counts of f w.r.t. y when both g & f correctly classified the node
            c_degree_total=dict(c_degree_total)
        )
    )
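

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the objects below (`my_graph_model`,
# `my_gnn`, `my_attack_params`) are hypothetical placeholders whose
# construction is project-specific; only the call itself follows the
# signature of `evaluate_robustness` above.
#
# stats = evaluate_robustness(
#     model=my_gnn,                    # trained node classifier (or None)
#     label_prop=None,                 # optional label propagation on top
#     graph_model=my_graph_model,      # generative graph model from src.graph_models
#     X_np=X, A_np=A, y_np=y,          # training graph as numpy arrays
#     inductive_samples=1000,          # number of inductively sampled nodes
#     attack_params=my_attack_params,  # attack config, may set "max_robustness"
#     surrogate_model=None,            # only needed for nettack-style attacks
#     device="cuda" if torch.cuda.is_available() else "cpu",
# )
# print(stats["robustness_statistics"]["avg_gnn_robust"])
# ---------------------------------------------------------------------------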