-
Notifications
You must be signed in to change notification settings - Fork 2
/
isoforest_experiment.py
110 lines (78 loc) · 2.57 KB
/
isoforest_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Copyright 2021 Grabtaxi Holdings Pte Ltd (GRAB), All rights reserved.
# Use of this source code is governed by an MIT-style license that can be found in the LICENSE file
import sys
from sklearn.metrics import roc_curve, precision_recall_curve, auc
from data_finefoods import load_graph
import argparse
import os
import torch
from utils.seed import seed_all
from sklearn.ensemble import IsolationForest
# %% args
# Command-line options identify which stored graph sample to load.
parser = argparse.ArgumentParser(description="IsolationForest")
parser.add_argument("--name", default="wikipedia_anomaly", type=str, help="name")
parser.add_argument(
    "--key",
    default="graph_anomaly_list",
    type=str,
    help="key to the data",
)
parser.add_argument("--id", default=0, type=int, help="id to the data")

# args1 holds the parsed CLI values; args2 holds settings fixed in the script.
args1 = vars(parser.parse_args())
args2 = dict(seed=0)

# Merge both into one dict; on a key clash the args2 value wins.
args = dict(args1)
args.update(args2)

# Seed through the project's seed_all helper before any model is built.
seed_all(args["seed"])

result_dir = "results/"
# %% data
# Load the graph sample selected by the CLI arguments.
data = load_graph(args["name"], args["key"], args["id"])

# Size of the second dimension of each feature matrix
# (presumably the per-item feature count -- confirm against load_graph).
u_ch, v_ch, e_ch = (
    data.xu.shape[1],
    data.xv.shape[1],
    data.xe.shape[1],
)

print(
    f"Data dimension: U node = {data.xu.shape}; V node = {data.xv.shape}; E edge = {data.xe.shape}; \n"
)

# %% model
# Module-level aliases for the features (x*), the adjacency (adj) and the
# labels (y*) of the U nodes, V nodes and edges.
xu = data.xu
xv = data.xv
xe = data.xe
adj = data.adj
yu = data.yu
yv = data.yv
ye = data.ye
def train_eval(x, y):
    """Fit an isolation forest on ``x`` and evaluate it against labels ``y``.

    The forest is fitted and scored on the same samples. The negated
    ``score_samples`` output is used as the anomaly score, so a larger
    score means "more anomalous"; ``y`` is presumably 1 for anomalies and
    0 otherwise (confirm against the data loader).

    Args:
        x: Feature matrix accepted by ``IsolationForest.fit``.
        y: Ground-truth labels, one per row of ``x``.

    Returns:
        A tuple ``(roc_auc, pr_auc, rc_curve, pr_curve)`` where ``rc_curve``
        is the unmodified ``roc_curve`` result and ``pr_curve`` is the
        unmodified ``precision_recall_curve`` result.
    """
    forest = IsolationForest().fit(x)

    # score_samples is lower for abnormal points; flip the sign so that
    # higher values rank as more anomalous.
    anomaly_score = -forest.score_samples(x)

    roc = roc_curve(y, anomaly_score)
    pr = precision_recall_curve(y, anomaly_score)

    # roc is (fpr, tpr, thresholds); pr is (precision, recall, thresholds).
    fpr, tpr = roc[0], roc[1]
    precision, recall = pr[0], pr[1]

    return auc(fpr, tpr), auc(recall, precision), roc, pr
# %% isolation forest
# Run train_eval separately on the U-node, V-node and edge features.
u_result = train_eval(xu.numpy(), yu.numpy())
v_result = train_eval(xv.numpy(), yv.numpy())
e_result = train_eval(xe.numpy(), ye.numpy())

# Each result is (roc_auc, pr_auc, roc curve, precision-recall curve).
u_roc_auc, u_pr_auc, u_rc_curve, u_pr_curve = u_result
v_roc_auc, v_pr_auc, v_rc_curve, v_pr_curve = v_result
e_roc_auc, e_pr_auc, e_rc_curve, e_pr_curve = e_result

print(args)
print(
    f"Eval, "
    f"u auc-roc: {u_roc_auc:.4f}, v auc-roc: {v_roc_auc:.4f}, e auc-roc: {e_roc_auc:.4f} | "
    f"u auc-pr {u_pr_auc:.4f}, v auc-pr {v_pr_auc:.4f}, e auc-pr {e_pr_auc:.4f}"
)

# Scalar AUC values first, then the full curves, in the stored key order.
auc_metrics = {
    "u_roc_auc": u_roc_auc,
    "u_pr_auc": u_pr_auc,
    "v_roc_auc": v_roc_auc,
    "v_pr_auc": v_pr_auc,
    "e_roc_auc": e_roc_auc,
    "e_pr_auc": e_pr_auc,
}
auc_metrics.update(
    {
        "u_roc_curve": u_rc_curve,
        "u_pr_curve": u_pr_curve,
        "v_roc_curve": v_rc_curve,
        "v_pr_curve": v_pr_curve,
        "e_roc_curve": e_rc_curve,
        "e_pr_curve": e_pr_curve,
    }
)

# Bundle the run configuration with its metrics for saving.
output_stored = {"args": args, "auc_metrics": auc_metrics}
# Persist the run arguments and metrics as
# "<result_dir>/isoforest-<name>-<id>-output.th".
print("Saving current results...")

# torch.save does not create missing parent directories; make sure the
# output directory exists so a fresh checkout does not fail here, after all
# the metrics have already been computed.
os.makedirs(result_dir, exist_ok=True)

torch.save(
    output_stored,
    os.path.join(result_dir, f"isoforest-{args['name']}-{args['id']}-output.th"),
)