forked from EricYizhenWang/robust_nn_icml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheps_separation.py
137 lines (124 loc) · 5.15 KB
/
eps_separation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import numpy as np
from hopcroftkarp import HopcroftKarp
from sets import Set
def build_collision_graph(eps, pts, y):
'''
This function builds the collision/conflict graph for the max-matching algorithm.
'''
n = len(pts)
graph = dict()
adj_lst = dict()
for i in range(n):
adj_lst[i] = Set([])
for i in range(n-1):
for j in range(i+1, n):
if y[i] != y[j]:
p1 = pts[i]
p2 = pts[j]
d = p1 - p2
dist = np.linalg.norm(d)
if (dist <= 2*eps):
adj_lst[i].add(j)
adj_lst[j].add(i)
if y[i] == 1:
if i in graph:
graph[i].add(j)
else:
graph[i] = Set([j])
else:
if j in graph:
graph[j].add(i)
else:
graph[j] = Set([i])
#print 'Done: find collision graph'
return [adj_lst, graph]
def find_matching(graph):
match = HopcroftKarp(graph).maximum_matching()
#print 'Done: find matching'
return match
def find_num_collision(eps, pts, y):
'''
This function finds the number of collision/conflicts of each vertex.
Output:
hasCollision: the set of vertices that has collision with other vertex
numCollision: number of collisions that each vertex has.
'''
n = len(pts)
numCollision = [0 for i in range(n)]
hasCollision = Set([])
for i in range(n-1):
for j in range(i+1, n):
if y[i] != y[j]:
p1 = pts[i]
p2 = pts[j]
d = p1 - p2
dist = np.linalg.norm(d)
if (dist <= (2*eps)):
numCollision[i] += 1
numCollision[j] += 1
hasCollision.add(i)
hasCollision.add(j)
return [hasCollision, numCollision]
def find_Z(graph, matching, U, adj_lst):
# a simple bfs finding Z, set of vertices reachable from U via alternating path.
visited_via_matched_edge = Set([p for p in U])
visited_via_unmatched_edge = Set([p for p in U])
while True:
set1 = Set([item for item in visited_via_unmatched_edge])
set2 = Set([item for item in visited_via_matched_edge])
for u in visited_via_matched_edge:
if u in matching:
for v in adj_lst[u]:
if v != matching[u]:
set1.add(v)
else:
for v in adj_lst[u]:
set1.add(v)
for u in visited_via_unmatched_edge:
if u in matching:
for v in adj_lst[u]:
if v == matching[u]:
set2.add(v)
if (len(set1) == len(visited_via_unmatched_edge)) and (len(set2) == len(visited_via_matched_edge)):
return set1.union(set2)
visited_via_matched_edge = set2
visited_via_unmatched_edge = set1
def find_min_cover(graph, adj_lst, y_pts):
'''
This function finds the min-cover of a bipartite graph from max-matching.
graph: the collision graph dictionary, each key is a vertex while the value is the set of vertices adjacent to it. Note than a vertex can only appear in either the key or the value list,never both.
adj_lst: similar to graph, except that the key set contains all vertices.
has_collison: the set of points that has collision with other points
y_pts: label of points in the graph.
'''
matching = find_matching(graph)
L = Set([i for i in graph if y_pts[i] > 0])
R = Set([i for i in graph if y_pts[i] <= 0])
U = Set([i for i in graph if (i not in matching) and (i in L)])
Z = find_Z(graph, matching, U, adj_lst)
K = (L.difference(Z)).union(R.intersection(Z))
return [matching, K]
def find_eps_separated_set(pts, eps, y_pts):
'''
This function finds the epsilon-separated subset with the largest cardinality.
Args:
pts: the set of input points
y_pts: the label of pts
eps: the epsilon value
Output:
good_pts: the desired epsilon-separated set
good_y: the label of good_pts
Brief description of steps:
1) find the set of points that have conflict with other points
2) build the conflict adjacency graph, conflict points are adjacent
3) find the min-cover of the adjacency graph
4) remove the min-cover from the original set pts.
'''
y_pts = [1 if i>0 else -1 for i in y_pts]
hasCollision, numCollision = find_num_collision(eps, pts, y_pts)
adj_lst, graph = build_collision_graph(eps, pts, y_pts)
matching, min_cover = find_min_cover(graph, adj_lst, y_pts)
good_pts = np.delete(pts, list(min_cover), axis = 0)
good_y = np.delete(y_pts, list(min_cover), axis = 0)
#print 'size of min_cover is: ', len(min_cover), 'size of matching is: ', len(matching)
return [good_pts, good_y]