-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathImplementation.py
214 lines (195 loc) · 8.44 KB
/
Implementation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
'''
Each algorithm proceeds in four steps:
1. Generate the truths and a noise level for each worker.
2. Add the inherent (sensing) noise to each worker's readings.
3. Decide the privacy level and add the privacy noise.
4. Run CRH on the sensory data and return the resulting RMSE.
'''
def CRH(sensory_data, M, N, threshold, max_iter=1000):
    """Estimate the truth of every object from multi-worker readings (CRH).

    Each round performs two steps:
      1. the truth of every object becomes the weighted mean of the workers'
         readings for it;
      2. every worker's weight becomes ``log(total / own)``, where ``own`` is
         the worker's summed squared distance to the current truths and
         ``total`` is the sum of ``own`` over all workers.

    Args:
        sensory_data: array-like of shape (N, M); row ``i`` holds the
            readings of worker ``i`` for the M objects.
        M: number of objects (columns of ``sensory_data``).
        N: number of workers (rows of ``sensory_data``).
        threshold: iteration stops once the summed absolute change of the
            truths between two consecutive rounds is below this value.
        max_iter: upper bound on the number of rounds, so that inputs which
            can never satisfy ``threshold`` (for example NaN readings) do
            not hang the caller.

    Returns:
        numpy array of length M holding the discovered truths.
    """
    readings = np.asarray(sensory_data, dtype=float)
    weights = np.ones(N)
    discovered_truths = np.zeros(M)
    for _ in range(max_iter):
        last_truths = discovered_truths
        weights_sum = np.sum(weights)
        if not weights_sum > 0:
            # With a single worker log(total / own) == log(1) == 0, so the
            # weights carry no information; fall back to a plain mean.
            weights = np.ones(N)
            weights_sum = float(N)
        # Weighted mean over workers for all objects at once.
        discovered_truths = weights @ readings / weights_sum
        distances = np.sum((readings - discovered_truths) ** 2, axis=1)
        dis_sum = np.sum(distances)
        if dis_sum == 0:
            # Every worker matches the estimate exactly: nothing left to
            # refine, and log(0 / 0) would poison the weights with NaN.
            break
        # Floor the per-worker distance so that a worker who matches the
        # estimate exactly gets a large finite weight instead of +inf.
        floor = dis_sum * np.finfo(float).eps
        weights = np.log(dis_sum / np.maximum(distances, floor))
        if np.sum(np.abs(discovered_truths - last_truths)) < threshold:
            break
    return discovered_truths
def RMSE(discovered_truths, truths, M=None):
    """Return the root-mean-square error between an estimate and the truths.

    Args:
        discovered_truths: estimated values (array-like).
        truths: reference values, broadcastable against the estimate.
        M: divisor used for the mean (the number of objects). When omitted,
            the number of elements of the squared-error array is used.

    Returns:
        ``sqrt(sum((discovered_truths - truths) ** 2) / M)``.
    """
    squared_error = (np.asarray(discovered_truths) - np.asarray(truths)) ** 2
    if M is None:
        M = squared_error.size
    return np.sqrt(np.sum(squared_error) / M)
def standard(M, N, LOW, HIGH, threshold):
    """Baseline experiment: CRH on noisy readings without privacy noise.

    M, N      -- number of objects and number of workers
    LOW, HIGH -- bounds handed to np.random.randint when drawing the truths
    threshold -- convergence threshold forwarded to CRH

    The global NumPy generator is reseeded with 30 on every call.
    Returns the RMSE between the truths found by CRH and the drawn truths.
    """
    # --- 1. draw the truths and one noise level per worker -----------------
    np.random.seed(30)
    truths = np.random.randint(LOW, HIGH, M)
    # each noise level is an integer drawn uniformly from [0, 100)
    noise_level = np.random.randint(0, 100, N)

    # --- 2. one row of readings per worker (rows = workers, cols = objects) -
    sensory_data = np.empty((N, M))
    for worker, sigma in enumerate(noise_level):
        sensory_data[worker] = truths + np.random.normal(0, sigma, M)

    # --- 3. truth discovery and error --------------------------------------
    estimate = CRH(sensory_data, M, N, threshold)
    return RMSE(estimate, truths, M)
def ICDCS2020(M, N, LOW, HIGH, lambda1, lambda2, threshold):
    """Run CRH before and after adding per-worker Gaussian privacy noise.

    M, N      -- number of objects and number of workers
    LOW, HIGH -- bounds handed to np.random.randint when drawing the truths
    lambda1   -- noise levels are drawn from an exponential with scale 1/lambda1
    lambda2   -- privacy levels are drawn from an exponential with scale 1/lambda2
    threshold -- convergence threshold forwarded to CRH

    The global NumPy generator is reseeded with 100 on every call.
    Returns a pair (RMSE before privacy noise, RMSE after privacy noise).
    """
    # --- 1. truths and inherent noise ---------------------------------------
    np.random.seed(100)
    truths = np.random.randint(LOW, HIGH, M)
    noise_level = np.random.exponential(1 / lambda1, size=N)
    sensory_data = np.empty((N, M))  # rows = workers, columns = objects
    for worker, sigma in enumerate(noise_level):
        sensory_data[worker] = truths + np.random.normal(0, sigma, M)
    rmse_before = RMSE(CRH(sensory_data, M, N, threshold), truths, M)

    # --- 2. privacy perturbation --------------------------------------------
    # The sampled level is passed as the scale argument of np.random.normal,
    # i.e. it acts as the standard deviation of the added noise.
    privacy_level = np.random.exponential(1 / lambda2, N)
    for readings, sigma in zip(sensory_data, privacy_level):
        readings += np.random.normal(0, sigma, M)  # in-place on the row view

    # --- 3. truth discovery on the perturbed data ---------------------------
    rmse_after = RMSE(CRH(sensory_data, M, N, threshold), truths, M)
    return rmse_before, rmse_after
def ICDCS2020_v1(M, N, LOW, HIGH, nLOW, nHIGH, lambda2, threshold):
    """Variant of ICDCS2020 whose noise levels are uniform random integers.

    M, N        -- number of objects and number of workers
    LOW, HIGH   -- bounds handed to np.random.randint when drawing the truths
    nLOW, nHIGH -- bounds handed to np.random.randint when drawing noise levels
    lambda2     -- scale of the exponential the privacy levels are drawn from
    threshold   -- convergence threshold forwarded to CRH

    The global NumPy generator is reseeded three times (10, 20, 30), once
    before each sampling stage. Returns the RMSE of the truths found by CRH
    on the perturbed data.

    NOTE(review): lambda2 is used directly as the exponential scale here,
    whereas ICDCS2020 uses 1/lambda2 -- confirm which one is intended.
    """
    # --- 1. truths and noise levels -----------------------------------------
    np.random.seed(10)
    truths = np.random.randint(LOW, HIGH, M)
    noise_level = np.random.randint(nLOW, nHIGH, N)

    # --- 2. inherent noise ---------------------------------------------------
    np.random.seed(20)
    sensory_data = np.empty((N, M))  # rows = workers, columns = objects
    for worker, sigma in enumerate(noise_level):
        sensory_data[worker] = truths + np.random.normal(0, sigma, M)

    # --- 3. privacy perturbation ---------------------------------------------
    np.random.seed(30)
    privacy_level = np.random.exponential(lambda2, N)
    for readings, sigma in zip(sensory_data, privacy_level):
        readings += np.random.normal(0, sigma, M)  # in-place on the row view

    # --- 4. truth discovery on the perturbed data ----------------------------
    estimate = CRH(sensory_data, M, N, threshold)
    return RMSE(estimate, truths, M)
def TMC2021CDP(M, N, LOW, HIGH, lambdae, budget, threshold):
    """Run CRH on readings perturbed with per-worker Laplace privacy noise.

    M, N      -- number of objects and number of workers
    LOW, HIGH -- bounds handed to np.random.randint when drawing the truths;
                 HIGH - LOW is also the numerator of the Laplace scale
    lambdae   -- scale of the exponential the noise levels are drawn from
    budget    -- indexable with one entry per worker; worker i receives
                 Laplace noise of scale (HIGH - LOW) / budget[i]
    threshold -- convergence threshold forwarded to CRH

    The global NumPy generator is reseeded three times (10, 20, 30), once
    before each sampling stage. Returns the RMSE of the truths found by CRH
    on the perturbed data.
    """
    # --- 1. truths and noise levels -----------------------------------------
    np.random.seed(10)
    truths = np.random.randint(LOW, HIGH, M)
    noise_level = np.random.exponential(lambdae, N)

    # --- 2. inherent Gaussian noise -----------------------------------------
    np.random.seed(20)
    sensory_data = np.zeros((N, M))  # rows = workers, columns = objects
    for i in range(N):
        sensory_data[i] = truths + np.random.normal(0, noise_level[i], M)

    # --- 3. Laplace privacy noise, one scale per worker ---------------------
    np.random.seed(30)
    sensitivity = HIGH - LOW
    for i in range(N):
        sensory_data[i] += np.random.laplace(0, sensitivity / budget[i], M)

    # --- 4. truth discovery on the perturbed data ---------------------------
    discovered_truths = CRH(sensory_data, M, N, threshold)
    # RMSE needs the object count as its third argument; the original call
    # left it out and raised TypeError.
    return RMSE(discovered_truths, truths, M)
def standard_test_scale_lambda(M, N, LOW, HIGH, threshold):
    """Sweep lambda over 0.1, 0.2, ..., 29.9 and plot two RMSE curves.

    NOTE(review): the call below passes six positional arguments to
    ``standard`` and indexes its result, while ``standard`` as defined in
    this file accepts five arguments and returns a single RMSE value. As
    written, the first iteration raises TypeError; the helper and this sweep
    must be reconciled before the function can run. The call is left
    untouched because the intended contract cannot be determined from here.
    """
    steps = range(1, 300)
    # Build the x-axis from the swept values themselves so that it always has
    # exactly one point per RMSE sample (a float-step arange may not).
    lambdas = [step / 10 for step in steps]
    rmsel1 = []
    rmsel2 = []
    for lambdae in lambdas:
        rmse = standard(M, N, LOW, HIGH, lambdae, threshold)
        rmsel1.append(rmse[0])
        rmsel2.append(rmse[1])
    plt.plot(lambdas, rmsel1, 'blue', label='Discovered truths')
    plt.plot(lambdas, rmsel2, 'red', label='Sensory data')
    # Raw string: "\l" is an invalid escape sequence in a normal literal.
    plt.xlabel(r"Scale of $\lambda$")
    plt.ylabel("RMSE (the lower, the better)")
    plt.legend()
    plt.show()
def standard_test_number_workers(M, LOW, HIGH, threshold):
    """Return the baseline RMSE for a range of worker counts.

    The worker count runs from M//10 up to (but excluding) 10*M in steps of
    M//10; ``standard`` is evaluated once per count with M objects.

    Returns a list with one RMSE per worker count, in increasing order of
    the count.
    """
    step = M // 10
    worker_counts = range(step, 10 * M, step)
    return [standard(M, workers, LOW, HIGH, threshold)
            for workers in worker_counts]
def TMC2021_test(M, N, LOW, HIGH, nLOW, nHIGH, threshold):
    """Plot the RMSE of TMC2021CDP against a uniform privacy budget.

    The budget runs over np.arange(1, 30, 0.1); for each value every worker
    gets that same budget, and TMC2021CDP is called with its noise-level
    parameter fixed at 10. nLOW and nHIGH are accepted but not used.
    """
    budgets = np.arange(1, 30, 0.1)
    rmsel = [
        TMC2021CDP(M, N, LOW, HIGH, 10, np.full(N, b), threshold)
        for b in budgets
    ]
    plt.plot(budgets, rmsel)
    plt.show()
def ICDCS2020_test_scale_lambda(M, N, LOW, HIGH, threshold):
    """Plot the RMSE of ICDCS2020 before and after privacy noise vs. lambda.

    lambda1 is swept over 0.1, 0.2, ..., 29.9 and lambda2 is kept at twice
    lambda1 (the ratio c is fixed at 2.0). For every value ICDCS2020 returns
    the RMSE before and after the privacy perturbation; both are plotted.
    """
    steps = range(1, 300)
    # Build the x-axis from the swept values themselves so that it always has
    # exactly one point per RMSE sample (a float-step arange may not).
    lambdas = [step / 10 for step in steps]
    rmsel1 = []
    rmsel2 = []
    for step in steps:
        before, after = ICDCS2020(M, N, LOW, HIGH, step / 10, 2 * step / 10, threshold)
        rmsel1.append(before)
        rmsel2.append(after)
    plt.plot(lambdas, rmsel1, 'red', linewidth=0.5,
             label='Discovered truths before privacy')
    # The second curve is the post-perturbation RMSE; it used to carry the
    # same "before privacy" label, making the legend ambiguous.
    plt.plot(lambdas, rmsel2, 'black', linewidth=0.5,
             label='Discovered truths after privacy')
    # Raw string: "\l" is an invalid escape sequence in a normal literal.
    plt.xlabel(r"Scale of $\lambda$")
    plt.ylabel("RMSE (the lower, the better)")
    plt.legend()
    plt.show()
if __name__ == '__main__':
    # ---------------------------------------
    # default parameters
    M = 100
    N = 100
    threshold = 1e-6
    LOW = -1e3
    HIGH = 1e3 - 1
    lambdae = 10
    # ----------------------------------------
    # standard test: one RMSE curve per object count m, plotted against the
    # ratio of workers to objects
    # standard_test_scale_lambda(M,N,LOW,HIGH,threshold)
    worker_ratio = np.arange(0.1, 10, 0.1)
    for m in range(100, 210, 10):
        curve = standard_test_number_workers(m, LOW, HIGH, threshold)
        plt.plot(worker_ratio, curve, label="M=%d" % m)
    plt.xlabel("Number of workers / number of objects")
    plt.ylabel("RMSE(the lower, the better)")
    plt.legend()
    plt.show()
    # ------------------------------------------
    # ICDCS2020 test
    # ICDCS2020_test_scale_lambda(M,N,LOW,HIGH,threshold)
    # ------------------------------------------
    # TMC2021 test
    # TMC2021_test(M,N,LOW,HIGH,nLOW,nHIGH,threshold)