-
Notifications
You must be signed in to change notification settings - Fork 0
/
runExperimentsSimplex.py
299 lines (271 loc) · 13.2 KB
/
runExperimentsSimplex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
import numpy as np
import datetime
import time
ts = time.time()
timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S').replace(' ', '-').replace(':', '-')
import os
os.environ["OMP_NUM_THREADS"] = "4" # export OMP_NUM_THREADS=4
os.environ["OPENBLAS_NUM_THREADS"] = "4" # export OPENBLAS_NUM_THREADS=4
os.environ["MKL_NUM_THREADS"] = "6" # export MKL_NUM_THREADS=6
os.environ["VECLIB_MAXIMUM_THREADS"] = "4" # export VECLIB_MAXIMUM_THREADS=4
os.environ["NUMEXPR_NUM_THREADS"] = "6" # export NUMEXPR_NUM_THREADS=6
from auxiliaryFunctions import project_onto_simplex, performUpdate, exitCriterion, stepSize
"""## Away FW or Pairwise FW"""
#Maintains active list of weights and vertices.
def runFWSimplex(x0, function, feasibleReg, tolerance, maxTime, FWVariant = "AFW", typeStep = "SS", criterion = "PG", criterionRef = 0.0):
#Quantities we want to output.
grad = function.fEvalGrad(x0)
FWGap = [np.dot(grad, x0 - feasibleReg.LPOracle(grad))]
fVal = [function.fEval(x0)]
timing = [time.time()]
x = x0.copy()
itCount = 1
while(True):
if(FWVariant == "AFW"):
x, vertvar, gap = awayStepFWSimplex(function, feasibleReg, x, typeStep)
else:
x, vertvar, gap = pairwiseStepFWSimplex(function, feasibleReg, x, typeStep)
performUpdate(function, x, FWGap, fVal, timing, gap)
if(exitCriterion(itCount, fVal[-1], FWGap[-1], criterion = criterion, numCriterion = tolerance, critRef = criterionRef) or timing[-1] - timing[0] > maxTime):
timing[:] = [t - timing[0] for t in timing]
return x, FWGap, fVal, timing
itCount += 1
def awayStepFWSimplex(function, feasibleReg, x, typeStep):
grad = function.fEvalGrad(x)
v = feasibleReg.LPOracle(grad)
a, indexMax = feasibleReg.AwayOracle(grad, x)
vertvar = 0
#Choose FW direction, can overwrite index.
if(np.dot(grad, x - v) > np.dot(grad, a - x)):
d = v - x
alphaMax = 1.0
optStep = stepSize(function, d, grad, typeStep)
alpha = min(optStep, alphaMax)
#Less than maxStep
if(alpha != alphaMax):
#newVertex returns true if vertex is new.
if(np.dot(v, x) == 0.0):
vertvar = 1
#Max step length away step, only one vertex now.
else:
vertvar = -1
else:
d = x - a
alphaMax = x[indexMax]/(1.0 - x[indexMax])
optStep = stepSize(function, d, grad, typeStep)
alpha = min(optStep, alphaMax)
#Max step, need to delete a vertex.
if(alpha == alphaMax):
vertvar = -1
return x + alpha*d, vertvar, np.dot(grad, x - v)
#Perform one step of the Pairwise FW algorithm
#Also specifies if the number of vertices has decreased var = -1 or
#if it has increased var = +1. Otherwise 0.
def pairwiseStepFWSimplex(function, feasibleReg, x, typeStep):
grad = function.fEvalGrad(x)
v = feasibleReg.LPOracle(grad)
a, index = feasibleReg.AwayOracle(grad, x)
vertVar = 0
#Find the weight of the extreme point a in the decomposition.
alphaMax = x[index]
#Update weight of away vertex.
d = v - a
optStep = stepSize(function, d, grad, typeStep)
alpha = min(optStep, alphaMax)
if(alpha == alphaMax):
vertVar = -1
#Update the FW vertex
if(np.dot(v, x) == 0.0):
vertVar = 1
return x + alpha*d, vertVar, np.dot(grad, x - v)
"""
# LaCG Variants
"""
#Locally Accelerated Conditional Gradients.
class LaCG:
def run(self, x0, function, feasReg, tol, maxIter = 5e5, FWVariant = "AFW", typeStep = "SS"):
#Perform lineseach?
self.lineSearch = typeStep
#Function parameters.
self.restart = []
self.L = function.largestEig()
self.mu = function.smallestEig()
self.tol = tol
self.theta = np.sqrt(0.5*self.mu/self.L)
#Copy the variables.
self.xAFW, self.xAGD, x, self.y, self.w = [x0.copy(), x0.copy(), x0.copy(), x0.copy(), x0.copy()]
#Store the data from the initial iterations.
itCount = 1
self.A = 1.0
self.z = -function.fEvalGrad(self.xAFW) + self.L*self.xAFW
#Initial data measurements.
grad = function.fEvalGrad(x0)
FWGap = [np.dot(grad, x0 - feasReg.LPOracle(grad))]
fVal = [function.fEval(x0)]
timing = [time.time()]
while(fVal[-1] - fValOpt > tol):
print(fVal[-1] - fValOpt)
x, gap = self.runIter(function, feasReg, x, itCount + 1, FWVariant)
performUpdate(function, x, FWGap, fVal, timing, gap)
itCount += 1
if(timing[-1] - timing[0] > TIME_LIMIT):
break
timing[:] = [t - timing[0] for t in timing]
return x, FWGap, fVal, timing
def runIter(self, function, feasReg, x, it, FWVariant):
#Information about variation of active set in vertVar
if(FWVariant == "AFW"):
self.xAFW, vertVar, gap = awayStepFWSimplex(function, feasReg, x, typeStep = self.lineSearch)
else:
self.xAFW, vertVar, gap = pairwiseStepFWSimplex(function, feasReg, x, typeStep = self.lineSearch)
self.xAGD = self.accelStep(function, x)
#If we return the Accelerated point, the gap is invalid, set it to zero for later processing.
if(function.fEval(self.xAGD) < function.fEval(self.xAFW)):
return self.xAGD, gap
else:
return self.xAFW, gap
#Whenever we perform an accelerated step, we can use a warm start for the
#optimization subproblem, using w0 and alphaw0
def accelStep(self, function, x):
self.A = self.A/(1 - self.theta)
a = self.theta*self.A
self.y = (x + self.theta*self.w)/(1 + self.theta)
self.z += a*(self.mu*self.y - function.fEvalGrad(self.y))
#Compute the projection directly.
indices = np.where(x > 0.0)[0]
#Calculate the vector.
b = self.z[indices]/(self.mu*self.A + self.L - self.mu)
aux = project_onto_simplex(b)
self.w = np.zeros(len(x))
self.w[indices] = aux
xAGD = (1 - self.theta)*x + self.theta*self.w
return xAGD
#Takes an input scheme and tries to accelerate it.
#Need to specify function and scheme wich will be used for optimizing.
class catalystSchemeSimplex:
def run(self, x0, function, feasReg, tol, maxTime, FWVariant = "AFW", typeStep = "SS"):
self.L = function.largestEig()
self.mu = function.smallestEig()
self.kappa = self.L - 2*self.mu
from collections import deque
xOut = deque([x0], maxlen = 2)
#Quantities we want to output.
FWGap = [function.FWGapBaseProblem(xOut[-1], feasReg)]
fVal = [function.fEvalBaseProblem(xOut[-1])]
timing = [time.time()]
iterations = [1]
q = self.mu / (self.mu + self.kappa)
rho = 0.9*np.sqrt(q)
y = deque([x0, x0], maxlen = 2)
function.setKappa(self.kappa)
epsilon = 0.22222 * FWGap[-1] * (1-rho)
alpha = deque([np.sqrt(q)], maxlen = 2)
itCount = 0
while(fVal[-1] - fValOpt > tol):
function.sety(y[-1])
newX, gap, fvalue, timingInner = runFWSimplex(xOut[-1], function, feasReg, epsilon, maxTime/2.0, FWVariant = FWVariant, typeStep = typeStep, criterion = "DG")
xOut.append(newX)
epsilon *= (1-rho)
iterations.append(len(gap) + iterations[-1])
alpha.append(self.findRoot(alpha[-1], q))
beta = self.returnBeta(alpha)
y.append(xOut[-1] + beta *(xOut[-1] - xOut[-2]))
performUpdate(function, xOut[-1], FWGap, fVal, timing, function.FWGapBaseProblem(xOut[-1], feasReg))
if(timing[-1] - timing[0] > maxTime):
break
itCount += 1
timing[:] = [t - timing[0] for t in timing]
return xOut[-1], FWGap, fVal, timing, iterations
#Finds the root of the equation between 0 and 1.
#Throws an assertion if no valid candidate is found.
def findRoot(self, alpha, q):
aux = (q-alpha*alpha)
val = 0.5*(aux + np.sqrt(aux*aux + 4.0*alpha*alpha))
if(val > 0 and val <= 1):
return val
else:
val = 0.5*(aux - np.sqrt(aux*aux + 4.0*alpha*alpha))
assert val > 0 and val < 1, "Root does not meet desired criteria.\n"
return val
#Returns the value of Beta based on the values of alpha.
#The alpha deque contains at least two values.
def returnBeta(self, alpha):
return alpha[-2]*(1-alpha[-2])/(alpha[-2]*alpha[-2] + alpha[-1])
"""# Simplex example"""
from functions import randomPSDGenerator, funcQuadratic, funcAccelScheme
from feasibleRegions import probabilitySimplexPolytope
from algorithms import NAGD_probabilitySimplex, CGS, DIPFW
TIME_LIMIT = int(1800)
size = int(1500)
feasibleRegion = probabilitySimplexPolytope(size)
x_0 = feasibleRegion.initialPoint()
S_0 = [x_0]
alpha_0 = [1]
tolerance = 1.0e-5
typeOfStep = "SS"
LVal = 1000.0
MuVal = 1.0
M = randomPSDGenerator(size, MuVal, LVal)
b = np.random.randint(-1,1, size = size)
fun = funcQuadratic(size, M , b, MuVal, LVal)
print("Solving the problem over the simplex.")
##Run to a high Frank-Wolfe primal gap accuracy for later use.
print("\nSolving the problem to a high accuracy using Nesterov's AGD to obtain a reference solution.")
fValOpt = NAGD_probabilitySimplex(x_0, fun, feasibleRegion, tolerance/10.0)
#Catalyst augmented
print("\nRunning Catalyst-augmented AFW.")
funCat = funcAccelScheme(len(x_0), fun.returnM(), fun.returnb(), fun.largestEig(), fun.smallestEig())
CatalystAFW = catalystSchemeSimplex()
xCatalyst, FWCatalyst, fCatalyst, tCatalyst, itCatalyst = CatalystAFW.run(x_0, funCat, feasibleRegion, tolerance, TIME_LIMIT)
##Vanilla AFW
print("\nRunning AFW.")
xAFW, FWGapAFW, fValAFW, timingAFW = runFWSimplex(x_0, fun, feasibleRegion, tolerance, TIME_LIMIT, FWVariant = "AFW", typeStep = typeOfStep, criterion = "PG", criterionRef = fValOpt)
##Vanilla PFW
print("\nRunning PFW.")
xPFW, FWGapPFW, fValPFW, timingPFW = runFWSimplex(x_0, fun, feasibleRegion, tolerance, TIME_LIMIT, FWVariant = "PFW", typeStep = typeOfStep, criterion = "PG", criterionRef = fValOpt)
#LaCG
print("\nRunning LaCG-AFW.")
LaCGAway = LaCG()
xLaCGAFW, FWGapLaCGAFW, fValLaCGAFW, timingLaCGAFW = LaCGAway.run(x_0, fun, feasibleRegion, tolerance, typeStep = typeOfStep, FWVariant = "AFW")
#LaCG PFW
print("\nRunning LaCG-PFW.")
LaCGPairwise = LaCG()
xLaCGPFW, FWGapLaCGPFW, fValLaCGPFW, timingLaCGPFW = LaCGPairwise.run(x_0, fun, feasibleRegion, tolerance, typeStep = typeOfStep, FWVariant = "PFW")
#Conditional Gradient Sliding.
print("\nRunning CGS.")
CGS = CGS()
xCGS, FWGapCGS, fValCGS, timingCGS, iterationCGS = CGS.run(x_0, fun, feasibleRegion, tolerance, TIME_LIMIT, criterion = "PG", criterionRef = fValOpt)
#Decomposition Invariant CG
print("\nRunning DICG.")
xDICG, FWGapDICG, fValDICG, timingDICG = DIPFW(x_0, fun, feasibleRegion, tolerance, TIME_LIMIT, typeStep = typeOfStep, criterion = "PG", criterionRef = fValOpt)
import matplotlib.pyplot as plt
#Plot primal gap in terms of iteration.
plt.loglog(np.arange(len(fValAFW)) + 1, [(x - fValOpt) for x in fValAFW], '-*', color = 'b', markevery = np.logspace(0, np.log10(len(fValAFW)-1), 10).astype(int).tolist(), label = 'AFW')
plt.loglog(np.arange(len(fValPFW)) + 1, [(x - fValOpt) for x in fValPFW], '-D', color = 'c', markevery = np.logspace(0, np.log10(len(fValPFW)-1), 10).astype(int).tolist(), label = 'PFW')
plt.loglog(np.arange(len(fValLaCGAFW)) + 1, [(x - fValOpt) for x in fValLaCGAFW], '-o', color = 'k', markevery = np.logspace(0, np.log10(len(fValLaCGAFW)-1), 10).astype(int).tolist(), label = 'LaCG-AFW')
plt.loglog(np.arange(len(fValLaCGPFW)) + 1, [(x - fValOpt) for x in fValLaCGPFW], '-^', color = 'g', markevery = np.logspace(0, np.log10(len(fValLaCGPFW)-1), 10).astype(int).tolist(), label = 'LaCG-PFW')
plt.loglog(np.arange(len(fValDICG)) + 1, [(x - fValOpt) for x in fValDICG], '-s', color = 'r', markevery = np.logspace(0, np.log10(len(fValDICG)-1), 10).astype(int).tolist(), label = 'DICG')
plt.loglog(iterationCGS, [(x - fValOpt) for x in fValCGS], color = 'y', label = 'CGS')
plt.loglog(itCatalyst, [(x - fValOpt) for x in fCatalyst], ':', color = 'm', label = 'Catalyst')
plt.legend()
plt.xlabel(r'$k$')
plt.ylabel(r'$f(x_{k}) - f^{*}$')
plt.autoscale(enable=True, axis='x', tight=True)
plt.grid()
plt.show()
plt.close()
#Plot Primal gap in terms of time.
plt.semilogy(timingAFW, [(x - fValOpt) for x in fValAFW], '-*', color = 'b', markevery = int(len(timingAFW)/10), label = 'AFW')
plt.semilogy(timingPFW, [(x - fValOpt) for x in fValPFW], '-D', color = 'g', markevery = int(len(timingPFW)/10), label = 'PFW')
plt.semilogy(timingLaCGAFW, [(x - fValOpt) for x in fValLaCGAFW], '-o', color = 'k', markevery = int(len(timingLaCGAFW)/10), label = 'LaCG-AFW')
plt.semilogy(timingLaCGPFW, [(x - fValOpt) for x in fValLaCGPFW], '-^', color = 'g', markevery = int(len(timingLaCGPFW)/10), label = 'LaCG-PFW')
plt.semilogy(timingDICG, [(x - fValOpt) for x in fValDICG], '-s', color = 'r', markevery = int(len(timingDICG)/10), label = 'DICG')
plt.semilogy(timingCGS, [(x - fValOpt) for x in fValCGS], color = 'y', label = 'CGS')
plt.semilogy(tCatalyst, [(x - fValOpt) for x in fCatalyst], ':', color = 'm', label = 'Catalyst')
plt.legend()
plt.ylabel(r'$f(x_{k}) - f^{*}$')
plt.xlabel(r't[s]')
plt.autoscale(enable=True, axis='x', tight=True)
plt.grid()
plt.show()
plt.close()