-
Notifications
You must be signed in to change notification settings - Fork 0
/
conductor_backend.py
329 lines (285 loc) · 11.1 KB
/
conductor_backend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
from collections import defaultdict
import io
import math
import threading
import time
import traceback
import urllib
import urllib.parse
import uuid

import boto3
import pymonetdb

import minions
import pool
import teiresias
# Module-level EC2 resource handle, created when this module is imported and
# used by make_backend() to look up the minion instances.
# Arguably the caller should pass this in instead; for now it is a global.
ec2 = boto3.resource('ec2')
class Backend:
    """Send queries to pools of minions and size each pool according to load.

    The constructor starts a daemon thread that, once per second, polls
    every pool, recomputes its desired size, wakes threads blocked in
    wait_for_pool() when a pool's summary line changed, and publishes a
    status snapshot that status() exposes.
    """

    # Maximum number of threads allowed to block in wait_for_pool() at once.
    _MAX_SLEEPERS = 100

    def __init__(self, pools, specs, explainer_connector, minion_connector):
        """Store the configuration and start the poller thread.

        Args:
            pools: non-empty mapping of pool name to pool object.
            specs: mapping with exactly the same keys as *pools*; it is
                handed to the adviser when a pool is chosen for a query.
            explainer_connector: object whose connect() yields the
                connection used to obtain advice.
            minion_connector: connector template whose netloc contains the
                literal string 'HOSTNAME'; it is replaced by a minion's IP.
        """
        self._poller_thread = threading.Thread(
            target=self._polling_loop, daemon=True)
        self._pools = pools
        self._specs = specs
        # Per pool name: number of threads currently waiting for a minion.
        self._triggers = defaultdict(lambda: 0)
        self._pool_condition = threading.Condition()
        self._pool_condition_sleepers = 0
        self._storage_lock = threading.Lock()
        # Cached storage statistics; always go through get_storage().
        self._storage_do_not_use_directly = None
        self._explainer_connector = explainer_connector
        self._minion_connector = minion_connector
        self._statushub = PollHub({})

        assert len(pools) > 0
        assert set(specs.keys()) == set(pools.keys())

        # Publish an initial status before the poller takes over.
        self._update_status()
        self._poller_thread.start()

    def _polling_loop(self):
        """Body of the poller thread: run one polling pass per second, forever.

        A failing pass is reported and the loop carries on. This thread is
        the only place that notifies the pool condition, so if it died every
        thread blocked in wait_for_pool() would wait forever.
        """
        last_msgs = dict.fromkeys(self._pools)
        while True:
            time.sleep(1)
            try:
                self._poll_once(last_msgs)
            except Exception:
                traceback.print_exc()

    def _poll_once(self, last_msgs):
        """Poll and resize every pool once, then publish a status snapshot.

        *last_msgs* maps pool name to the summary line printed most recently
        and is updated in place. Waiters are notified when any summary line
        changed, even if a later pool raised during this pass.
        """
        changed = False
        try:
            for name, p in self._pools.items():
                p.poll()
                self._manage_pool_size(p)
                msg = f"Pool {name}: {len(p.members())} members, {p.actual} up, {p.desired} desired, load {p.loadaverage.load:.1f}"
                if last_msgs[name] != msg:
                    print(msg)
                    changed = True
                    last_msgs[name] = msg
        finally:
            if changed:
                with self._pool_condition:
                    self._pool_condition.notify_all()
        # Called on every pass; PollHub.set_state() drops no-op updates.
        self._update_status()

    def _manage_pool_size(self, p):
        """Recompute the desired size of pool *p* and store it if it changed."""
        loadavg = p.loadaverage
        load = loadavg.load
        ups = len(p.classify()['UP'])

        # Ground rule: desired is load, rounded upward.
        new_desired = int(math.ceil(load))
        reason = "load"

        # On start, load==0. We don't want to immediately shut down all
        # minions, so the floor drops by one per minute of running time.
        bottom = max(0, int(ups - math.floor(loadavg.time_running / 60)))
        if new_desired < bottom:
            new_desired = bottom
            reason = "keep some running initially"

        # ceil(load) will never be 0 once load has been > 0.
        # This means that new_desired would stay >= 1 forever.
        # At some point we have to shut down the last minion.
        if (bottom == 0 and new_desired == 1
                and loadavg.time_since_change > 15 * 60 and load < 0.1):
            new_desired = 0
            reason = "no recent activity"

        # A thread is waiting for this pool: keep at least one minion.
        if new_desired == 0 and self._triggers[p.name] > 0:
            new_desired = 1
            reason = "triggered"

        # Never ask for more minions than the pool has.
        new_desired = min(new_desired, len(p.members()))

        if new_desired != p.desired:
            print(
                f"Pool {p.name} load {load:.1f} desired {p.desired} -> {new_desired} ({reason})")
            p.desired = new_desired

    def _connector_for_ip(self, ip):
        """Return the minion connector with 'HOSTNAME' replaced by *ip*.

        Raises:
            Exception: if the template's netloc does not contain 'HOSTNAME'.
        """
        netloc = self._minion_connector.parsed().netloc
        if 'HOSTNAME' not in netloc:
            raise Exception(
                "Minion connector template should use literal string 'HOSTNAME'")
        return self._minion_connector.override(
            netloc=netloc.replace('HOSTNAME', ip))

    def status(self, id=None, seen=0):
        """Return the status hub's get_state(id, seen) result.

        That is a tuple (hub id, generation, state); the call blocks while
        *id* matches the hub and *seen* is not older than the current
        generation.
        """
        return self._statushub.get_state(id, seen)

    def _update_status(self):
        """Build a text report plus per-pool stats and hand them to the hub."""
        # re-use status report from c2.py
        out = io.StringIO()
        stats = {}
        for pl in self._pools.values():
            may_shrink = ", postponing shrinks" if pl.postpone_shrink else ""
            print(
                f"Pool {pl.name}, load={pl.loadaverage.load:.1f}, actual={pl.actual}, desired={pl.desired}{may_shrink}:",
                file=out)
            for i, (name, state, _claims, minion) in enumerate(pl.members()):
                observed = minion.observed_state
                desired = minion.desired_state
                if observed == desired:
                    minion_part = f" minion={observed}"
                else:
                    minion_part = f" minion={observed}->{desired}"
                print(f"{i+1:2d} {name} state={state}{minion_part}", file=out)
            # Blank line between pools.
            print(file=out)
            classification = pl.classify()
            stats[pl.name] = dict(
                load=pl.loadaverage.load,
                up=len(classification['UP']),
                starting=len(classification['STARTING']),
                actual=pl.actual,
                desired=pl.desired,
            )
        status = dict(
            stats=stats,
            text=out.getvalue(),
        )
        # Only the text decides whether the snapshot counts as changed.
        self._statushub.set_state(status, filter=lambda s: s.get('text'))

    def set_pool_size(self, poolname, size):
        """Set the desired size of the pool called *poolname* to *size*.

        NOTE(review): the poller recomputes ``desired`` on its next pass
        (see _manage_pool_size), so this setting may be overwritten quickly.

        Raises:
            Exception: if no pool has that name.
        """
        p = self._pools.get(poolname)
        if p is None:
            msg = f"Pool {poolname} not found, try one of {', '.join(self._pools.keys())}"
            print(msg)
            raise Exception(msg)
        print(f"Set desired size of {poolname} to {size}")
        p.desired = size

    def wait_for_pool(self, pool):
        """Claim a minion from *pool*, blocking until one is available.

        While blocked the caller counts as a trigger for the pool, which
        keeps the poller from sizing it down to zero.

        Raises:
            Exception: "too busy" if too many threads are already waiting.
        """
        with self._pool_condition:
            c = pool.claim()
            if c:
                return c
            if self._pool_condition_sleepers >= self._MAX_SLEEPERS:
                raise Exception("too busy")
            # Register before entering the try so that the finally clause
            # undoes exactly what was done.
            self._pool_condition_sleepers += 1
            self._triggers[pool.name] += 1
            try:
                print(
                    f"Thread {threading.current_thread().name} waiting for pool {pool.name}")
                while True:
                    c = pool.claim()
                    if c:
                        print(
                            f"Thread {threading.current_thread().name} proceeding with pool {pool.name}")
                        return c
                    # Woken by the poller when a pool summary changes.
                    self._pool_condition.wait()
            finally:
                self._pool_condition_sleepers -= 1
                self._triggers[pool.name] -= 1

    def claim_any_pool(self):
        """Claim a minion from whichever pool has one, else wait on the first pool."""
        for p in self._pools.values():
            c = p.claim()
            if c:
                return c
        first_pool = next(iter(self._pools.values()))
        return self.wait_for_pool(first_pool)

    def get_storage(self):
        """Return the storage statistics, fetching them from a minion on first use.

        The result is cached on the instance. The storage lock is held for
        the whole fetch, including any wait for a minion, so concurrent
        callers block until the first fetch has finished. A falsy cached
        value is fetched again on the next call.
        """
        with self._storage_lock:
            if not self._storage_do_not_use_directly:
                with self.claim_any_pool() as claim:
                    connector = self._connector_for_ip(claim.ip)
                    conn = connector.connect()
                    try:
                        storage = teiresias.get_storage(conn)
                        self._storage_do_not_use_directly = storage
                        print("Succesfully retrieved storage stats")
                    finally:
                        conn.close()
            return self._storage_do_not_use_directly

    def execute_query(self, q):
        """Ask the adviser which pool should run *q*, then run it there.

        Returns:
            dict with keys ``query``, ``advice`` (the chosen pool name),
            ``ip`` and ``url`` of the minion used, and ``rows`` (whatever
            cursor.execute(q) returned).
            NOTE(review): for pymonetdb that is presumably a row count
            rather than the fetched rows - confirm.
        """
        # Not sure if connection is thread safe, better create new one.
        # Future work: connection pool

        # First get some advice
        storage = self.get_storage()
        explainer_conn = self._explainer_connector.connect()
        try:
            adviser = teiresias.Adviser(explainer_conn, storage)
            adv = adviser.advise(q, self._specs)
        finally:
            explainer_conn.close()

        # Then send the query to the recommended pool
        p = self._pools[adv]
        with self.wait_for_pool(p) as claim:
            connector = self._connector_for_ip(claim.ip)
            minion_conn = connector.connect()
            try:
                cursor = minion_conn.cursor()
                rows = cursor.execute(q)
                return dict(
                    query=q,
                    advice=adv,
                    ip=claim.ip,
                    url=connector.url,
                    rows=rows,
                )
            finally:
                minion_conn.close()
# Memory of each known EC2 instance type in MiB, written as GiB * 1024.
# make_backend() rejects instance types that are not listed here.
INSTANCE_TYPE_MEMORY_MiB = {
    "t2.micro": 1 * 1024,
    "t2.small": 2 * 1024,
    "t2.medium": 4 * 1024,
    "t2.large": 8 * 1024,
    "t2.xlarge": 16 * 1024,
}
def make_backend(explainer_connector, minion_connector_template, filters):
    """Build a Backend with one pool for every entry of *filters*.

    *filters* maps a pool name to the filter used to find that pool's
    minions. Each pool's spec is the memory of its first minion's instance
    type, converted from MiB to bytes as a float.

    Raises:
        Exception: if a filter matches no minions, or if the instance type
            of the first match is missing from INSTANCE_TYPE_MEMORY_MiB.
    """
    pools, specs = {}, {}
    for name, filter in filters.items():
        found = minions.track_down_minions(ec2, filter)
        if not found:
            raise Exception(f"Found no {name} minions using filter {filter}")

        # The first minion is taken as representative of the whole pool.
        instance_type = found[0].instance.instance_type
        mem_mib = INSTANCE_TYPE_MEMORY_MiB.get(instance_type)
        if not mem_mib:
            raise Exception(
                f"Don't know how much memory a {instance_type} has")

        pools[name] = pool.Pool(name, found)
        specs[name] = mem_mib * 1024 * 1024 * 1.0

    return Backend(
        pools, specs, explainer_connector, minion_connector_template)
class Connector:
    """A validated MAPI URL of the form ``mapi:monetdb://host:port/database``.

    Instances are never modified after construction; override() returns a
    new Connector. Calling an instance is the same as calling connect().
    """

    # Everything after this prefix is parsed as an ordinary URL.
    _MAPI_PREFIX = 'mapi:'

    def __init__(self, url):
        """Validate and store *url*.

        Raises:
            ValueError: if *url* does not start with ``mapi:monetdb:``, has
                no database name, or has a slash in the database name.
        """
        if not url.startswith('mapi:monetdb:'):
            raise ValueError("Expect MAPI url to start with mapi:monetdb:")
        self._url = url
        path = self.parsed().path
        if not path or path == '/':
            raise ValueError(
                f"MAPI URL {url} does not contain a database name")
        # path[0] is the leading slash; anything after it is the name.
        if '/' in path[1:]:
            raise ValueError(
                f"Database name must not contain slashes: {url}")

    @property
    def url(self):
        """The full MAPI URL, including the ``mapi:`` prefix."""
        return self._url

    def parsed(self):
        """Return urlparse() of the URL with the ``mapi:`` prefix removed."""
        return urllib.parse.urlparse(self.url[len(self._MAPI_PREFIX):])

    def override(self, **kwargs):
        """Return a new Connector with the given parsed-URL fields replaced.

        Keyword names are the fields of the urlparse() result, e.g.
        ``netloc``.
        """
        newparts = self.parsed()._replace(**kwargs)
        return Connector(
            self._MAPI_PREFIX + urllib.parse.urlunparse(newparts))

    def connect(self):
        """Open and return a new pymonetdb connection for this URL.

        Port 50000 and user/password ``monetdb`` are used when the URL does
        not specify them.
        """
        parsed = self.parsed()
        # port handling slightly incorrect; no support for unix_socket yet :(
        conn = pymonetdb.connect(
            database=parsed.path[1:] if parsed.path else None,
            hostname=parsed.hostname,
            port=parsed.port or 50000,
            username=parsed.username or 'monetdb',
            password=parsed.password or 'monetdb',
        )
        return conn

    def __call__(self):
        """Shorthand for connect()."""
        return self.connect()
class PollHub:
    """A state value with a generation counter that readers can long-poll.

    Each hub carries a random id. A reader passes back the id and the
    generation it saw last; get_state() then blocks until a newer
    generation exists.
    """

    def __init__(self, initial_state):
        """Start at generation 1 holding *initial_state*."""
        self._state = initial_state
        self._generation = 1
        self._id = uuid.uuid4().hex
        self._last_update = 0
        self._condition = threading.Condition()

    def set_state(self, new_state, filter=lambda x:x):
        """Store *new_state* and wake all waiting readers.

        The update is skipped when the previous update was less than 60
        seconds ago and *filter* maps the old and new state to equal values.
        """
        timestamp = time.time()
        with self._condition:
            updated_recently = timestamp - self._last_update < 60
            if updated_recently and filter(new_state) == filter(self._state):
                return
            self._generation += 1
            self._last_update = timestamp
            self._state = new_state
            self._condition.notify_all()

    def get_state(self, id=None, seen=0):
        """Return ``(hub id, generation, state)``.

        Returns immediately when *id* is not this hub's id or when *seen* is
        older than the current generation; otherwise blocks until
        set_state() produces a newer generation.
        """
        with self._condition:
            # A reader cannot have seen a generation that does not exist yet.
            seen = min(seen, self._generation)
            self._condition.wait_for(
                lambda: id != self._id or seen < self._generation)
            return self._id, self._generation, self._state