// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "object/job_dispatcher.h"
#include <inttypes.h>
#include <lib/counters.h>
#include <platform.h>
#include <zircon/errors.h>
#include <zircon/rights.h>
#include <zircon/syscalls/policy.h>
#include <zircon/types.h>
#include <fbl/alloc_checker.h>
#include <fbl/array.h>
#include <fbl/auto_lock.h>
#include <fbl/inline_array.h>
#include <kernel/mutex.h>
#include <ktl/algorithm.h>
#include <object/process_dispatcher.h>
KCOUNTER(dispatcher_job_create_count, "dispatcher.job.create")
KCOUNTER(dispatcher_job_destroy_count, "dispatcher.job.destroy")
// The starting max_height value of the root job.
static constexpr uint32_t kRootJobMaxHeight = 32;
static constexpr char kRootJobName[] = "root";
template <>
uint64_t JobDispatcher::ChildCountLocked<JobDispatcher>() const {
return jobs_.size();
}
template <>
uint64_t JobDispatcher::ChildCountLocked<ProcessDispatcher>() const {
return procs_.size();
}
// To come up with an order on our recursive locks we take advantage of the fact that max_height
// strictly decreases from parent to child. As we acquire locks from parent to child we can build
// an increasing counter by inverting the max_height. We add 1 to the counter so that the order
// value 0 stays reserved for the default order used when a lock is acquired without an explicit
// order.
uint32_t JobDispatcher::LockOrder() const { return kRootJobMaxHeight - max_height() + 1; }
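// For example (illustrative values only): the root job has max_height 32 and
// lock order 32 - 32 + 1 = 1; a child job has max_height 31 and order 2; a
// grandchild gets order 3. Orders strictly increase as we descend, so
// acquiring locks parent-before-child always presents increasing order values.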
// Calls the provided |zx_status_t func(fbl::RefPtr<DISPATCHER_TYPE>)|
// function on all live elements of |children|, which must be one of |jobs_|
// or |procs_|. Stops iterating early if |func| returns a value other than
// ZX_OK, returning that value from this method. |lock_| must be held when
// calling this method, and it will still be held while the callback is
// called.
//
// The returned |LiveRefsArray| needs to be destructed when |lock_| is not
// held anymore. The recommended pattern is:
//
// LiveRefsArray refs;
// {
// Guard<Mutex> guard{get_lock()};
// refs = ForEachChildInLocked(...);
// }
//
template <typename T, typename Fn>
JobDispatcher::LiveRefsArray JobDispatcher::ForEachChildInLocked(T& children, zx_status_t* result,
Fn func) {
  // Convert child raw pointers into RefPtrs. This is tricky and requires
  // special logic on the RefPtr class to handle a ref count that can be
  // zero.
  //
  // The main requirement is that |lock_| both controls child list lookup
  // and ensures that a child's destructor cannot make progress while the
  // list is being inspected. In other words, when inspecting the |children|
  // list we can be sure that a given child process or child job is either
  // - alive, with refcount > 0, or
  // - blocked partway through destruction, with refcount == 0.
const uint64_t count = ChildCountLocked<typename T::ValueType>();
if (!count) {
*result = ZX_OK;
return LiveRefsArray();
}
fbl::AllocChecker ac;
LiveRefsArray refs(new (&ac) fbl::RefPtr<Dispatcher>[count], count);
if (!ac.check()) {
*result = ZX_ERR_NO_MEMORY;
return LiveRefsArray();
}
size_t ix = 0;
for (auto& craw : children) {
auto cref = ::fbl::MakeRefPtrUpgradeFromRaw(&craw, get_lock());
if (!cref)
continue;
*result = func(cref);
// |cref| might be the last reference at this point. If so,
// when we drop it in the next iteration the object dtor
// would be called here with the |get_lock()| held. To avoid that
// we keep the reference alive in the |refs| array and pass
// the responsibility of releasing them outside the lock to
// the caller.
refs[ix++] = ktl::move(cref);
if (*result != ZX_OK)
break;
}
return refs;
}
fbl::RefPtr<JobDispatcher> JobDispatcher::CreateRootJob() {
fbl::AllocChecker ac;
auto job = fbl::AdoptRef(new (&ac) JobDispatcher(0u, nullptr, JobPolicy::CreateRootPolicy()));
if (!ac.check()) {
panic("root-job: failed to allocate\n");
}
job->set_name(kRootJobName, sizeof(kRootJobName));
return job;
}
zx_status_t JobDispatcher::Create(uint32_t flags, const fbl::RefPtr<JobDispatcher>& parent,
KernelHandle<JobDispatcher>* handle, zx_rights_t* rights) {
if (parent != nullptr && parent->max_height() == 0) {
// The parent job cannot have children.
return ZX_ERR_OUT_OF_RANGE;
}
fbl::AllocChecker ac;
KernelHandle new_handle(
fbl::AdoptRef(new (&ac) JobDispatcher(flags, parent, parent->GetPolicy())));
if (!ac.check())
return ZX_ERR_NO_MEMORY;
if (!parent->AddChildJob(new_handle.dispatcher())) {
return ZX_ERR_BAD_STATE;
}
*rights = default_rights();
*handle = ktl::move(new_handle);
return ZX_OK;
}
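// Illustrative kernel-side use of Create() (a sketch; |parent_job| is a
// hypothetical fbl::RefPtr<JobDispatcher> already in hand):
//
//   KernelHandle<JobDispatcher> handle;
//   zx_rights_t rights;
//   zx_status_t status = JobDispatcher::Create(0u, parent_job, &handle, &rights);
//   if (status == ZX_OK) {
//     // |handle| owns the new job; |rights| holds its default handle rights.
//   }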
JobDispatcher::JobDispatcher(uint32_t /*flags*/, fbl::RefPtr<JobDispatcher> parent,
JobPolicy policy)
: SoloDispatcher(ZX_JOB_NO_PROCESSES | ZX_JOB_NO_JOBS | ZX_JOB_NO_CHILDREN),
parent_(ktl::move(parent)),
max_height_(parent_ ? parent_->max_height() - 1 : kRootJobMaxHeight),
state_(State::READY),
return_code_(0),
kill_on_oom_(false),
policy_(policy),
exceptionate_(ZX_EXCEPTION_CHANNEL_TYPE_JOB),
debug_exceptionate_(ZX_EXCEPTION_CHANNEL_TYPE_JOB_DEBUGGER) {
kcounter_add(dispatcher_job_create_count, 1);
}
JobDispatcher::~JobDispatcher() {
kcounter_add(dispatcher_job_destroy_count, 1);
RemoveFromJobTreesUnlocked();
}
zx_koid_t JobDispatcher::get_related_koid() const { return parent_ ? parent_->get_koid() : 0u; }
bool JobDispatcher::AddChildProcess(const fbl::RefPtr<ProcessDispatcher>& process) {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
if (state_ != State::READY)
return false;
procs_.push_back(process.get());
UpdateSignalsLocked();
return true;
}
bool JobDispatcher::AddChildJob(const fbl::RefPtr<JobDispatcher>& job) {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
if (state_ != State::READY)
return false;
// Put the new job after our next-youngest child, or us if we have none.
//
// We try to make older jobs closer to the root (both hierarchically and
// temporally) show up earlier in enumeration.
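  // For example, if child jobs A and then B are added below, push_back keeps A
  // ahead of B, so enumeration visits the older job A first.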
JobDispatcher* neighbor = (jobs_.is_empty() ? this : &jobs_.back());
  // This can only be called once; the job must not already be part
  // of any job tree.
DEBUG_ASSERT(!fbl::InContainer<JobDispatcher::RawListTag>(*job));
DEBUG_ASSERT(neighbor != job.get());
jobs_.push_back(job.get());
UpdateSignalsLocked();
return true;
}
void JobDispatcher::RemoveChildProcess(ProcessDispatcher* process) {
canary_.Assert();
bool should_die = false;
{
Guard<Mutex> guard{get_lock()};
// The process dispatcher can call us in its destructor, Kill(),
// or RemoveThread().
if (!fbl::InContainer<ProcessDispatcher::RawJobListTag>(*process)) {
return;
}
procs_.erase(*process);
UpdateSignalsLocked();
should_die = IsReadyForDeadTransitionLocked();
// Aggregate runtime stats from exiting process.
aggregated_runtime_stats_.Add(process->GetAggregatedRuntime());
}
if (should_die)
FinishDeadTransitionUnlocked();
}
void JobDispatcher::RemoveChildJob(JobDispatcher* job) {
canary_.Assert();
bool should_die = false;
{
Guard<Mutex> guard{get_lock()};
if (!fbl::InContainer<JobDispatcher::RawListTag>(*job)) {
return;
}
jobs_.erase(*job);
UpdateSignalsLocked();
should_die = IsReadyForDeadTransitionLocked();
}
if (should_die)
FinishDeadTransitionUnlocked();
}
JobDispatcher::State JobDispatcher::GetState() const {
Guard<Mutex> guard{get_lock()};
return state_;
}
void JobDispatcher::RemoveFromJobTreesUnlocked() {
canary_.Assert();
if (parent_)
parent_->RemoveChildJob(this);
}
bool JobDispatcher::IsReadyForDeadTransitionLocked() {
canary_.Assert();
return state_ == State::KILLING && jobs_.is_empty() && procs_.is_empty();
}
void JobDispatcher::FinishDeadTransitionUnlocked() {
canary_.Assert();
// Make sure we're killing from the bottom of the tree up or else parent
// jobs could die before their children.
//
// In particular, this means we have to finish dying before leaving the job
// trees, since the last child leaving the tree can trigger its parent to
// finish dying.
DEBUG_ASSERT(!parent_ || (parent_->GetState() != State::DEAD));
{
Guard<Mutex> guard{get_lock()};
state_ = State::DEAD;
exceptionate_.Shutdown();
debug_exceptionate_.Shutdown();
UpdateStateLocked(0u, ZX_JOB_TERMINATED);
}
RemoveFromJobTreesUnlocked();
}
void JobDispatcher::UpdateSignalsLocked() {
// Clear all signals, and mark the appropriate ones active.
//
// The active signals take precedence over the clear signals.
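  // For example, a job holding one process and no child jobs asserts only
  // ZX_JOB_NO_JOBS; once its last process is removed, ZX_JOB_NO_PROCESSES and
  // ZX_JOB_NO_CHILDREN become active as well.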
zx_signals_t clear = (ZX_JOB_NO_JOBS | ZX_JOB_NO_PROCESSES | ZX_JOB_NO_CHILDREN);
  // Compute which of those signals should currently be active.
zx_signals_t set = 0u;
if (procs_.is_empty()) {
set |= ZX_JOB_NO_PROCESSES;
}
if (jobs_.is_empty()) {
set |= ZX_JOB_NO_JOBS;
}
if (jobs_.is_empty() && procs_.is_empty()) {
set |= ZX_JOB_NO_CHILDREN;
}
UpdateStateLocked(clear, set);
}
JobPolicy JobDispatcher::GetPolicy() const {
Guard<Mutex> guard{get_lock()};
return policy_;
}
bool JobDispatcher::KillJobWithKillOnOOM() {
// Get list of jobs with kill bit set.
OOMBitJobArray oom_jobs;
int count = 0;
CollectJobsWithOOMBit(&oom_jobs, &count);
if (count == 0) {
printf("OOM: no jobs with kill_on_oom found\n");
return false;
}
// Sort by max height.
ktl::stable_sort(oom_jobs.begin(), oom_jobs.begin() + count,
[](const fbl::RefPtr<JobDispatcher>& a, const fbl::RefPtr<JobDispatcher>& b) {
return a->max_height() < b->max_height();
});
// Kill lowest to highest until we find something to kill.
for (int i = count - 1; i >= 0; --i) {
auto& job = oom_jobs[i];
if (job->Kill(ZX_TASK_RETCODE_OOM_KILL)) {
char name[ZX_MAX_NAME_LEN];
job->get_name(name);
printf("OOM: killing %" PRIu64 " '%s'\n", job->get_koid(), name);
return true;
}
}
printf("OOM: no job found to kill\n");
return false;
}
void JobDispatcher::CollectJobsWithOOMBit(OOMBitJobArray* into, int* count) {
  // As CollectJobsWithOOMBit recurses into child jobs, we need to give a lock order to the guard.
  Guard<Mutex> guard{&lock_, LockOrder()};
  // We had to take the guard directly on lock_ above as the get_lock() virtual method erases the
  // Nestable type information. The AssertHeld here allows us to restore the clang capability
  // analysis.
  AssertHeld(*get_lock());
if (kill_on_oom_) {
if (*count >= static_cast<int>(into->size())) {
printf("OOM: skipping some jobs, exceeded max count\n");
return;
}
auto cref = ::fbl::MakeRefPtrUpgradeFromRaw(this, get_lock());
if (!cref)
return;
(*into)[*count] = ktl::move(cref);
*count += 1;
}
for (auto& job : jobs_) {
job.CollectJobsWithOOMBit(into, count);
}
}
bool JobDispatcher::Kill(int64_t return_code) {
canary_.Assert();
JobList jobs_to_kill;
ProcessList procs_to_kill;
LiveRefsArray jobs_refs;
LiveRefsArray proc_refs;
bool should_die = false;
{
Guard<Mutex> guard{get_lock()};
if (state_ != State::READY)
return false;
return_code_ = return_code;
state_ = State::KILLING;
zx_status_t result;
// Safely gather refs to the children.
jobs_refs = ForEachChildInLocked(jobs_, &result, [&](fbl::RefPtr<JobDispatcher> job) {
jobs_to_kill.push_front(ktl::move(job));
return ZX_OK;
});
proc_refs = ForEachChildInLocked(procs_, &result, [&](fbl::RefPtr<ProcessDispatcher> proc) {
procs_to_kill.push_front(ktl::move(proc));
return ZX_OK;
});
should_die = IsReadyForDeadTransitionLocked();
}
if (should_die)
FinishDeadTransitionUnlocked();
// Since we kill the child jobs first we have a depth-first massacre.
while (!jobs_to_kill.is_empty()) {
// TODO(cpu): This recursive call can overflow the stack.
jobs_to_kill.pop_front()->Kill(return_code);
}
while (!procs_to_kill.is_empty()) {
procs_to_kill.pop_front()->Kill(return_code);
}
return true;
}
bool JobDispatcher::CanSetPolicy() TA_REQ(get_lock()) {
// Can't set policy when there are active processes or jobs. This constraint ensures that a
// process's policy cannot change over its lifetime. Because a process's policy cannot change,
// the risk of TOCTOU bugs is reduced and we are free to apply policy at the ProcessDispatcher
// without having to walk up the tree to its containing job.
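  // For example, a zx_job_set_policy() call on a job that already hosts a
  // process fails (surfaced by the callers below as ZX_ERR_BAD_STATE) rather
  // than letting the policy change race with the live process.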
if (!procs_.is_empty() || !jobs_.is_empty()) {
return false;
}
return true;
}
zx_status_t JobDispatcher::SetBasicPolicy(uint32_t mode, const zx_policy_basic_v1_t* in_policy,
size_t policy_count) {
fbl::AllocChecker ac;
fbl::InlineArray<zx_policy_basic_v2_t, kPolicyBasicInlineCount> policy(&ac, policy_count);
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
for (size_t ix = 0; ix != policy.size(); ++ix) {
policy[ix].condition = in_policy[ix].condition;
policy[ix].action = in_policy[ix].policy;
policy[ix].flags = ZX_POL_OVERRIDE_DENY;
}
return SetBasicPolicy(mode, policy.get(), policy.size());
}
zx_status_t JobDispatcher::SetBasicPolicy(uint32_t mode, const zx_policy_basic_v2_t* in_policy,
size_t policy_count) {
Guard<Mutex> guard{get_lock()};
if (!CanSetPolicy()) {
return ZX_ERR_BAD_STATE;
}
return policy_.AddBasicPolicy(mode, in_policy, policy_count);
}
zx_status_t JobDispatcher::SetTimerSlackPolicy(const zx_policy_timer_slack& policy) {
Guard<Mutex> guard{get_lock()};
if (!CanSetPolicy()) {
return ZX_ERR_BAD_STATE;
}
// Is the policy valid?
if (policy.min_slack < 0) {
return ZX_ERR_INVALID_ARGS;
}
slack_mode new_mode;
switch (policy.default_mode) {
case ZX_TIMER_SLACK_CENTER:
new_mode = TIMER_SLACK_CENTER;
break;
case ZX_TIMER_SLACK_EARLY:
new_mode = TIMER_SLACK_EARLY;
break;
case ZX_TIMER_SLACK_LATE:
new_mode = TIMER_SLACK_LATE;
break;
    default:
      return ZX_ERR_INVALID_ARGS;
  }
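  // A job's slack may only loosen, never tighten: for example, if the
  // inherited slack amount is 1ms and the requested min_slack is 50us, the
  // ktl::max below keeps 1ms; requesting a min_slack of 2ms would raise the
  // amount to 2ms.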
const TimerSlack old_slack = policy_.GetTimerSlack();
const zx_duration_t new_amount = ktl::max(old_slack.amount(), policy.min_slack);
const TimerSlack new_slack(new_amount, new_mode);
policy_.SetTimerSlack(new_slack);
return ZX_OK;
}
bool JobDispatcher::EnumerateChildren(JobEnumerator* je, bool recurse) {
canary_.Assert();
LiveRefsArray jobs_refs;
LiveRefsArray proc_refs;
zx_status_t result = ZX_OK;
{
    // As EnumerateChildren may recurse into child jobs, we need to give a lock order to the
    // guard.
    Guard<Mutex> guard{&lock_, LockOrder()};
    // We had to take the guard directly on lock_ above as the get_lock() virtual method erases
    // the Nestable type information. The AssertHeld here allows us to restore the clang
    // capability analysis.
    AssertHeld(*get_lock());
proc_refs =
ForEachChildInLocked(procs_, &result, [&](const fbl::RefPtr<ProcessDispatcher>& proc) {
return je->OnProcess(proc.get()) ? ZX_OK : ZX_ERR_STOP;
});
if (result != ZX_OK) {
return false;
}
jobs_refs = ForEachChildInLocked(jobs_, &result, [&](const fbl::RefPtr<JobDispatcher>& job) {
if (!je->OnJob(job.get())) {
return ZX_ERR_STOP;
}
if (recurse) {
// TODO(kulakowski): This recursive call can overflow the stack.
return job->EnumerateChildren(je, /* recurse */ true) ? ZX_OK : ZX_ERR_STOP;
}
return ZX_OK;
});
}
return result == ZX_OK;
}
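// Illustrative (hypothetical) enumerator built on the JobEnumerator interface
// used above; it counts every process under a job and never stops early:
//
//   class ProcessCounter final : public JobEnumerator {
//    public:
//     size_t count = 0;
//
//    private:
//     bool OnJob(JobDispatcher* job) final { return true; }
//     bool OnProcess(ProcessDispatcher* proc) final {
//       ++count;
//       return true;
//     }
//   };
//
//   ProcessCounter counter;
//   root_job->EnumerateChildren(&counter, /* recurse */ true);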
fbl::RefPtr<ProcessDispatcher> JobDispatcher::LookupProcessById(zx_koid_t koid) {
canary_.Assert();
LiveRefsArray proc_refs;
fbl::RefPtr<ProcessDispatcher> found_proc;
{
Guard<Mutex> guard{get_lock()};
zx_status_t result;
proc_refs = ForEachChildInLocked(procs_, &result, [&](fbl::RefPtr<ProcessDispatcher> proc) {
if (proc->get_koid() == koid) {
found_proc = ktl::move(proc);
return ZX_ERR_STOP;
}
return ZX_OK;
});
}
return found_proc; // Null if not found.
}
fbl::RefPtr<JobDispatcher> JobDispatcher::LookupJobById(zx_koid_t koid) {
canary_.Assert();
LiveRefsArray jobs_refs;
fbl::RefPtr<JobDispatcher> found_job;
{
Guard<Mutex> guard{get_lock()};
zx_status_t result;
jobs_refs = ForEachChildInLocked(jobs_, &result, [&](fbl::RefPtr<JobDispatcher> job) {
if (job->get_koid() == koid) {
found_job = ktl::move(job);
return ZX_ERR_STOP;
}
return ZX_OK;
});
}
return found_job; // Null if not found.
}
void JobDispatcher::get_name(char out_name[ZX_MAX_NAME_LEN]) const {
canary_.Assert();
name_.get(ZX_MAX_NAME_LEN, out_name);
}
zx_status_t JobDispatcher::set_name(const char* name, size_t len) {
canary_.Assert();
return name_.set(name, len);
}
Exceptionate* JobDispatcher::exceptionate(Exceptionate::Type type) {
canary_.Assert();
return type == Exceptionate::Type::kDebug ? &debug_exceptionate_ : &exceptionate_;
}
void JobDispatcher::set_kill_on_oom(bool value) {
Guard<Mutex> guard{get_lock()};
kill_on_oom_ = value;
}
bool JobDispatcher::get_kill_on_oom() const {
Guard<Mutex> guard{get_lock()};
return kill_on_oom_;
}
void JobDispatcher::GetInfo(zx_info_job_t* info) const {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
info->return_code = return_code_;
info->exited = (state_ == State::DEAD) || (state_ == State::KILLING);
info->kill_on_oom = kill_on_oom_;
info->debugger_attached = debug_exceptionate_.HasValidChannel();
}
zx_status_t JobDispatcher::AccumulateRuntimeTo(zx_info_task_runtime_t* info) const {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
aggregated_runtime_stats_.AccumulateRuntimeTo(info);
// At this point, the process in question may be in its destructor waiting to acquire the lock and
// remove itself from this job, but its aggregated runtime is not yet part of this job's data.
//
// AccumulateRuntimeTo must be safe to be called even when the process is in its destructor.
for (const auto& proc : procs_) {
zx_status_t err = proc.AccumulateRuntimeTo(info);
if (err != ZX_OK) {
return err;
}
}
return ZX_OK;
}