Skip to content

Commit

Permalink
Use 'system' account for Isolate/CIPD calls from Swarming bot.
Browse files Browse the repository at this point in the history
Switch to 'task' account before launching user-provided code.

[email protected], [email protected]
BUG=730878

Review-Url: https://codereview.chromium.org/2982113002
  • Loading branch information
vadimsht authored and Commit Bot committed Jul 25, 2017
1 parent 3c3ae08 commit 8d5dcf9
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 30 deletions.
23 changes: 14 additions & 9 deletions appengine/swarming/swarming_bot/bot_code/bot_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,10 @@ def start(self):
contains its parameters. It can be placed into LUCI_CONTEXT['local_auth']
slot.
By default LUCI subprocesses will be using "task" service account (or none
at all, it the task has no associated service account). Internal Swarming
processes (like run_isolated.py) can switch to using "system" account.
Sets default service account (to be used by Swarming internal processes,
like run_isolated.py) to 'system' (or unsets it if the bot has no associated
service account). run_isolated.py eventually switches the default account to
'task' before launching the actual user-supplied code.
If task is not using service accounts, returns None (meaning, there's no
need to setup LUCI_CONTEXT['local_auth'] at all).
Expand Down Expand Up @@ -252,18 +253,22 @@ def start(self):

# Expose all defined accounts (if any) to subprocesses via LUCI_CONTEXT.
#
# Use 'task' account as default for everything. Internal Swarming processes
# will pro-actively switch to 'system'.
# Use 'system' logical account as default for internal Swarming processes.
# It is specified by 'system_service_account' field in bots.cfg. Swarming
# will eventually switch to 'task' logical account before launching
# user-supplied code. 'task' account is specified in the task definition.
# This happens in run_isolated.py.
#
# If 'task' is not defined, then do not set default account at all! It means
# processes will use non-authenticated calls by default (which is precisely
# the meaning of un-set task account).
# If 'system_service_account' is not defined, then do not set default
# account at all! It means internal Swarming processes will use
# non-authenticated calls (which is precisely the meaning of un-set
# system account).
default_account_id = None
available_accounts = []
if params.system_service_account != 'none':
default_account_id = 'system'
available_accounts.append('system')
if params.task_service_account != 'none':
default_account_id = 'task'
available_accounts.append('task')

# If using service accounts, launch local HTTP server that serves tokens
Expand Down
23 changes: 11 additions & 12 deletions appengine/swarming/swarming_bot/bot_code/bot_auth_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,12 @@ def test_task_as_bot(self):
swarming_http_headers_exp=exp,
system_service_account='none',
task_service_account='bot'))
self.assertEqual(
['accounts', 'default_account_id', 'rpc_port', 'secret'],
sorted(local_auth_ctx))
# Note: default_account_id is omitted when it is None.
self.assertEqual(['accounts', 'rpc_port', 'secret'], sorted(local_auth_ctx))

# Only 'task' account is defined (no 'system'). It is also default.
# Only 'task' account is defined (no 'system'). And there's NO default.
self.assertEqual([{'id': 'task'}], local_auth_ctx['accounts'])
self.assertEqual('task', local_auth_ctx['default_account_id'])
self.assertFalse(local_auth_ctx.get('default_account_id'))

# Try to use the local RPC service to grab a 'task' token. Should return
# the token specified by 'swarming_http_headers'.
Expand All @@ -136,13 +135,13 @@ def test_system_as_bot(self):
swarming_http_headers_exp=exp,
system_service_account='bot',
task_service_account='none'))
# Note: default_account_id is omitted when it is None.
self.assertEqual(['accounts', 'rpc_port', 'secret'], sorted(local_auth_ctx))
self.assertEqual(
['accounts', 'default_account_id', 'rpc_port', 'secret'],
sorted(local_auth_ctx))

# Only 'system' account is defined (no 'task'). And there's NO default
# account at all.
# Only 'system' account is defined (no 'task'), and it is default.
self.assertEqual([{'id': 'system'}], local_auth_ctx['accounts'])
self.assertFalse(local_auth_ctx.get('default_account_id'))
self.assertEqual('system', local_auth_ctx['default_account_id'])

# Try to use the local RPC service to grab a 'system' token. Should return
# the token specified by 'swarming_http_headers'.
Expand Down Expand Up @@ -172,10 +171,10 @@ def test_system_and_task_as_bot(self):
['accounts', 'default_account_id', 'rpc_port', 'secret'],
sorted(local_auth_ctx))

# Both are defined, 'task' is default.
# Both are defined, 'system' is default.
self.assertEqual(
[{'id': 'system'}, {'id': 'task'}], local_auth_ctx['accounts'])
self.assertEqual('task', local_auth_ctx.get('default_account_id'))
self.assertEqual('system', local_auth_ctx.get('default_account_id'))

# Both 'system' and 'task' tokens work.
for account_id in ('system', 'task'):
Expand Down
2 changes: 2 additions & 0 deletions appengine/swarming/swarming_bot/bot_code/task_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def get_isolated_args(is_grpc, work_dir, task_details, isolated_result,

cmd.extend(
[
# Switch to 'task' logical account, if it is set.
'--switch-to-account', 'task',
# Cleanup has been run at bot startup in bot_main.py.
'--no-clean',
# https://github.com/luci/luci-py/issues/270
Expand Down
84 changes: 75 additions & 9 deletions client/run_isolated.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
on_before_task() hook in the swarming server's custom bot_config.py.
"""

__version__ = '0.9.1'
__version__ = '0.9.2'

import argparse
import base64
Expand All @@ -52,6 +52,8 @@
from utils import tools
from utils import zip_package

from libs import luci_context

import auth
import cipd
import isolateserver
Expand Down Expand Up @@ -198,6 +200,54 @@ def change_tree_read_only(rootdir, read_only):
(rootdir, read_only, read_only))


@contextlib.contextmanager
def set_luci_context_account(account, tmp_dir):
"""Sets LUCI_CONTEXT account to be used by the task.
If 'account' is None or '', does nothing at all. This happens when
run_isolated.py is called without '--switch-to-account' flag. In this case,
if run_isolated.py is running in some LUCI_CONTEXT environment, the task will
just inherit whatever account is already set. This may happen is users invoke
run_isolated.py explicitly from their code.
If the requested account is not defined in the context, switches to
non-authenticated access. This happens for Swarming tasks that don't use
'task' service accounts.
If not using LUCI_CONTEXT-based auth, does nothing.
If already running as requested account, does nothing.
"""
if not account:
# Not actually switching.
yield
return

local_auth = luci_context.read('local_auth')
if not local_auth:
# Not using LUCI_CONTEXT auth at all.
yield
return

# See LUCI_CONTEXT.md for the format of 'local_auth'.
if local_auth.get('default_account_id') == account:
# Already set, no need to switch.
yield
return

available = {a['id'] for a in local_auth.get('accounts') or []}
if account in available:
logging.info('Switching default LUCI_CONTEXT account to %r', account)
local_auth['default_account_id'] = account
else:
logging.warning(
'Requested LUCI_CONTEXT account %r is not available (have only %r), '
'disabling authentication', account, sorted(available))
local_auth.pop('default_account_id', None)

with luci_context.write(_tmpdir=tmp_dir, local_auth=local_auth):
yield


def process_command(command, out_dir, bot_file):
"""Replaces variables in a command line.
Expand Down Expand Up @@ -441,7 +491,8 @@ def delete_and_upload(storage, out_dir, leak_temp_dir):
def map_and_run(
command, isolated_hash, storage, isolate_cache, outputs,
install_named_caches, leak_temp_dir, root_dir, hard_timeout, grace_period,
bot_file, install_packages_fn, use_symlinks, constant_run_path):
bot_file, switch_to_account, install_packages_fn, use_symlinks,
constant_run_path):
"""Runs a command with optional isolated input/output.
See run_tha_test for argument documentation.
Expand Down Expand Up @@ -548,9 +599,12 @@ def map_and_run(
sys.stdout.flush()
start = time.time()
try:
result['exit_code'], result['had_hard_timeout'] = run_command(
command, cwd, get_command_env(tmp_dir, cipd_info),
hard_timeout, grace_period)
# Need to switch the default account before 'get_command_env' call,
# so it can grab correct value of LUCI_CONTEXT env var.
with set_luci_context_account(switch_to_account, tmp_dir):
result['exit_code'], result['had_hard_timeout'] = run_command(
command, cwd, get_command_env(tmp_dir, cipd_info),
hard_timeout, grace_period)
finally:
result['duration'] = max(time.time() - start, 0)
except Exception as e:
Expand Down Expand Up @@ -617,7 +671,8 @@ def map_and_run(
def run_tha_test(
command, isolated_hash, storage, isolate_cache, outputs,
install_named_caches, leak_temp_dir, result_json, root_dir, hard_timeout,
grace_period, bot_file, install_packages_fn, use_symlinks):
grace_period, bot_file, switch_to_account, install_packages_fn,
use_symlinks):
"""Runs an executable and records execution metadata.
Either command or isolated_hash must be specified.
Expand All @@ -644,8 +699,10 @@ def run_tha_test(
isolate_cache: an isolateserver.LocalCache to keep from retrieving the
same objects constantly by caching the objects retrieved.
Can be on-disk or in-memory.
outputs: list of paths relative to root_dir to put into the output isolated
bundle upon task completion (see link_outputs_to_outdir).
install_named_caches: a function (run_dir) => context manager that installs
named caches into |run_dir|.
named caches into |run_dir|.
leak_temp_dir: if true, the temporary directory will be deliberately leaked
for later examination.
result_json: file path to dump result metadata into. If set, the process
Expand All @@ -655,8 +712,11 @@ def run_tha_test(
hard_timeout: kills the process if it lasts more than this amount of
seconds.
grace_period: number of seconds to wait between SIGTERM and SIGKILL.
bot_file: path to a file with bot state, used in place of
${SWARMING_BOT_FILE} task command line argument.
switch_to_account: a logical account to switch LUCI_CONTEXT into.
install_packages_fn: context manager dir => CipdInfo, see
install_client_and_packages.
install_client_and_packages.
use_symlinks: create tree with symlinks instead of hardlinks.
Returns:
Expand All @@ -677,7 +737,7 @@ def run_tha_test(
result = map_and_run(
command, isolated_hash, storage, isolate_cache, outputs,
install_named_caches, leak_temp_dir, root_dir, hard_timeout, grace_period,
bot_file, install_packages_fn, use_symlinks, True)
bot_file, switch_to_account, install_packages_fn, use_symlinks, True)
logging.info('Result:\n%s', tools.format_json(result, dense=True))

if result_json:
Expand Down Expand Up @@ -917,6 +977,10 @@ def create_option_parser():
'--bot-file',
help='Path to a file describing the state of the host. The content is '
'defined by on_before_task() in bot_config.')
parser.add_option(
'--switch-to-account',
help='If given, switches LUCI_CONTEXT to given logical service account '
'(e.g. "task" or "system") before launching the isolated process.')
parser.add_option(
'--output', action='append',
help='Specifies an output to return. If no outputs are specified, all '
Expand Down Expand Up @@ -1086,6 +1150,7 @@ def install_named_caches(run_dir):
options.hard_timeout,
options.grace_period,
options.bot_file,
options.switch_to_account,
install_packages_fn,
options.use_symlinks)
return run_tha_test(
Expand All @@ -1101,6 +1166,7 @@ def install_named_caches(run_dir):
options.hard_timeout,
options.grace_period,
options.bot_file,
options.switch_to_account,
install_packages_fn,
options.use_symlinks)
except (cipd.Error, named_cache.Error) as ex:
Expand Down
Loading

0 comments on commit 8d5dcf9

Please sign in to comment.