From 1fba31db2b926754a0b99ef58bf0857502d2a1a5 Mon Sep 17 00:00:00 2001 From: Ambrose Slone Date: Wed, 12 Aug 2020 09:13:57 -0700 Subject: [PATCH 1/2] added support for mlflow to caliban shell --- caliban/docker/build.py | 6 +++++- caliban/platform/cloud/core.py | 1 + caliban/platform/gke/cluster.py | 1 + caliban/platform/run.py | 17 ++++++++++++++++- caliban/platform/shell.py | 9 +++++++-- caliban/util/metrics.py | 17 +++++++++++++++-- 6 files changed, 45 insertions(+), 6 deletions(-) diff --git a/caliban/docker/build.py b/caliban/docker/build.py index 869992d..1debec9 100644 --- a/caliban/docker/build.py +++ b/caliban/docker/build.py @@ -123,6 +123,9 @@ def copy_command(user_id: int, recommend using an absolute path! """ + if from_path is None: + return "" + cmd = f"COPY --chown={user_id}:{user_group} {from_path} {to_path}\n" if comment is not None: @@ -356,6 +359,7 @@ def _package_entries( caliban_config = caliban_config or {} arg = package.main_module or package.script_path + arg = [arg] if arg is not None else [] package_path = package.package_path copy_code = copy_command( @@ -367,7 +371,7 @@ def _package_entries( # This needs to use json so that quotes print as double quotes, not single # quotes. - executable_s = json.dumps(package.executable + [arg]) + executable_s = json.dumps(package.executable + arg) entrypoint_code = _generate_entrypoint( executable=executable_s, diff --git a/caliban/platform/cloud/core.py b/caliban/platform/cloud/core.py index b63169b..039dfad 100644 --- a/caliban/platform/cloud/core.py +++ b/caliban/platform/cloud/core.py @@ -436,6 +436,7 @@ def _job_specs( for idx, m in enumerate(experiments, 1): launcher_args = um.mlflow_args( + caliban_config=caliban_config, experiment_name=m.xgroup.name, index=idx, tags={ diff --git a/caliban/platform/gke/cluster.py b/caliban/platform/gke/cluster.py index c4e6ac2..a17aad9 100644 --- a/caliban/platform/gke/cluster.py +++ b/caliban/platform/gke/cluster.py @@ -710,6 +710,7 @@ def create_simple_job_spec( labels = labels or {} launcher_args = um.mlflow_args( + caliban_config=caliban_config, experiment_name=experiment.xgroup.name, index=index, tags={ diff --git a/caliban/platform/run.py b/caliban/platform/run.py index c37868f..ea9f85a 100644 --- a/caliban/platform/run.py +++ b/caliban/platform/run.py @@ -153,6 +153,7 @@ def _create_job_spec_dict( base_cmd = _run_cmd(job_mode, run_args) + terminal_cmds + [image_id] launcher_args = um.mlflow_args( + caliban_config=caliban_config, experiment_name=experiment.xgroup.name, index=index, tags={ @@ -327,9 +328,23 @@ def run(job_mode: c.JobMode, if image_id is None: image_id = b.build_image(job_mode, **build_image_kwargs) + caliban_config = build_image_kwargs.get('caliban_config', {}) + base_cmd = _run_cmd(job_mode, run_args) - command = base_cmd + [image_id] + script_args + launcher_args = um.mlflow_args( + caliban_config=caliban_config, + experiment_name=um.mlflow_shell_experiment_name(), + index=-1, + tags={ + um.GPU_ENABLED_TAG: str(job_mode == c.JobMode.GPU).lower(), + um.TPU_ENABLED_TAG: 'false', + um.DOCKER_IMAGE_TAG: image_id, + um.PLATFORM_TAG: Platform.LOCAL.value, + }, + ) + + command = base_cmd + [image_id] + launcher_args + script_args logging.info("Running command: {}".format(' '.join(command))) subprocess.call(command) diff --git a/caliban/platform/shell.py b/caliban/platform/shell.py index 26e8856..cef9eae 100644 --- a/caliban/platform/shell.py +++ b/caliban/platform/shell.py @@ -25,6 +25,7 @@ import caliban.config as c import caliban.docker.build as b import caliban.platform.run as r +import caliban.util as u def _home_mount_cmds(enable_home_mount: bool) -> List[str]: @@ -99,9 +100,13 @@ def run_interactive(job_mode: c.JobMode, if entrypoint is None: entrypoint = b.SHELL_DICT[shell].executable + build_image_kwargs['package'] = u.Package(executable=[entrypoint], + script_path=None, + main_module=None, + package_path=None) + interactive_run_args = _interactive_opts(workdir) + [ - "-it", \ - "--entrypoint", entrypoint + "-it" ] + _home_mount_cmds(mount_home) + run_args r.run(job_mode=job_mode, diff --git a/caliban/util/metrics.py b/caliban/util/metrics.py index 803e034..94d9a50 100644 --- a/caliban/util/metrics.py +++ b/caliban/util/metrics.py @@ -166,6 +166,7 @@ def _mlflow_job_name(index: int, user: str = None) -> str: def mlflow_args( + caliban_config: Dict[str, Any], experiment_name: str, index: int, tags: Dict[str, Any], @@ -173,16 +174,28 @@ def mlflow_args( '''returns mlflow args for caliban launcher Args: + caliban_config: caliban configuration dict experiment: experiment object - index: job index + index: job index, if < 0, then no run name is generated tags: dictionary of tags to pass to mlflow Returns: mlflow args list ''' + if 'mlflow_config' not in caliban_config: + return [] + env = {f'ENVVAR_{k}': v for k, v in tags.items()} env['MLFLOW_EXPERIMENT_NAME'] = experiment_name - env['MLFLOW_RUN_NAME'] = _mlflow_job_name(index=index) + + if index >= 0: + env['MLFLOW_RUN_NAME'] = _mlflow_job_name(index=index) return ['--caliban_config', json.dumps({'env': env})] + + +def mlflow_shell_experiment_name() -> str: + '''generates an experiment name for a caliban shell session''' + timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + return f'{u.current_user()}-shell-{timestamp}' From 1de728e1d729085b70283ef475fe55fe7a1f457a Mon Sep 17 00:00:00 2001 From: Ambrose Slone Date: Wed, 12 Aug 2020 14:27:48 -0700 Subject: [PATCH 2/2] added unit test for mlflow_shell_experiment_name --- tests/caliban/util/test_metrics.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/caliban/util/test_metrics.py b/tests/caliban/util/test_metrics.py index 20bb10b..3bc5590 100644 --- a/tests/caliban/util/test_metrics.py +++ b/tests/caliban/util/test_metrics.py @@ -221,3 +221,9 @@ def test_launcher_config_file(): # make sure we clean up appropriately assert not os.path.exists(cfg_fname) + + +def test_mlflow_shell_experiment_name(): + experiment_name = um.mlflow_shell_experiment_name() + assert experiment_name is not None + assert len(experiment_name) > 0