Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for mlflow in caliban shell #83

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion caliban/docker/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ def copy_command(user_id: int,
recommend using an absolute path!

"""
if from_path is None:
return ""

cmd = f"COPY --chown={user_id}:{user_group} {from_path} {to_path}\n"

if comment is not None:
Expand Down Expand Up @@ -358,6 +361,7 @@ def _package_entries(
caliban_config = caliban_config or {}

arg = package.main_module or package.script_path
arg = [arg] if arg is not None else []
package_path = package.package_path

copy_code = copy_command(
Expand All @@ -369,7 +373,7 @@ def _package_entries(

# This needs to use json so that quotes print as double quotes, not single
# quotes.
executable_s = json.dumps(package.executable + [arg])
executable_s = json.dumps(package.executable + arg)

entrypoint_code = _generate_entrypoint(
executable=executable_s,
Expand Down
16 changes: 15 additions & 1 deletion caliban/platform/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,9 +328,23 @@ def run(job_mode: c.JobMode,
if image_id is None:
image_id = b.build_image(job_mode, **build_image_kwargs)

caliban_config = build_image_kwargs.get('caliban_config', {})

base_cmd = _run_cmd(job_mode, run_args)

command = base_cmd + [image_id] + script_args
launcher_args = um.mlflow_args(
caliban_config=caliban_config,
experiment_name=um.mlflow_shell_experiment_name(),
index=-1,
tags={
um.GPU_ENABLED_TAG: str(job_mode == c.JobMode.GPU).lower(),
um.TPU_ENABLED_TAG: 'false',
um.DOCKER_IMAGE_TAG: image_id,
um.PLATFORM_TAG: Platform.LOCAL.value,
},
)

command = base_cmd + [image_id] + launcher_args + script_args

logging.info("Running command: {}".format(' '.join(command)))
subprocess.call(command)
Expand Down
9 changes: 7 additions & 2 deletions caliban/platform/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import caliban.config as c
import caliban.docker.build as b
import caliban.platform.run as r
import caliban.util as u


def _home_mount_cmds(enable_home_mount: bool) -> List[str]:
Expand Down Expand Up @@ -99,9 +100,13 @@ def run_interactive(job_mode: c.JobMode,
if entrypoint is None:
entrypoint = b.SHELL_DICT[shell].executable

build_image_kwargs['package'] = u.Package(executable=[entrypoint],
script_path=None,
main_module=None,
package_path=None)

interactive_run_args = _interactive_opts(workdir) + [
"-it", \
"--entrypoint", entrypoint
"-it"
] + _home_mount_cmds(mount_home) + run_args

r.run(job_mode=job_mode,
Expand Down
12 changes: 10 additions & 2 deletions caliban/util/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def mlflow_args(

caliban_config: caliban configuration dict
experiment: experiment object
index: job index
index: job index, if < 0, then no run name is generated
tags: dictionary of tags to pass to mlflow

Returns:
Expand All @@ -188,6 +188,14 @@ def mlflow_args(

env = {f'ENVVAR_{k}': v for k, v in tags.items()}
env['MLFLOW_EXPERIMENT_NAME'] = experiment_name
env['MLFLOW_RUN_NAME'] = _mlflow_job_name(index=index)

if index >= 0:
env['MLFLOW_RUN_NAME'] = _mlflow_job_name(index=index)

return ['--caliban_config', json.dumps({'env': env})]


def mlflow_shell_experiment_name() -> str:
    """Build the MLflow experiment name for a caliban shell session.

    The name is ``<user>-shell-<timestamp>``, where ``<user>`` is whatever
    ``u.current_user()`` returns and ``<timestamp>`` is the current naive
    local time rendered as ``YYYY-mm-dd-HH-MM-SS``.

    Returns:
      The generated experiment name.
    """
    # Capture the time first, then resolve the user, matching the order in
    # which the name's components are evaluated.
    stamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    user = u.current_user()
    return f'{user}-shell-{stamp}'
6 changes: 6 additions & 0 deletions tests/caliban/util/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,9 @@ def test_launcher_config_file():

# make sure we clean up appropriately
assert not os.path.exists(cfg_fname)


def test_mlflow_shell_experiment_name():
    """The shell experiment name generator yields a non-empty value."""
    name = um.mlflow_shell_experiment_name()

    # Both conditions are checked separately so a failure pinpoints whether
    # the value was missing entirely or merely empty.
    assert name is not None
    assert len(name) > 0