Skip to content

Commit

Permalink
Some tiny fixes and improvements for the kube backend and Dask clusters.
Browse files: browse the repository at this point in the history
  • Loading branch information
JSKenyon committed Apr 24, 2024
1 parent adc89f0 commit bb4a512
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 5 deletions.
11 changes: 9 additions & 2 deletions stimela/backends/kube/daskjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ def split_args(args):
def daskjob_template(args):
labels = dict(args.labels)

env_var = [{"name": k, "value": v} for k, v in args.environment_variables.items()]

return {
"apiVersion": "kubernetes.dask.org/v1",
"kind": "DaskJob",
Expand Down Expand Up @@ -133,7 +135,8 @@ def daskjob_template(args):
{
"name": "SCHEDULER_ENV",
"value": "hello-world",
}
},
*env_var
],
"image": args.image,
"imagePullPolicy": args.pull_policy,
Expand Down Expand Up @@ -186,7 +189,11 @@ def daskjob_template(args):
"$(DASK_SCHEDULER_ADDRESS)",
],
"env": [
{"name": "WORKER_ENV", "value": "hello-world"}
{
"name": "WORKER_ENV",
"value": "hello-world"
},
*env_var
],
"image": args.image,
"imagePullPolicy": args.pull_policy,
Expand Down
2 changes: 1 addition & 1 deletion stimela/backends/kube/infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ def _delete_pod(kube_api, podname, namespace, log, warn_not_found=True):
log.info(f"deleting pod {podname}")
try:
resp = kube_api.delete_namespaced_pod(name=podname, namespace=namespace)
log.debug(f"delete_namespaced_pod({podname}): {resp}")
except ApiException as exc:
body = json.loads(exc.body)
if "reason" in body and body["reason"] == "NotFound" and warn_not_found:
log.warning(f"pod {podname} not found, this is probably OK, perhaps it just died on its own")
else:
log_exception(BackendError(f"k8s API error while deleting pod {podname}", (exc, body)),
severity="error", log=log)
log.debug(f"delete_namespaced_pod({podname}): {resp}")

def cleanup(backend: StimelaBackendOptions, log: logging.Logger):
return init(backend, log, cleanup=True)
Expand Down
4 changes: 2 additions & 2 deletions stimela/backends/kube/pod_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def session_init_container(self):
if self._session_init_container is None:
self._session_init_container = dict(
name="volume-session-init",
image="busybox",
image="quay.io/quay/busybox",
command=["/bin/sh", "-c", ""],
volumeMounts=[])
self.pod_spec.setdefault('initContainers', []).append(self._session_init_container)
Expand All @@ -129,7 +129,7 @@ def step_init_container(self):
if self._step_init_container is None:
self._step_init_container = dict(
name="volume-step-init",
image="busybox",
image="quay.io/quay/busybox",
command=["/bin/sh", "-c", ""],
securityContext=dict(
runAsNonRoot = self.uinfo.uid!=0,
Expand Down
1 change: 1 addition & 0 deletions stimela/backends/kube/run_kube.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def dprint(level, *args, **kw):
# cmdline=["/bin/sh", "-c", "while true;do date;sleep 5; done"],
service_account=kube.service_account,
mount_file=None,
environment_variables=kube.env
)))

# apply pod type specifications
Expand Down

0 comments on commit bb4a512

Please sign in to comment.