Skip to content

Commit

Permalink
moved backend.close() into end of run in preference to atexit
Browse files Browse the repository at this point in the history
  • Loading branch information
o-smirnov committed Sep 10, 2023
1 parent 771df8c commit 0b9c5c4
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 30 deletions.
39 changes: 17 additions & 22 deletions stimela/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from enum import Enum
from omegaconf import ListConfig, OmegaConf
from stimela.exceptions import BackendSpecificationError, BackendError
from stimela.stimelogging import log_exception
from scabha.basetypes import EmptyDictDefault

from .singularity import SingularityBackendOptions
Expand Down Expand Up @@ -96,7 +97,7 @@ def resolve_image_name(backend: StimelaBackendOptions, image: 'stimela.kitchen.C
return f"{image_name}:{version}"


def init_backends(backend_opts: StimelaBackendOptions, log: logging.Logger):
def _call_backends(backend_opts: StimelaBackendOptions, log: logging.Logger, method: str, desc: str, raise_exc: bool=True):
selected = backend_opts.select or ['native']
if type(selected) is str:
selected = [selected]
Expand All @@ -106,32 +107,26 @@ def init_backends(backend_opts: StimelaBackendOptions, log: logging.Logger):
opts = getattr(backend_opts, engine, None)
if not opts or opts.enable:
backend = get_backend(engine)
if backend:
func = backend and getattr(backend, method)
if func:
try:
backend.init(backend_opts, log)
func(backend_opts, log)
except BackendError as exc:
raise BackendError(f"error initializing {engine} backend", exc)

exc1 = BackendError(f"error {desc} {engine} backend", exc)
if raise_exc:
raise exc1 from None
else:
log_exception(exc1, log=log)

def cleanup_backends(backend_opts: StimelaBackendOptions, log: logging.Logger):
    """Run the ``cleanup`` hook of every selected backend that is not disabled.

    Args:
        backend_opts: backend options; ``select`` names the engine(s) to use
            (a single name or a sequence of names, defaulting to ``native``),
            and the attribute named after each engine, if present, carries
            that engine's ``enable`` flag.
        log: logger passed on to each backend's ``cleanup`` and used to report
            backends that have nothing to clean up.

    Raises:
        BackendError: if a backend's ``cleanup`` raises ``BackendError``; the
            original error is attached as the second argument.
    """
    selected = backend_opts.select or ['native']
    # isinstance rather than an exact type comparison: a str subclass must be
    # wrapped into a list too, otherwise it would be iterated char by char
    if isinstance(selected, str):
        selected = [selected]

    for engine in selected:
        # check that backend has not been disabled
        opts = getattr(backend_opts, engine, None)
        if opts and not opts.enable:
            continue

        backend = get_backend(engine)
        if not backend:
            continue

        if not hasattr(backend, 'cleanup'):
            log.info(f"nothing to clean up for {engine} backend")
            continue

        try:
            backend.cleanup(backend_opts, log)
        except BackendError as exc:
            raise BackendError(f"error cleaning up {engine} backend", exc) from None
def init_backends(backend_opts: StimelaBackendOptions, log: logging.Logger):
    """Call the ``init`` hook of the selected backends via ``_call_backends``.

    Returns whatever ``_call_backends`` returns.
    """
    return _call_backends(backend_opts, log, method="init", desc="initializing")

def close_backends(backend_opts: StimelaBackendOptions, log: logging.Logger):
    """Call the ``close`` hook of the selected backends via ``_call_backends``.

    Returns whatever ``_call_backends`` returns.
    """
    return _call_backends(backend_opts, log, method="close", desc="closing")

def cleanup_backends(backend_opts: StimelaBackendOptions, log: logging.Logger):
    """Call the ``cleanup`` hook of the selected backends via ``_call_backends``.

    Returns whatever ``_call_backends`` returns.
    """
    return _call_backends(backend_opts, log, method="cleanup", desc="cleaning up")


## commenting out for now -- will need to fix when we reactivate the kube backend (and have tests for it)
Expand Down
4 changes: 4 additions & 0 deletions stimela/backends/kube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ def init(backend: 'stimela.backend.StimelaBackendOptions', log: logging.Logger):
from . import infrastructure
infrastructure.init(backend, log)

def close(backend: 'stimela.backend.StimelaBackendOptions', log: logging.Logger):
    """Close the kube backend by delegating to ``infrastructure.close``.

    The ``infrastructure`` submodule is imported when this function is called,
    not when this package is imported.
    """
    from . import infrastructure as kube_infrastructure

    kube_infrastructure.close(backend, log)

def cleanup(backend: 'stimela.backend.StimelaBackendOptions', log: logging.Logger):
from . import infrastructure
infrastructure.cleanup(backend, log)
Expand Down
15 changes: 7 additions & 8 deletions stimela/backends/kube/infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
# logger used for global kube messages
klog: Optional[logging.Logger] = None

initialized = True

def _delete_pod(kube_api, podname, namespace, log, warn_not_found=True):
log.info(f"deleting pod {podname}")
try:
Expand All @@ -55,7 +53,7 @@ def init(backend: StimelaBackendOptions, log: logging.Logger, cleanup: bool = Fa
if cleanup:
klog.info("cleaning up backend")
else:
atexit.register(teardown, kube)
atexit.register(close, kube, klog)
klog.info("initializing kube backend")

if cleanup or kube.infrastructure.on_startup.report_pods or kube.infrastructure.on_startup.cleanup_pods:
Expand Down Expand Up @@ -295,10 +293,10 @@ def delete_pvcs(kube: KubeBackendOptions, pvc_names, log: logging.Logger, force=
terminating_pvcs[pvc.name] = name


def teardown(kube: KubeBackendOptions):
global initialized
if not initialized:
return
def close(backend: StimelaBackendOptions, log: logging.Logger):
kube = backend.kube
klog.info("closing kube backend")

# release PVCs
delete_pvcs(kube, list(active_pvcs.keys()), log=klog, session=True, step=True, refresh=False)

Expand All @@ -325,4 +323,5 @@ def teardown(kube: KubeBackendOptions):
for podname in running_pods:
_delete_pod(kube_api, podname, kube.namespace, klog)

initialized = False
atexit.unregister(close)

4 changes: 4 additions & 0 deletions stimela/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ def elapsed():
try:
outputs = outer_step.run(backend=stimela.CONFIG.opts.backend)
except Exception as exc:
stimela.backends.close_backends(backend, log)

task_stats.save_profiling_stats(outer_step.log,
print_depth=profile if profile is not None else stimela.CONFIG.opts.profile.print_depth,
unroll_loops=stimela.CONFIG.opts.profile.unroll_loops)
Expand All @@ -321,6 +323,8 @@ def elapsed():
else:
outer_step.log.info(f"run successful after {elapsed()}")

stimela.backends.close_backends(backend, log)

task_stats.save_profiling_stats(outer_step.log,
print_depth=profile if profile is not None else stimela.CONFIG.opts.profile.print_depth,
unroll_loops=stimela.CONFIG.opts.profile.unroll_loops)
Expand Down

0 comments on commit 0b9c5c4

Please sign in to comment.