diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index 3840e05a..024a6d40 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -6,8 +6,11 @@ import logging import os import os.path as op +from pathlib import ( + Path, + PurePosixPath, +) import re -import sys from shutil import copyfile from datalad.cmd import WitlessRunner @@ -22,6 +25,7 @@ from datalad.support.exceptions import InsufficientArgumentsError from datalad.interface.results import get_status_dict +from .utils import get_container_configuration from .definitions import definitions lgr = logging.getLogger("datalad.containers.containers_add") @@ -205,8 +209,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, "Container names can only contain alphanumeric characters " "and '-', got: '{}'".format(name)) - cfgbasevar = "datalad.containers.{}".format(name) - if cfgbasevar + ".image" in ds.config: + container_cfg = get_container_configuration(ds, name) + if 'image' in container_cfg: if not update: yield get_status_dict( action="containers_add", ds=ds, logger=lgr, @@ -219,7 +223,7 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, if not (url or image or call_fmt): # No updated values were provided. See if an update url is # configured (currently relevant only for Singularity Hub). - url = ds.config.get(cfgbasevar + ".updateurl") + url = container_cfg.get("updateurl") if not url: yield get_status_dict( action="containers_add", ds=ds, logger=lgr, @@ -227,8 +231,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, message="No values to update specified") return - call_fmt = call_fmt or ds.config.get(cfgbasevar + ".cmdexec") - image = image or ds.config.get(cfgbasevar + ".image") + call_fmt = call_fmt or container_cfg.get("cmdexec") + image = image or container_cfg.get("image") if not image: loc_cfg_var = "datalad.containers.location" @@ -329,6 +333,7 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, return # store configs + cfgbasevar = "datalad.containers.{}".format(name) if imgurl != url: # store originally given URL, as it resolves to something # different and maybe can be used to update the container @@ -337,7 +342,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, # force store the image, and prevent multiple entries ds.config.set( "{}.image".format(cfgbasevar), - op.relpath(image, start=ds.path), + # always store a POSIX path, relative to dataset root + str(PurePosixPath(Path(image).relative_to(ds.pathobj))), force=True) if call_fmt: ds.config.set( diff --git a/datalad_container/containers_list.py b/datalad_container/containers_list.py index 38ee9b04..df71468a 100644 --- a/datalad_container/containers_list.py +++ b/datalad_container/containers_list.py @@ -19,6 +19,8 @@ from datalad.coreapi import subdatasets from datalad.ui import ui +from datalad_container.utils import get_container_configuration + lgr = logging.getLogger("datalad.containers.containers_list") @@ -77,22 +79,7 @@ def __call__(dataset=None, recursive=False, contains=None): yield c # all info is in the dataset config! - var_prefix = 'datalad.containers.' - containers = {} - for var, value in ds.config.items(): - if not var.startswith(var_prefix): - # not an interesting variable - continue - var_comps = var[len(var_prefix):].split('.') - cname = var_comps[0] - ccfgname = '.'.join(var_comps[1:]) - if not ccfgname: - continue - - cinfo = containers.get(cname, {}) - cinfo[ccfgname] = value - - containers[cname] = cinfo + containers = get_container_configuration(ds) for k, v in containers.items(): if 'image' not in v: diff --git a/datalad_container/containers_remove.py b/datalad_container/containers_remove.py index 7710a2d3..7e0e95ef 100644 --- a/datalad_container/containers_remove.py +++ b/datalad_container/containers_remove.py @@ -15,6 +15,8 @@ from datalad.support.constraints import EnsureStr from datalad.interface.results import get_status_dict +from datalad_container.utils import get_container_configuration + lgr = logging.getLogger("datalad.containers.containers_remove") @@ -24,15 +26,24 @@ class ContainersRemove(Interface): # first docstring line is used a short description in the cmdline help # the rest is put in the verbose help and manpage """Remove a known container from a dataset + + This command is only removing a container from the committed + Dataset configuration (configuration scope ``branch``). It will not + modify any other configuration scopes. + + This command is *not* dropping the container image associated with the + removed record, because it may still be needed for other dataset versions. + In order to drop the container image, use the 'drop' command prior + to removing the container configuration. """ # parameters of the command, must be exhaustive _params_ = dict( dataset=Parameter( args=("-d", "--dataset"), - doc="""specify the dataset to query. If no dataset is given, an - attempt is made to identify the dataset based on the current - working directory""", + doc="""specify the dataset from removing a container. If no dataset + is given, an attempt is made to identify the dataset based on the + current working directory""", constraints=EnsureDataset() | EnsureNone()), name=Parameter( args=("name",), @@ -42,7 +53,9 @@ class ContainersRemove(Interface): ), remove_image=Parameter( args=("-i", "--remove-image",), - doc="""if set, remove container image as well""", + doc="""if set, remove container image as well. Even with this flag, + the container image content will not be dropped. Use the 'drop' + command explicitly before removing the container configuration.""", action="store_true", ), ) @@ -59,12 +72,11 @@ def __call__(name, dataset=None, remove_image=False): action='containers_remove', logger=lgr) - section = 'datalad.containers.{}'.format(name) - imagecfg = '{}.image'.format(section) + container_cfg = get_container_configuration(ds, name) to_save = [] - if remove_image and imagecfg in ds.config: - imagepath = ds.config.get(imagecfg) + if remove_image and 'image' in container_cfg: + imagepath = container_cfg['image'] if op.lexists(op.join(ds.path, imagepath)): for r in ds.remove( path=imagepath, @@ -78,10 +90,10 @@ def __call__(name, dataset=None, remove_image=False): yield r to_save.append(imagepath) - if section in ds.config.sections(): + if container_cfg: ds.config.remove_section( - section, - where='dataset', + f'datalad.containers.{name}', + scope='branch', reload=True) res['status'] = 'ok' to_save.append(op.join('.datalad', 'config')) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index d7b4e796..e2c2c84a 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -81,6 +81,14 @@ def __call__(cmd, container_name=None, dataset=None, ds = require_dataset(dataset, check_installed=True, purpose='run a containerized command execution') + # this following block locates the target container. this involves a + # configuration look-up. This is not using + # get_container_configuration(), because it needs to account for a + # wide range of scenarios, including the installation of the dataset(s) + # that will eventually provide (the configuration) for the container. + # However, internally this is calling `containers_list()`, which is + # using get_container_configuration(), so any normalization of + # configuration on-read, get still be implemented in this helper. container = None for res in find_container_(ds, container_name): if res.get("action") == "containers": diff --git a/datalad_container/tests/test_containers.py b/datalad_container/tests/test_containers.py index 16cb58dc..f59cec68 100644 --- a/datalad_container/tests/test_containers.py +++ b/datalad_container/tests/test_containers.py @@ -29,27 +29,32 @@ from datalad_container.tests.utils import add_pyscript_image +common_kwargs = {'result_renderer': 'disabled'} + @with_tempfile def test_add_noop(path=None): - ds = Dataset(path).create() + ds = Dataset(path).create(**common_kwargs) ok_clean_git(ds.path) assert_raises(TypeError, ds.containers_add) # fails when there is no image - assert_status('error', ds.containers_add('name', on_failure='ignore')) + assert_status( + 'error', + ds.containers_add('name', on_failure='ignore', **common_kwargs)) # no config change ok_clean_git(ds.path) # place a dummy "image" file with open(op.join(ds.path, 'dummy'), 'w') as f: f.write('some') - ds.save('dummy') + ds.save('dummy', **common_kwargs) ok_clean_git(ds.path) # config will be added, as long as there is a file, even when URL access # fails res = ds.containers_add( 'broken', url='bogus-protocol://bogus-server', image='dummy', - on_failure='ignore' + on_failure='ignore', + **common_kwargs ) assert_status('ok', res) assert_result_count(res, 1, action='save', status='ok') @@ -59,12 +64,16 @@ def test_add_noop(path=None): @with_tree(tree={"foo.img": "doesn't matter 0", "bar.img": "doesn't matter 1"}) def test_add_local_path(path=None, local_file=None): - ds = Dataset(path).create() + ds = Dataset(path).create(**common_kwargs) res = ds.containers_add(name="foobert", url=op.join(local_file, "foo.img")) foo_target = op.join(path, ".datalad", "environments", "foobert", "image") assert_result_count(res, 1, status="ok", type="file", path=foo_target, action="containers_add") + # the image path configuration is always in POSIX format + assert ds.config.get('datalad.containers.foobert.image') \ + == '.datalad/environments/foobert/image' + # We've just copied and added the file. assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target)) @@ -94,12 +103,12 @@ def test_container_files(ds_path=None, local_file=None, url=None): local_file = get_local_file_url(op.join(local_file, 'some_container.img')) # prepare dataset: - ds = Dataset(ds_path).create() + ds = Dataset(ds_path).create(**common_kwargs) # non-default location: ds.config.add("datalad.containers.location", value=op.join(".datalad", "test-environments"), where='dataset') - ds.save(message="Configure container mountpoint") + ds.save(message="Configure container mountpoint", **common_kwargs) # no containers yet: res = ds.containers_list(**RAW_KWDS) @@ -108,7 +117,7 @@ def test_container_files(ds_path=None, local_file=None, url=None): # add first "image": must end up at the configured default location target_path = op.join( ds.path, ".datalad", "test-environments", "first", "image") - res = ds.containers_add(name="first", url=local_file) + res = ds.containers_add(name="first", url=local_file, **common_kwargs) ok_clean_git(ds.repo) assert_result_count(res, 1, status="ok", type="file", path=target_path, @@ -125,7 +134,7 @@ def test_container_files(ds_path=None, local_file=None, url=None): # and kill it again # but needs name assert_raises(TypeError, ds.containers_remove) - res = ds.containers_remove('first', remove_image=True) + res = ds.containers_remove('first', remove_image=True, **common_kwargs) assert_status('ok', res) assert_result_count(ds.containers_list(**RAW_KWDS), 0) # image removed @@ -143,25 +152,28 @@ def test_extra_inputs(ds_path=None): overlay2_file = 'overlay2.img' # prepare dataset: - ds = Dataset(ds_path).create(force=True) - ds.save() + ds = Dataset(ds_path).create(force=True, **common_kwargs) + ds.save(**common_kwargs) ds.containers_add( name="container", image=container_file, call_fmt="apptainer exec {img} {cmd}", + **common_kwargs ) ds.containers_add( name="container-with-overlay", image=container_file, call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img {img} {cmd}", - extra_input=[overlay1_file] + extra_input=[overlay1_file], + **common_kwargs ) ds.containers_add( name="container-with-two-overlays", image=container_file, call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img --overlay {img_dirpath}/overlay2.img:ro {img} {cmd}", - extra_input=[overlay1_file, overlay2_file] + extra_input=[overlay1_file, overlay2_file], + **common_kwargs ) res = ds.containers_list(**RAW_KWDS) @@ -181,31 +193,39 @@ def test_container_update(ds_path=None, local_file=None, url=None): url_bar = get_local_file_url(op.join(local_file, 'bar.img')) img = op.join(".datalad", "environments", "foo", "image") - ds = Dataset(ds_path).create() + ds = Dataset(ds_path).create(**common_kwargs) - ds.containers_add(name="foo", call_fmt="call-fmt1", url=url_foo) + ds.containers_add(name="foo", call_fmt="call-fmt1", url=url_foo, + **common_kwargs) # Abort without --update flag. - res = ds.containers_add(name="foo", on_failure="ignore") + res = ds.containers_add(name="foo", on_failure="ignore", + **common_kwargs) assert_result_count(res, 1, action="containers_add", status="impossible") # Abort if nothing to update is specified. - res = ds.containers_add(name="foo", update=True, on_failure="ignore") + res = ds.containers_add(name="foo", update=True, on_failure="ignore", + **common_kwargs) assert_result_count(res, 1, action="containers_add", status="impossible", message="No values to update specified") # Update call format. - ds.containers_add(name="foo", update=True, call_fmt="call-fmt2") + ds.containers_add(name="foo", update=True, call_fmt="call-fmt2", + **common_kwargs) assert_equal(ds.config.get("datalad.containers.foo.cmdexec"), "call-fmt2") ok_file_has_content(op.join(ds.path, img), "foo") # Update URL/image. - ds.drop(img) # Make sure it works even with absent content. - res = ds.containers_add(name="foo", update=True, url=url_bar) + ds.drop(img, **common_kwargs) # Make sure it works even with absent content. + res = ds.containers_add(name="foo", update=True, url=url_bar, + **common_kwargs) assert_in_results(res, action="remove", status="ok") assert_in_results(res, action="save", status="ok") ok_file_has_content(op.join(ds.path, img), "bar") + # the image path configuration is (still) always in POSIX format + assert ds.config.get('datalad.containers.foo.image') \ + == '.datalad/environments/foo/image' # Test commit message # In the above case it was updating existing image so should have "Update " @@ -213,7 +233,8 @@ def test_container_update(ds_path=None, local_file=None, url=None): assert_in("Update ", get_commit_msg()) # If we add a new image with update=True should say Configure - res = ds.containers_add(name="foo2", update=True, url=url_bar) + res = ds.containers_add(name="foo2", update=True, url=url_bar, + **common_kwargs) assert_in("Configure ", get_commit_msg()) @@ -223,16 +244,17 @@ def test_container_update(ds_path=None, local_file=None, url=None): def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file=None): # prepare a to-be subdataset with a registered container - src_subds = Dataset(src_subds_path).create() - src_subds.containers_add(name="first", - url=get_local_file_url(op.join(local_file, - 'some_container.img')) - ) + src_subds = Dataset(src_subds_path).create(**common_kwargs) + src_subds.containers_add( + name="first", + url=get_local_file_url(op.join(local_file, 'some_container.img')), + **common_kwargs + ) # add it as subdataset to a super ds: - ds = Dataset(ds_path).create() - subds = ds.install("sub", source=src_subds_path) + ds = Dataset(ds_path).create(**common_kwargs) + subds = ds.install("sub", source=src_subds_path, **common_kwargs) # add it again one level down to see actual recursion: - subds.install("subsub", source=src_subds_path) + subds.install("subsub", source=src_subds_path, **common_kwargs) # We come up empty without recursive: res = ds.containers_list(recursive=False, **RAW_KWDS) @@ -254,9 +276,9 @@ def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file ) # not installed subdataset doesn't pose an issue: - sub2 = ds.create("sub2") - assert_result_count(ds.subdatasets(), 2, type="dataset") - ds.uninstall("sub2", check=False) + sub2 = ds.create("sub2", **common_kwargs) + assert_result_count(ds.subdatasets(**common_kwargs), 2, type="dataset") + ds.uninstall("sub2", check=False, **common_kwargs) from datalad.tests.utils_pytest import assert_false assert_false(sub2.is_installed()) @@ -285,17 +307,17 @@ def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file @with_tempfile def test_list_contains(path=None): - ds = Dataset(path).create() - subds_a = ds.create("a") - subds_b = ds.create("b") - subds_a_c = subds_a.create("c") + ds = Dataset(path).create(**common_kwargs) + subds_a = ds.create("a", **common_kwargs) + subds_b = ds.create("b", **common_kwargs) + subds_a_c = subds_a.create("c", **common_kwargs) add_pyscript_image(subds_a_c, "in-c", "img") add_pyscript_image(subds_a, "in-a", "img") add_pyscript_image(subds_b, "in-b", "img") add_pyscript_image(ds, "in-top", "img") - ds.save(recursive=True) + ds.save(recursive=True, **common_kwargs) assert_result_count(ds.containers_list(recursive=True, **RAW_KWDS), 4) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index 2c0bca9a..83403c82 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -41,40 +41,42 @@ testimg_url = 'shub://datalad/datalad-container:testhelper' +common_kwargs = {'result_renderer': 'disabled'} + @with_tree(tree={"dummy0.img": "doesn't matter 0", "dummy1.img": "doesn't matter 1"}) def test_run_mispecified(path=None): - ds = Dataset(path).create(force=True) - ds.save(path=["dummy0.img", "dummy1.img"]) + ds = Dataset(path).create(force=True, **common_kwargs) + ds.save(path=["dummy0.img", "dummy1.img"], **common_kwargs) ok_clean_git(path) # Abort if no containers exist. with assert_raises(ValueError) as cm: - ds.containers_run("doesn't matter") + ds.containers_run("doesn't matter", **common_kwargs) assert_in("No known containers", str(cm.value)) # Abort if more than one container exists but no container name is # specified. - ds.containers_add("d0", image="dummy0.img") - ds.containers_add("d1", image="dummy0.img") + ds.containers_add("d0", image="dummy0.img", **common_kwargs) + ds.containers_add("d1", image="dummy0.img", **common_kwargs) with assert_raises(ValueError) as cm: - ds.containers_run("doesn't matter") + ds.containers_run("doesn't matter", **common_kwargs) assert_in("explicitly specify container", str(cm.value)) # Abort if unknown container is specified. with assert_raises(ValueError) as cm: - ds.containers_run("doesn't matter", container_name="ghost") + ds.containers_run("doesn't matter", container_name="ghost", **common_kwargs) assert_in("Container selection impossible", str(cm.value)) @with_tree(tree={"i.img": "doesn't matter"}) def test_run_unknown_cmdexec_placeholder(path=None): ds = Dataset(path).create(force=True) - ds.containers_add("i", image="i.img", call_fmt="{youdontknowme}") + ds.containers_add("i", image="i.img", call_fmt="{youdontknowme}", **common_kwargs) assert_result_count( - ds.containers_run("doesn't matter", on_failure="ignore"), + ds.containers_run("doesn't matter", on_failure="ignore", **common_kwargs), 1, path=ds.path, action="run", @@ -95,9 +97,10 @@ def test_container_files(path=None, super_path=None): image='righthere', # the next one is auto-guessed #call_fmt='singularity exec {img} {cmd}' + **common_kwargs ) assert_result_count( - ds.containers_list(), 1, + ds.containers_list(**common_kwargs), 1, path=op.join(ds.path, 'righthere'), name='mycontainer') ok_clean_git(path) @@ -113,7 +116,7 @@ def assert_no_change(res, path): # now we can run stuff in the container # and because there is just one, we don't even have to name the container - res = ds.containers_run(cmd) + res = ds.containers_run(cmd, **common_kwargs) # container becomes an 'input' for `run` -> get request, but "notneeded" assert_result_count( res, 1, action='get', status='notneeded', @@ -122,7 +125,7 @@ def assert_no_change(res, path): # same thing as we specify the container by its name: res = ds.containers_run(cmd, - container_name='mycontainer') + container_name='mycontainer', **common_kwargs) # container becomes an 'input' for `run` -> get request, but "notneeded" assert_result_count( res, 1, action='get', status='notneeded', @@ -131,7 +134,7 @@ def assert_no_change(res, path): # we can also specify the container by its path: res = ds.containers_run(cmd, - container_name=op.join(ds.path, 'righthere')) + container_name=op.join(ds.path, 'righthere'), **common_kwargs) # container becomes an 'input' for `run` -> get request, but "notneeded" assert_result_count( res, 1, action='get', status='notneeded', @@ -141,15 +144,15 @@ def assert_no_change(res, path): # Now, test the same thing, but with this dataset being a subdataset of # another one: - super_ds = Dataset(super_path).create() - super_ds.install("sub", source=path) + super_ds = Dataset(super_path).create(**common_kwargs) + super_ds.install("sub", source=path, **common_kwargs) # When running, we don't discover containers in subdatasets with assert_raises(ValueError) as cm: - super_ds.containers_run(cmd) + super_ds.containers_run(cmd, **common_kwargs) assert_in("No known containers", str(cm.value)) # ... unless we need to specify the name - res = super_ds.containers_run(cmd, container_name="sub/mycontainer") + res = super_ds.containers_run(cmd, container_name="sub/mycontainer", **common_kwargs) # container becomes an 'input' for `run` -> get request (needed this time) assert_result_count( res, 1, action='get', status='ok', @@ -160,33 +163,34 @@ def assert_no_change(res, path): @with_tempfile @with_tree(tree={'some_container.img': "doesn't matter"}) def test_non0_exit(path=None, local_file=None): - ds = Dataset(path).create() + ds = Dataset(path).create(**common_kwargs) # plug in a proper singularity image ds.containers_add( 'mycontainer', url=get_local_file_url(op.join(local_file, 'some_container.img')), image='righthere', - call_fmt="sh -c '{cmd}'" + call_fmt="sh -c '{cmd}'", + **common_kwargs ) - ds.save() # record the effect in super-dataset + ds.save(**common_kwargs) # record the effect in super-dataset ok_clean_git(path) # now we can run stuff in the container # and because there is just one, we don't even have to name the container - ds.containers_run(['touch okfile']) + ds.containers_run(['touch okfile'], **common_kwargs) assert_repo_status(path) # Test that regular 'run' behaves as expected -- it does not proceed to save upon error with pytest.raises(IncompleteResultsError): - ds.run(['sh', '-c', 'touch nokfile && exit 1']) + ds.run(['sh', '-c', 'touch nokfile && exit 1'], **common_kwargs) assert_repo_status(path, untracked=['nokfile']) (ds.pathobj / "nokfile").unlink() # remove for the next test assert_repo_status(path) # Now test with containers-run which should behave similarly -- not save in case of error with pytest.raises(IncompleteResultsError): - ds.containers_run(['touch nokfile && exit 1']) + ds.containers_run(['touch nokfile && exit 1'], **common_kwargs) # check - must have created the file but not saved anything since failed to run! assert_repo_status(path, untracked=['nokfile']) @@ -194,8 +198,8 @@ def test_non0_exit(path=None, local_file=None): @with_tempfile @with_tree(tree={'some_container.img': "doesn't matter"}) def test_custom_call_fmt(path=None, local_file=None): - ds = Dataset(path).create() - subds = ds.create('sub') + ds = Dataset(path).create(**common_kwargs) + subds = ds.create('sub', **common_kwargs) # plug in a proper singularity image subds.containers_add( @@ -204,9 +208,10 @@ def test_custom_call_fmt(path=None, local_file=None): image='righthere', call_fmt='echo image={img} cmd={cmd} img_dspath={img_dspath} ' # and environment variable being set/propagated by default - 'name=$DATALAD_CONTAINER_NAME' + 'name=$DATALAD_CONTAINER_NAME', + **common_kwargs, ) - ds.save() # record the effect in super-dataset + ds.save(**common_kwargs) # record the effect in super-dataset # Running should work fine either within sub or within super out = WitlessRunner(cwd=subds.path).run( @@ -239,8 +244,8 @@ def test_custom_call_fmt(path=None, local_file=None): } ) def test_extra_inputs(path=None): - ds = Dataset(path).create(force=True) - subds = ds.create("sub", force=True) + ds = Dataset(path).create(force=True, **common_kwargs) + subds = ds.create("sub", force=True, **common_kwargs) subds.containers_add( "mycontainer", image="containers/container.img", @@ -250,9 +255,10 @@ def test_extra_inputs(path=None): "{img_dirpath}/../overlays/overlay2.img", "{img_dspath}/overlays/overlay3.img", ], + **common_kwargs ) - ds.save(recursive=True) # record the entire tree of files etc - ds.containers_run("XXX", container_name="sub/mycontainer") + ds.save(recursive=True, **common_kwargs) # record the entire tree of files etc + ds.containers_run("XXX", container_name="sub/mycontainer", **common_kwargs) ok_file_has_content( os.path.join(ds.repo.path, "out.log"), "image=sub/containers/container.img", @@ -273,10 +279,10 @@ def test_extra_inputs(path=None): @skip_if_no_network @with_tree(tree={"subdir": {"in": "innards"}}) def test_run_no_explicit_dataset(path=None): - ds = Dataset(path).create(force=True) - ds.save() + ds = Dataset(path).create(force=True, **common_kwargs) + ds.save(**common_kwargs) ds.containers_add("deb", url=testimg_url, - call_fmt="singularity exec {img} {cmd}") + call_fmt="singularity exec {img} {cmd}", **common_kwargs) # When no explicit dataset is given, paths are interpreted as relative to # the current working directory. @@ -285,14 +291,14 @@ def test_run_no_explicit_dataset(path=None): with chpwd(path): containers_run("cat {inputs[0]} {inputs[0]} >doubled", inputs=[op.join("subdir", "in")], - outputs=["doubled"]) + outputs=["doubled"], **common_kwargs) ok_file_has_content(op.join(path, "doubled"), "innardsinnards") # From under a subdirectory. subdir = op.join(ds.path, "subdir") with chpwd(subdir): containers_run("cat {inputs[0]} {inputs[0]} >doubled", - inputs=["in"], outputs=["doubled"]) + inputs=["in"], outputs=["doubled"], **common_kwargs) ok_file_has_content(op.join(subdir, "doubled"), "innardsinnards") @@ -316,16 +322,16 @@ def test_run_subdataset_install(path=None): # `-- d/ # `-- d2/ # `-- img - ds_src_a = ds_src.create("a") - ds_src_a2 = ds_src_a.create("a2") - ds_src_b = Dataset(ds_src.pathobj / "b").create() - ds_src_b2 = ds_src_b.create("b2") - ds_src_c = ds_src.create("c") - ds_src_c2 = ds_src_c.create("c2") - ds_src_d = Dataset(ds_src.pathobj / "d").create() - ds_src_d2 = ds_src_d.create("d2") + ds_src_a = ds_src.create("a", **common_kwargs) + ds_src_a2 = ds_src_a.create("a2", **common_kwargs) + ds_src_b = Dataset(ds_src.pathobj / "b").create(**common_kwargs) + ds_src_b2 = ds_src_b.create("b2", **common_kwargs) + ds_src_c = ds_src.create("c", **common_kwargs) + ds_src_c2 = ds_src_c.create("c2", **common_kwargs) + ds_src_d = Dataset(ds_src.pathobj / "d").create(**common_kwargs) + ds_src_d2 = ds_src_d.create("d2", **common_kwargs) - ds_src.save() + ds_src.save(**common_kwargs) add_pyscript_image(ds_src_a, "in-a", "img") add_pyscript_image(ds_src_a2, "in-a2", "img") @@ -333,9 +339,9 @@ def test_run_subdataset_install(path=None): add_pyscript_image(ds_src_c2, "in-c2", "img") add_pyscript_image(ds_src_d2, "in-d2", "img") - ds_src.save(recursive=True) + ds_src.save(recursive=True, **common_kwargs) - ds_dest = clone(ds_src.path, str(path / "dest")) + ds_dest = clone(ds_src.path, str(path / "dest"), **common_kwargs) ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2") ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2") ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2") @@ -346,7 +352,7 @@ def test_run_subdataset_install(path=None): assert_false(ds_dest_d2.is_installed()) # Needed subdatasets are installed if container name is given... - res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2") + res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2", **common_kwargs) assert_result_count( res, 1, action="install", status="ok", path=ds_dest_a2.path) assert_result_count( @@ -354,7 +360,7 @@ def test_run_subdataset_install(path=None): path=str(ds_dest_a2.pathobj / "img")) ok_(ds_dest_a2.is_installed()) # ... even if the name and path do not match. - res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2") + res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2", **common_kwargs) assert_result_count( res, 1, action="install", status="ok", path=ds_dest_b2.path) assert_result_count( @@ -362,15 +368,60 @@ def test_run_subdataset_install(path=None): path=str(ds_dest_b2.pathobj / "img")) ok_(ds_dest_b2.is_installed()) # Subdatasets will also be installed if given an image path... - res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img"))) + res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img")), **common_kwargs) assert_result_count( res, 1, action="install", status="ok", path=ds_dest_c2.path) assert_result_count( res, 1, action="get", status="ok", path=str(ds_dest_c2.pathobj / "img")) ok_(ds_dest_c2.is_installed()) - ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img"))) + ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")), **common_kwargs) # There's no install record if subdataset is already present. - res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2") + res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2", **common_kwargs) assert_not_in_results(res, action="install") + + +@skip_if_no_network +def test_nonPOSIX_imagepath(tmp_path): + ds = Dataset(tmp_path).create(**common_kwargs) + + # plug in a proper singularity image + ds.containers_add( + 'mycontainer', + url=testimg_url, + **common_kwargs + ) + assert_result_count( + ds.containers_list(**common_kwargs), 1, + # we get a report in platform conventions + path=str(ds.pathobj / '.datalad' / 'environments' / + 'mycontainer' / 'image'), + name='mycontainer') + assert_repo_status(tmp_path) + + # now reconfigure the image path to look as if a version <= 1.2.4 + # configured it on windows + # this must still run across all platforms + ds.config.set( + 'datalad.containers.mycontainer.image', + '.datalad\\environments\\mycontainer\\image', + scope='branch', + reload=True, + ) + ds.save(**common_kwargs) + assert_repo_status(tmp_path) + + assert_result_count( + ds.containers_list(**common_kwargs), 1, + # we still get a report in platform conventions + path=str(ds.pathobj / '.datalad' / 'environments' / + 'mycontainer' / 'image'), + name='mycontainer') + + res = ds.containers_run(['ls'], **common_kwargs) + assert_result_count( + res, 1, + action='run', + status='ok', + ) diff --git a/datalad_container/utils.py b/datalad_container/utils.py index 0e272f2a..260db2ce 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -1,5 +1,20 @@ +"""Collection of common utilities""" + +from __future__ import annotations + +# the pathlib equivalent is only available in PY3.12 +from os.path import lexists + +from pathlib import ( + PurePath, + PurePosixPath, + PureWindowsPath, +) + +from datalad.distribution.dataset import Dataset from datalad.support.external_versions import external_versions + def get_container_command(): for command in ["apptainer", "singularity"]: container_system_version = external_versions[f"cmd:{command}"] @@ -9,3 +24,128 @@ def get_container_command(): raise RuntimeError("Did not find apptainer or singularity") +def get_container_configuration( + ds: Dataset, + name: str | None = None, +) -> dict: + """Report all container-related configuration in a dataset + + Such configuration is identified by the item name pattern:: + + datalad.containers.. + + Parameters + ---------- + ds: Dataset + Dataset instance to report configuration on. + name: str, optional + If given, the reported configuration will be limited to the container + with this exact name. In this case, only a single ``dict`` is returned, + not nested dictionaries. + + Returns + ------- + dict + Keys are the names of configured containers and values are dictionaries + with their respective configuration items (with the + ``datalad.containers..`` prefix removed from their + keys). + If `name` is given, only a single ``dict`` with the configuration + items of the matching container is returned (i.e., there will be no + outer ``dict`` with container names as keys). + If not (matching) container configuration exists, and empty dictionary + is returned. + """ + var_prefix = 'datalad.containers.' + + containers = {} + # all info is in the dataset config! + for var, value in ds.config.items(): + if not var.startswith(var_prefix): + # not an interesting variable + continue + var_comps = var.split('.') + # container name is the 3rd after 'datalad'.'container'. + cname = var_comps[2] + if name and name != cname: + # we are looking for a specific container's configuration + # and this is not it + continue + # reconstruct config item name, anything after + # datalad.containers.. + ccfgname = '.'.join(var_comps[3:]) + if not ccfgname: + continue + + if ccfgname == 'image': + # run image path normalization to get a relative path + # in platform conventions, regardless of the input. + # for now we report a str, because the rest of the code + # is not using pathlib + value = str(_normalize_image_path(value, ds)) + + cinfo = containers.get(cname, {}) + cinfo[ccfgname] = value + + containers[cname] = cinfo + + return containers if name is None else containers.get(name, {}) + + +def _normalize_image_path(path: str, ds: Dataset) -> PurePath: + """Helper to standardize container image path handling + + Previously, container configuration would contain platform-paths + for container image location (e.g., windows paths when added on + windows, POSIX paths elsewhere). This made cross-platform reuse + impossible out-of-the box, but it also means that such dataset + are out there in unknown numbers. + + This helper inspects an image path READ FROM CONFIG(!) and ensures + that it matches platform conventions (because all other arguments) + also come in platform conventions. This enables standardizing + the storage conventions to be POSIX-only (for the future). + + Parameters + ---------- + path: str + A str-path, as read from the configuration, matching its conventions + (relative path, pointing to a container image relative to the + dataset's root). + ds: Dataset + This dataset's base path is used as a reference for resolving + the relative image path to an absolute location on the file system. + + Returns + ------- + PurePath + Relative path in platform conventions + """ + # we only need to act differently, when an incoming path is + # windows. This is not possible to say with 100% confidence, + # because a POSIX path can also contain a backslash. We support + # a few standard cases where we CAN tell + pathobj = None + if '\\' not in path: + # no windows pathsep, no problem + pathobj = PurePosixPath(path) + elif path.startswith(r'.datalad\\environments\\'): + # this is the default location setup in windows conventions + pathobj = PureWindowsPath(path) + else: + # let's assume it is windows for a moment + if lexists(str(ds.pathobj / PureWindowsPath(path))): + # if there is something on the filesystem for this path, + # we can be reasonably sure that this is indeed a windows + # path. This won't catch images in uninstalled subdataset, + # but better than nothing + pathobj = PureWindowsPath(path) + else: + # if we get here, we have no idea, and no means to verify + # further hypotheses -- go with the POSIX assumption + # and hope for the best + pathobj = PurePosixPath(path) + + assert pathobj is not None + # we report in platform-conventions + return PurePath(pathobj) diff --git a/docs/source/index.rst b/docs/source/index.rst index a21ac4c8..e15bdd28 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,4 +51,6 @@ Python API containers_list containers_run + utils + .. |---| unicode:: U+02014 .. em dash