From 084a8eb4b3100bc467b42c4a7f02b0f341ec6336 Mon Sep 17 00:00:00 2001 From: sam Date: Tue, 24 Sep 2024 17:50:13 +0200 Subject: [PATCH 1/9] move fetch lock code into separate functions --- src/build_tools/hooks_hydra.py | 88 ++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 51f5bd9..e219cf9 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -65,6 +65,51 @@ LOCAL_ARCH_FULL = f'{LOCAL_ARCH}{LOCAL_ARCH_SUFFIX}' +def acquire_fetch_lock(self): + " acquire fetch lock " + source_path = source_paths()[0] + full_source_path = os.path.join(source_path, letter_dir_for(self.name), self.name) + lock_name = full_source_path.replace('/', '_') + '.lock' + + lock_dir = os.path.join(source_path, '.locks') + mkdir(lock_dir, parents=True) + + wait_time = 0 + wait_interval = 60 + wait_limit = 3600 + + lock = Lock(os.path.join(lock_dir, lock_name), lifetime=wait_limit, default_timeout=1) + self.fetch_hook_lock = lock + + while True: + try: + # try to acquire the lock + lock.lock() + self.log.info("[pre-fetch hook] Lock acquired: %s", lock.lockfile) + break + + except TimeOutError as err: + if wait_time >= wait_limit: + error_msg = "[pre-fetch hook] Maximum wait time for lock %s to be released reached: %s sec >= %s sec" + raise EasyBuildError(error_msg, lock.lockfile, wait_time, wait_limit) from err + + msg = "[pre-fetch hook] Lock %s held by another build, waiting %d seconds..." 
+ self.log.debug(msg, lock.lockfile, wait_interval) + time.sleep(wait_interval) + wait_time += wait_interval + + +def release_fetch_lock(self): + " release fetch lock " + lock = self.fetch_hook_lock + try: + lock.unlock() + self.log.info("[post-fetch hook] Lock released: %s", lock.lockfile) + + except NotLockedError: + self.log.warning("[post-fetch hook] Could not release lock %s: was already released", lock.lockfile) + + def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument """Alter the parameters of easyconfigs""" @@ -150,51 +195,12 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument def pre_fetch_hook(self): """Hook at pre-fetch level""" - - # acquire fetch lock - source_path = source_paths()[0] - full_source_path = os.path.join(source_path, letter_dir_for(self.name), self.name) - lock_name = full_source_path.replace('/', '_') + '.lock' - - lock_dir = os.path.join(source_path, '.locks') - mkdir(lock_dir, parents=True) - - wait_time = 0 - wait_interval = 60 - wait_limit = 3600 - - lock = Lock(os.path.join(lock_dir, lock_name), lifetime=wait_limit, default_timeout=1) - self.fetch_hook_lock = lock - - while True: - try: - # try to acquire the lock - lock.lock() - self.log.info("[pre-fetch hook] Lock acquired: %s", lock.lockfile) - break - - except TimeOutError as err: - if wait_time >= wait_limit: - error_msg = "[pre-fetch hook] Maximum wait time for lock %s to be released reached: %s sec >= %s sec" - raise EasyBuildError(error_msg, lock.lockfile, wait_time, wait_limit) from err - - msg = "[pre-fetch hook] Lock %s held by another build, waiting %d seconds..." 
- self.log.debug(msg, lock.lockfile, wait_interval) - time.sleep(wait_interval) - wait_time += wait_interval + acquire_fetch_lock(self) def post_fetch_hook(self): """Hook at post-fetch level""" - - # release fetch lock - lock = self.fetch_hook_lock - try: - lock.unlock() - self.log.info("[post-fetch hook] Lock released: %s", lock.lockfile) - - except NotLockedError: - self.log.warning("[post-fetch hook] Could not release lock %s: was already released", lock.lockfile) + release_fetch_lock(self) def pre_configure_hook(self, *args, **kwargs): # pylint: disable=unused-argument From e957f5d836a230128bed99ef6a29faec13923d3e Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 30 Sep 2024 17:07:04 +0200 Subject: [PATCH 2/9] update module install paths in the hooks --- src/build_tools/hooks_hydra.py | 102 ++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index e219cf9..62e5614 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -18,6 +18,7 @@ """ import os +from pathlib import Path import time from flufl.lock import Lock, TimeOutError, NotLockedError @@ -25,10 +26,10 @@ from vsc.utils import fancylogger from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS -from easybuild.framework.easyconfig.easyconfig import letter_dir_for +from easybuild.framework.easyconfig.easyconfig import letter_dir_for, get_toolchain_hierarchy from easybuild.tools import LooseVersion from easybuild.tools.build_log import EasyBuildError -from easybuild.tools.config import source_paths +from easybuild.tools.config import source_paths, ConfigurationVariables from easybuild.tools.filetools import mkdir from easybuild.tools.hooks import SANITYCHECK_STEP @@ -64,6 +65,102 @@ LOCAL_ARCH_SUFFIX = os.getenv('VSC_ARCH_SUFFIX') LOCAL_ARCH_FULL = f'{LOCAL_ARCH}{LOCAL_ARCH_SUFFIX}' +VALID_TCGENS = ['2022a', '2023a'] +VALID_MODULES_SUBDIRS = VALID_TCGENS + 
['system'] +VALID_TCS = ['foss', 'intel', 'gomkl', 'gimkl', 'gimpi'] + + +def get_tc_versions(): + " build dict of (sub)toolchain-versions per valid generation " + tc_versions = {} + for toolcgen in VALID_TCGENS: + tc_versions[toolcgen] = [] + for toolc in VALID_TCS: + try: + tc_versions[toolcgen].extend(get_toolchain_hierarchy({'name': toolc, 'version': toolcgen})) + except EasyBuildError: + # skip if no easyconfig found for toolchain-version + pass + + return tc_versions + + +def calc_tc_gen(name, version, tcname, tcversion, easyblock): + """ + calculate the toolchain generation + return False if not valid + """ + name_version = {'name': name, 'version': version} + toolchain = {'name': tcname, 'version': tcversion} + software = [name, version, tcname, tcversion, easyblock] + + tc_versions = get_tc_versions() + + # (software with) valid (sub)toolchain and version + for toolcgen in VALID_TCGENS: + if toolchain in tc_versions[toolcgen] or name_version in tc_versions[toolcgen]: + log_msg = f"Determined toolchain generation {toolcgen} for {software}" + return toolcgen, log_msg + + # (software with) valid (sub)toolchain but invalid version + for toolcgen in VALID_TCGENS: + tcnames = [x['name'] for x in tc_versions[toolcgen]] + if toolchain['name'] in tcnames or name in tcnames: + log_msg = (f"Determined toolchain generation {toolcgen} for {software} is not valid." 
+ f" Choose one of {VALID_TCGENS}.") + return False, log_msg + + # invalid toolchains + # all toolchains have 'system' toolchain, so we need to handle the invalid toolchains separately + # all toolchains have 'Toolchain' easyblock, so checking the easyblock is sufficient + if easyblock == 'Toolchain': + log_msg = f"Invalid toolchain {name} for {software}" + return False, log_msg + + # software with 'system' toolchain: return 'system' + if tcname == 'system': + log_msg = f"Determined toolchain {tcname} for {software}" + return tcname, log_msg + + log_msg = f"Invalid toolchain {tcname} for {software}" + return False, log_msg + + +def update_module_install_paths(self): + " update module install paths unless subdir-modules uption is specified " + + # default subdir_modules config var = 'modules' + # in hydra we change it to 'modules/' + subdir_modules = Path(ConfigurationVariables()['subdir_modules']).parts + + if len(subdir_modules) not in [1, 2] or subdir_modules[0] != 'modules': + log_msg = '[pre-fetch hook] Format of option subdir-modules %s is not valid. Must be modules/.' + raise EasyBuildError(log_msg, os.path.join(*subdir_modules)) + + if len(subdir_modules) == 2: + subdir = subdir_modules[1] + if subdir not in VALID_MODULES_SUBDIRS: + log_msg = "[pre-fetch hook] Specified modules subdir %s is not valid. Choose one of %s." + raise EasyBuildError(log_msg, subdir, VALID_MODULES_SUBDIRS) + log_msg = "[pre-fetch hook] Option subdir-modules was set to %s, not updating module install paths." 
+ self.log.info(log_msg, subdir_modules) + return + + subdir, log_msg = calc_tc_gen( + self.name, self.version, self.toolchain.name, self.toolchain.version, self.cfg.easyblock) + if not subdir: + raise EasyBuildError("[pre-fetch hook] " + log_msg) + self.log.info("[pre-fetch hook] " + log_msg) + + # insert subdir in module install path strings (normally between 'modules' and 'all') + installdir_mod = Path(self.installdir_mod).parts + self.installdir_mod = Path().joinpath(*installdir_mod[:-1], subdir, installdir_mod[-1]).as_posix() + self.log.info('[pre-fetch hook] Updated installdir_mod to %s.', self.installdir_mod) + + mod_filepath = Path(self.mod_filepath).parts + self.mod_filepath = Path().joinpath(*mod_filepath[:-3], subdir, *mod_filepath[-3:]).as_posix() + self.log.info('[pre-fetch hook] Updated mod_filepath to %s.', self.mod_filepath) + def acquire_fetch_lock(self): " acquire fetch lock " @@ -195,6 +292,7 @@ def parse_hook(ec, *args, **kwargs): # pylint: disable=unused-argument def pre_fetch_hook(self): """Hook at pre-fetch level""" + update_module_install_paths(self) acquire_fetch_lock(self) From 09de92c0e18d6c64bcd5eb8e50876b9a465d01ab Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 30 Sep 2024 17:08:12 +0200 Subject: [PATCH 3/9] remove toolchain option from submit_build script --- bin/submit_build.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/bin/submit_build.py b/bin/submit_build.py index 3cc5b2b..4bb614a 100755 --- a/bin/submit_build.py +++ b/bin/submit_build.py @@ -32,7 +32,7 @@ from build_tools.clusters import ARCHS, PARTITIONS from build_tools.filetools import APPS_BRUSSEL, get_module from build_tools.lmodtools import submit_lmod_cache_job -from build_tools.softinstall import mk_job_name, set_toolchain_generation, submit_build_job +from build_tools.softinstall import mk_job_name, submit_build_job # repositories with easyconfigs VSCSOFTSTACK_ROOT = os.path.expanduser("~/vsc-software-stack") @@ -79,7 +79,6 @@ def 
main(): 'sourcepath': '/apps/brussel/sources:/apps/gent/source', 'installpath': os.path.join(APPS_BRUSSEL, os.getenv('VSC_OS_LOCAL'), LOCAL_ARCH), 'buildpath': os.path.join(job['tmp'], 'eb-submit-build-fetch'), - 'subdir-modules': 'modules', 'hooks': hooks_hydra.__file__, } @@ -87,7 +86,6 @@ def main(): options = { "arch": ("CPU architecture of the host system and the build", 'strlist', 'add', None, 'a'), "partition": ("Slurm partition for the build", 'strlist', 'add', None, 'P'), - "toolchain": ("Toolchain generation of the installation", None, "store", None, 't'), "extra-flags": ("Extra flags to pass to EasyBuild", None, "store", None, 'e'), "extra-sub-flags": ("Extra flags to pass to Slurm", None, "store", '', 'q'), "extra-mod-footer": ("Path to extra footer for module file", None, "store", None, 'f'), @@ -173,14 +171,6 @@ def main(): if opts.options.clang: job['langcode'] = 'C' - # Set target toolchain generation - job['tc_gen'] = set_toolchain_generation(easyconfig, user_toolchain=opts.options.toolchain) - if not job['tc_gen']: - logger.error("Unable to determine the toolchain generation, specify it with --toolchain") - sys.exit(1) - - ebconf['subdir-modules'] = os.path.join('modules', job['tc_gen']) - # Set robot paths if opts.options.pwd_robot_append: ebconf['robot-paths'] += ':' + os.getcwd() From 0736625c91e38302ea8f573bdbd6a4ebb989dfe4 Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 30 Sep 2024 17:10:13 +0200 Subject: [PATCH 4/9] update tests and crosscompilation --- src/build_tools/jobtemplate.py | 12 ++------ tests/conftest.py | 8 ++++++ tests/input/build_job_01.sh | 12 ++------ tests/input/build_job_02.sh | 12 ++------ tests/test_hooks_hydra.py | 51 ++++++++++++++++++++++++++++++++++ tests/test_softinstall.py | 27 +----------------- 6 files changed, 69 insertions(+), 53 deletions(-) create mode 100644 tests/test_hooks_hydra.py diff --git a/src/build_tools/jobtemplate.py b/src/build_tools/jobtemplate.py index c8be2ee..efb1903 100644 --- 
a/src/build_tools/jobtemplate.py +++ b/src/build_tools/jobtemplate.py @@ -50,15 +50,9 @@ mkdir -p ${eb_buildpath} # update MODULEPATH for cross-compilations -if [ "${target_arch}" != "$$VSC_ARCH_LOCAL" ]; then - moddir="${eb_installpath}/modules" - # use modules from target arch and toolchain generation - CC_MODULEPATH=$${moddir}/${tc_gen}/all - # also add last 3 years of modules in case out-of-toolchain deps are needed - for modpath in $$(ls -1dr $${moddir}/*/all | head -n 6); do - CC_MODULEPATH="$$CC_MODULEPATH:$$modpath" - done - export MODULEPATH=$$CC_MODULEPATH +local_arch="$$VSC_ARCH_LOCAL$$VSC_ARCH_SUFFIX" +if [ "${target_arch}" != "$$local_arch" ]; then + export MODULEPATH=$${MODULEPATH//$$local_arch/${target_arch}} fi ${pre_eb_options} eb ${eb_options} diff --git a/tests/conftest.py b/tests/conftest.py index 93ef921..129aecf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,8 @@ import os import pytest +from easybuild.tools.options import set_up_configuration + def pytest_addoption(parser): parser.addoption( @@ -51,3 +53,9 @@ def realpath_apps_brussel(path): @pytest.fixture def mock_realpath_apps_brussel(monkeypatch): monkeypatch.setattr('os.path.realpath', realpath_apps_brussel) + + +@pytest.fixture +def set_up_config(): + set_up_configuration(silent=True) + yield diff --git a/tests/input/build_job_01.sh b/tests/input/build_job_01.sh index ccac2f5..9d82596 100644 --- a/tests/input/build_job_01.sh +++ b/tests/input/build_job_01.sh @@ -28,15 +28,9 @@ mkdir -p $TMPDIR mkdir -p /tmp/eb-test-build # update MODULEPATH for cross-compilations -if [ "skylake" != "$VSC_ARCH_LOCAL" ]; then - moddir="/apps/brussel/${VSC_OS_LOCAL}/skylake/modules" - # use modules from target arch and toolchain generation - CC_MODULEPATH=${moddir}/2019a/all - # also add last 3 years of modules in case out-of-toolchain deps are needed - for modpath in $(ls -1dr ${moddir}/*/all | head -n 6); do - CC_MODULEPATH="$CC_MODULEPATH:$modpath" - done - export 
MODULEPATH=$CC_MODULEPATH +local_arch="$VSC_ARCH_LOCAL$VSC_ARCH_SUFFIX" +if [ "skylake" != "$local_arch" ]; then + export MODULEPATH=${MODULEPATH//$local_arch/skylake} fi eb diff --git a/tests/input/build_job_02.sh b/tests/input/build_job_02.sh index 6424a67..4e7cb9f 100644 --- a/tests/input/build_job_02.sh +++ b/tests/input/build_job_02.sh @@ -28,15 +28,9 @@ mkdir -p $TMPDIR mkdir -p /tmp/eb-test-build # update MODULEPATH for cross-compilations -if [ "zen2" != "$VSC_ARCH_LOCAL" ]; then - moddir="/apps/brussel/${VSC_OS_LOCAL}/zen2-ib/modules" - # use modules from target arch and toolchain generation - CC_MODULEPATH=${moddir}/2020b/all - # also add last 3 years of modules in case out-of-toolchain deps are needed - for modpath in $(ls -1dr ${moddir}/*/all | head -n 6); do - CC_MODULEPATH="$CC_MODULEPATH:$modpath" - done - export MODULEPATH=$CC_MODULEPATH +local_arch="$VSC_ARCH_LOCAL$VSC_ARCH_SUFFIX" +if [ "zen2-ib" != "$local_arch" ]; then + export MODULEPATH=${MODULEPATH//$local_arch/zen2-ib} fi bwrap eb --cuda-compute-capabilities=8.0 diff --git a/tests/test_hooks_hydra.py b/tests/test_hooks_hydra.py new file mode 100644 index 0000000..3c80a92 --- /dev/null +++ b/tests/test_hooks_hydra.py @@ -0,0 +1,51 @@ +# +# Copyright 2017-2024 Vrije Universiteit Brussel +# All rights reserved. +# +# This file is part of build_tools (https://github.com/vub-hpc/build_tools), +# originally created by the HPC team of Vrije Universiteit Brussel (https://hpc.vub.be), +# with support of Vrije Universiteit Brussel (https://www.vub.be), +# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be), +# the Flemish Research Foundation (FWO) (http://www.fwo.be/en) +# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en). 
+# +## +""" +Unit tests for build_tools.hooks_hydra + +@author: Samuel Moors (Vrije Universiteit Brussel) +""" + +import pytest + +from build_tools import hooks_hydra + + +@pytest.mark.parametrize( + 'toolchain', + [ + # (name, version, tcname, tcversion, easyblock, expected_generation) + # (software with) toolchains with custom versioning + ('GCCcore', '11.3.0', 'system', 'system', 'Toolchain', '2022a'), + ('GCCcore', '10.2.0', 'system', 'system', 'Toolchain', False), + ('UCX-CUDA', '1.14.1', 'GCCcore', '12.3.0', 'EB_UCX_Plugins', '2023a'), + ('bwa-mem2', '2.2.1', 'intel-compilers', '2023.1.0', 'MakeCp', '2023a'), + ('SAMtools', '1.18', 'GCC', '12.3.0', 'EB_SAMtools', '2023a'), + # (software with) toolchains with generation as their version + ('foss', '2023a', 'system', 'system', 'Toolchain', '2023a'), + ('foss', '2021a', 'system', 'system', 'Toolchain', False), + ('PyTorch', '2.1.2', 'foss', '2023a', 'EB_PyTorch', '2023a'), + ('R', '4.3.2', 'gfbf', '2023a', 'EB_R', '2023a'), + # software with system toolchain + ('zlib', '1.2.11', 'system', 'system', 'ConfigureMake', 'system'), + ('MATLAB', '2023a', 'system', 'system', 'EB_MATLAB', 'system'), + # (software with) unsupported toolchains + ('torchvision', '0.9.1', 'fosscuda', '2022a', 'EB_torchvision', False), + ('fosscuda', '2023a', 'system', 'system', 'Toolchain', False), + ], +) +def test_calc_tc_gen(toolchain, set_up_config): + name, version, tcname, tcversion, easyblock, expected_generation = toolchain + generation, _ = hooks_hydra.calc_tc_gen(name, version, tcname, tcversion, easyblock) + + assert generation == expected_generation diff --git a/tests/test_softinstall.py b/tests/test_softinstall.py index 58ec5af..d761541 100644 --- a/tests/test_softinstall.py +++ b/tests/test_softinstall.py @@ -22,29 +22,6 @@ from build_tools import softinstall -@pytest.mark.parametrize( - 'toolchain', - [ - ('GCCcore-10.2.0.eb', False, '2020b'), - ('GCCcore-10.2.0.eb', '2020b', '2020b'), - ('GCCcore-10.2.0.eb', '1920c', False), 
- ('UCX-1.8.0-GCCcore-9.3.0-CUDA-11.0.2.eb', False, '2020a'), - ('R-4.0.3-foss-2020b.eb', False, '2020b'), - ('R-4.0.3-foss-2020b.eb', '2019a', '2019a'), - ('TensorFlow-2.3.1-foss-2020a-Python-3.8.2.eb', False, '2020a'), - ('TensorFlow-2.3.1-fosscuda-2020a-Python-3.8.2.eb', False, '2020a'), - ('SAMtools-1.9-GCC-8.2.0-2.31.1.eb', False, '2019a'), - ('SAMtools-1.9-iccifort-2019.1.144-GCC-8.2.0-2.31.1.eb', False, '2019a'), - ], -) -def test_set_toolchain_generation(toolchain): - easyconfig, user_toolchain, expected_generation = toolchain - - generation = softinstall.set_toolchain_generation(easyconfig, user_toolchain=user_toolchain) - - assert generation == expected_generation - - @pytest.mark.parametrize( 'test_name', [ @@ -88,7 +65,6 @@ def test_mk_job_name(test_name): 'gpus': 0, 'target_arch': 'skylake', 'partition': 'skylake_mpi', - 'tc_gen': '2019a', 'langcode': 'C', 'eb_options': '', 'pre_eb_options': '', @@ -104,9 +80,8 @@ def test_mk_job_name(test_name): 'nodes': 1, 'tasks': 4, 'gpus': 1, - 'target_arch': 'zen2', + 'target_arch': 'zen2-ib', 'partition': 'ampere_gpu', - 'tc_gen': '2020b', 'langcode': 'C', 'eb_options': ' --cuda-compute-capabilities=8.0', 'pre_eb_options': 'bwrap', From d20620734e192bf14b54993a3afa38296e54520b Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 30 Sep 2024 17:12:32 +0200 Subject: [PATCH 5/9] version bump --- src/build_tools/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/build_tools/package.py b/src/build_tools/package.py index 63cd21e..457890a 100644 --- a/src/build_tools/package.py +++ b/src/build_tools/package.py @@ -16,7 +16,7 @@ @author: Alex Domingo (Vrije Universiteit Brussel) """ -VERSION = '3.2.3' +VERSION = '3.3.0' AUTHOR = { 'wp': 'Ward Poelmans', From 4931aa1aea798fc70ef66dd043f387f38653425d Mon Sep 17 00:00:00 2001 From: sam Date: Fri, 4 Oct 2024 15:49:31 +0200 Subject: [PATCH 6/9] update bwrap for the tcgen changes --- bin/submit_build.py | 19 ++++----- src/build_tools/bwraptools.py | 73 
++++++++++++++++------------------ src/build_tools/hooks_hydra.py | 61 ++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 63 deletions(-) diff --git a/bin/submit_build.py b/bin/submit_build.py index 4bb614a..05afcf5 100755 --- a/bin/submit_build.py +++ b/bin/submit_build.py @@ -28,7 +28,7 @@ from vsc.utils.run import RunNoShell from build_tools import hooks_hydra -from build_tools.bwraptools import bwrap_prefix, rsync_copy +from build_tools.bwraptools import bwrap_prefix, rsync_copy, SUBDIR_MODULES_BWRAP from build_tools.clusters import ARCHS, PARTITIONS from build_tools.filetools import APPS_BRUSSEL, get_module from build_tools.lmodtools import submit_lmod_cache_job @@ -99,7 +99,7 @@ def main(): "tmp-scratch": ("Use $VSC_SCRATCH as temporary disk instead of /dev/shm", None, "store_true", False, 'M'), "dry-run": ("Do not fetch/install, set debug log level", None, "store_true", False, 'D'), "pre-fetch": ("Pre-fetch sources before submitting build jobs", None, "store_true", False, 'n'), - "bwrap": ("Reinstall via new namespace with bwrap", None, "store_true", False, 'b'), + "bwrap": ("Reinstall in 2 steps via new namespace with bwrap (no robot)", None, "store_true", False, 'b'), "skip-lmod-cache": ("Do not run Lmod cache after installation", None, "store_true", False, 's'), "lmod-cache-only": ("Run Lmod cache and exit, no software installation", None, "store_true", False, 'o'), } @@ -213,7 +213,7 @@ def main(): bwrap = opts.options.bwrap if bwrap: - logger.info('Reinstalling in 2 steps via new namespace under %s/bwrap', APPS_BRUSSEL) + logger.info('Calculating module name and version for bwrap') ec, module = get_module(easyconfig) if ec != 0: logger.error("Failed to get module name/version for %s", easyconfig) @@ -239,16 +239,18 @@ def main(): job['tmp'] = os.path.join('$VSC_SCRATCH', job_options['target_arch']) ebconf['buildpath'] = os.path.join(job['tmp'], 'eb-submit-build') - # generate EB command line options - eb_options = ['--robot', 
'--logtostdout', '--debug', '--module-extensions', '--zip-logs=bzip2'] + # common EB command line options + eb_options = ['--logtostdout', '--debug', '--module-extensions', '--zip-logs=bzip2', '--module-depends-on'] + + if bwrap: + eb_options.extend([' --rebuild', f'--subdir-modules={SUBDIR_MODULES_BWRAP}']) + else: + eb_options.append('--robot') # not supported with bwrap # cross-compilation if job_options['target_arch'] != host_arch: eb_options.extend(['--optarch', ARCHS[job_options['target_arch']]['opt']]) - # use depends_on in Lmod - eb_options.append("--module-depends-on") - # extra settings from user if opts.options.extra_flags: eb_options.append(opts.options.extra_flags) @@ -290,7 +292,6 @@ def main(): # install in new namespace if requested if bwrap: - job_options['eb_options'] += ' --rebuild' job_options['pre_eb_options'] = bwrap_prefix(job_options, module[0], install_dir) rsync_cmds = rsync_copy(job_options, module[0], module[1], install_dir) job_options['postinstall'] = '\n'.join([rsync_cmds, job_options['postinstall']]) diff --git a/src/build_tools/bwraptools.py b/src/build_tools/bwraptools.py index c4ac857..8ca1340 100644 --- a/src/build_tools/bwraptools.py +++ b/src/build_tools/bwraptools.py @@ -26,42 +26,45 @@ logger = fancylogger.getLogger() +BWRAP_PATH = os.path.join(APPS_BRUSSEL, 'bwrap', '$VSC_OS_LOCAL') +SUBDIR_MODULES_BWRAP = '.modules_bwrap' +MOD_FILEPATH_FILENAME = '{modversion}_fp.txt' -def bwrap_prefix(job_options, modname, install_dir): + +def bwrap_prefix(job_options, modname, arch): """ Create the bwrap prefix command string :param job_options: dict with options to pass to job template - :param modname: module name - :param install_dir: architecture-specific installation subdirectory + :param modname: module name (without the version) + :param arch: architecture-specific installation subdirectory """ - - bwrap_path = os.path.join(APPS_BRUSSEL, 'bwrap', '$VSC_OS_LOCAL', install_dir) real_installpath = 
os.path.realpath(job_options['eb_installpath']) - mod_subdir = os.path.join('modules', job_options['tc_gen'], 'all', modname) + mod_subdir = os.path.join(SUBDIR_MODULES_BWRAP, 'all', modname) + # cannot use 'software//' here, otherwise EB cannot "remove" the old installation soft_subdir = os.path.join('software', modname) - soft_source = os.path.join(bwrap_path, soft_subdir) + soft_source = os.path.join(BWRAP_PATH, arch, soft_subdir) soft_dest = os.path.join(real_installpath, soft_subdir) - mod_source = os.path.join(bwrap_path, mod_subdir) - mod_dest = os.path.join(real_installpath, mod_subdir) + mod_source = os.path.join(real_installpath, mod_subdir) if not os.path.isdir(soft_dest): logger.error("Bind destination does not exist: %s", soft_dest) + # create a temporary dir for the module, but don’t bind it with bwrap: + # the final location is not known yet, and module files don’t need a new namespace anyway return ' '.join([ - 'mkdir -p %s &&' % soft_source, - 'mkdir -p %s &&' % mod_source, + f'mkdir -p "{soft_source}" &&', + f'mkdir -p "{mod_source}" &&', 'bwrap', '--bind / /', - '--bind %s %s' % (soft_source, soft_dest), - '--bind %s %s' % (mod_source, mod_dest), + f'--bind "{soft_source}" "{soft_dest}"', '--dev /dev', '--bind /dev/log /dev/log', ]) -def rsync_copy(job_options, modname, modversion, install_dir): +def rsync_copy(job_options, modname, modversion, arch): """ Create command string to copy the bwrap installation dir and module file to the real installation dir If the source and destination dirs are in the same filesystem, @@ -69,46 +72,40 @@ def rsync_copy(job_options, modname, modversion, install_dir): :param job_options: dict with options to pass to job template :param modname: module name :param modversion: module version - :param install_dir: architecture-specific installation subdirectory + :param arch: architecture-specific installation subdirectory """ - source_path = os.path.join(APPS_BRUSSEL, 'bwrap', '$VSC_OS_LOCAL', install_dir) dest_path = 
job_options['eb_installpath'] rel_soft_path = os.path.join('software', modname, modversion, '') # trailing slash is required! - source_soft_path = os.path.join(source_path, rel_soft_path) + source_soft_path = os.path.join(BWRAP_PATH, arch, rel_soft_path) dest_soft_path = os.path.join(dest_path, rel_soft_path) - rel_mod_path = os.path.join('modules', job_options['tc_gen'], 'all', modname) - rel_mod_file = os.path.join(rel_mod_path, '%s.lua' % modversion) - - source_mod_path = os.path.join(source_path, rel_mod_path) - source_mod_file = os.path.join(source_path, rel_mod_file) - dest_mod_file = os.path.join(dest_path, rel_mod_file) + source_mod_path = os.path.join(dest_path, SUBDIR_MODULES_BWRAP, 'all', modname) + source_mod_file = os.path.join(source_mod_path, f'{modversion}.lua') + mod_filepath_file = os.path.join(source_mod_path, MOD_FILEPATH_FILENAME.format(modversion=modversion)) rsync_software = ' '.join([ 'rsync -a', - '--link-dest=%s' % source_soft_path, + f'--link-dest="{source_soft_path}"', source_soft_path, dest_soft_path, ]) rsync_module = ' '.join([ 'rsync -a', - '--link-dest=%s' % source_mod_path, + f'--link-dest="{source_mod_path}"', source_mod_file, - dest_mod_file, ]) return '\n'.join([ - 'echo "bwrap install dir: %s"' % source_soft_path, - 'echo "destination install dir: %s"' % dest_soft_path, - 'echo "bwrap module file: %s"' % source_mod_file, - 'echo "destination module file: %s"' % dest_mod_file, - 'if [ ! -d %s ]; then echo "ERROR: bwrap install dir does not exist"; exit 1; fi' % source_soft_path, - 'if [ ! "$(ls -A %s)" ]; then echo "ERROR: bwrap install dir empty"; exit 1; fi' % source_soft_path, - 'if [ ! -s %s ]; then echo "ERROR: bwrap module file does not exist or empty"; exit 1; fi' % source_mod_file, - rsync_software, - 'if [ $? -ne 0 ]; then echo "ERROR: failed to copy bwrap install dir"; exit 1; fi', - rsync_module, - 'if [ $? 
-ne 0 ]; then echo "ERROR: failed to copy bwrap module file"; exit 1; fi', - 'rm -rf %s %s' % (source_soft_path, source_mod_file), + f'dest_mod_file=$(<"{mod_filepath_file}")', + f'echo "source install dir: {source_soft_path}"', + f'echo "destination install dir: {dest_soft_path}"', + f'echo "source module file: {source_mod_file}"', + 'echo "destination module file: $dest_mod_file"', + f'test -d "{source_soft_path}" || {{ echo "ERROR: source install dir does not exist"; exit 1; }}', + f'test -n "$(ls -A {source_soft_path})" || {{ echo "ERROR: source install dir is empty"; exit 1; }}', + f'test -s "{source_mod_file}" || {{ echo "ERROR: source module file does not exist or is empty"; exit 1; }}', + f'{rsync_software} || {{ echo "ERROR: failed to copy source install dir"; exit 1; }}', + f'{rsync_module} "$dest_mod_file" || {{ echo "ERROR: failed to copy source module file"; exit 1; }}', + f'rm -rf "{source_soft_path}" "{source_mod_file}" "{mod_filepath_file}"', ]) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 62e5614..684080c 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -36,6 +36,7 @@ from build_tools.clusters import ARCHS from build_tools.ib_modules import IB_MODULE_SOFTWARE, IB_MODULE_SUFFIX, IB_OPT_MARK from build_tools.lmodtools import submit_lmod_cache_job +from build_tools.bwraptools import MOD_FILEPATH_FILENAME, SUBDIR_MODULES_BWRAP # permission groups for licensed software SOFTWARE_GROUPS = { @@ -71,7 +72,7 @@ def get_tc_versions(): - " build dict of (sub)toolchain-versions per valid generation " + " build dict of valid (sub)toolchain-version combinations per valid generation " tc_versions = {} for toolcgen in VALID_TCGENS: tc_versions[toolcgen] = [] @@ -96,7 +97,7 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): tc_versions = get_tc_versions() - # (software with) valid (sub)toolchain and version + # (software with) valid (sub)toolchain-version combination for toolcgen 
in VALID_TCGENS: if toolchain in tc_versions[toolcgen] or name_version in tc_versions[toolcgen]: log_msg = f"Determined toolchain generation {toolcgen} for {software}" @@ -127,39 +128,67 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): def update_module_install_paths(self): - " update module install paths unless subdir-modules uption is specified " + """ + update module install paths unless subdir-modules option is specified + default subdir_modules config var = 'modules' + here we set it to 'modules/<subdir>', where subdir can be any of VALID_MODULES_SUBDIRS + exception: with bwrap it is set to SUBDIR_MODULES_BWRAP + """ + configvars = ConfigurationVariables() + subdir_modules = list(Path(configvars['subdir_modules']).parts) + + do_bwrap = subdir_modules == [SUBDIR_MODULES_BWRAP] - # default subdir_modules config var = 'modules' - # in hydra we change it to 'modules/' - subdir_modules = Path(ConfigurationVariables()['subdir_modules']).parts + log_format_msg = '[pre-fetch hook] Format of option subdir-modules %s is not valid. Must be modules/' + if len(subdir_modules) not in [1, 2]: + raise EasyBuildError(log_format_msg, os.path.join(*subdir_modules)) - if len(subdir_modules) not in [1, 2] or subdir_modules[0] != 'modules': - log_msg = '[pre-fetch hook] Format of option subdir-modules %s is not valid. Must be modules/.' - raise EasyBuildError(log_msg, os.path.join(*subdir_modules)) + if not (subdir_modules[0] == 'modules' or subdir_modules != ['modules'] or do_bwrap): + raise EasyBuildError(log_format_msg, os.path.join(*subdir_modules)) if len(subdir_modules) == 2: subdir = subdir_modules[1] + if subdir not in VALID_MODULES_SUBDIRS: - log_msg = "[pre-fetch hook] Specified modules subdir %s is not valid. Choose one of %s." + log_msg = "[pre-fetch hook] Specified modules subdir %s is not valid. 
Choose one of %s" raise EasyBuildError(log_msg, subdir, VALID_MODULES_SUBDIRS) - log_msg = "[pre-fetch hook] Option subdir-modules was set to %s, not updating module install paths." + + log_msg = "[pre-fetch hook] Option subdir-modules was set to %s, not updating module install paths" self.log.info(log_msg, subdir_modules) return subdir, log_msg = calc_tc_gen( self.name, self.version, self.toolchain.name, self.toolchain.version, self.cfg.easyblock) + if not subdir: raise EasyBuildError("[pre-fetch hook] " + log_msg) + self.log.info("[pre-fetch hook] " + log_msg) - # insert subdir in module install path strings (normally between 'modules' and 'all') + mod_filepath = Path(self.mod_filepath).parts + + if do_bwrap: + self.log.info("[pre-fetch hook] Installing in new namespace with bwrap") + real_mod_filepath = Path().joinpath(*mod_filepath[:-4], 'modules', subdir, *mod_filepath[-3:]).as_posix() + modversion = mod_filepath[-1].removesuffix('.lua') + mod_filepath_file = Path().joinpath( + *mod_filepath[:-1], MOD_FILEPATH_FILENAME.format(modversion=modversion)).as_posix() + + # create file containing the real module file path, in the same dir as the module file + # after installation, the module file is copied to the real path + with open(mod_filepath_file, 'w') as f: + f.write(real_mod_filepath) + self.log.info("Created file %s containing real module file path", mod_filepath_file) + return + + # insert subdir into self.installdir_mod and self.mod_filepath installdir_mod = Path(self.installdir_mod).parts self.installdir_mod = Path().joinpath(*installdir_mod[:-1], subdir, installdir_mod[-1]).as_posix() - self.log.info('[pre-fetch hook] Updated installdir_mod to %s.', self.installdir_mod) + self.log.info('[pre-fetch hook] Updated installdir_mod to %s', self.installdir_mod) - mod_filepath = Path(self.mod_filepath).parts - self.mod_filepath = Path().joinpath(*mod_filepath[:-3], subdir, *mod_filepath[-3:]).as_posix() - self.log.info('[pre-fetch hook] Updated mod_filepath to 
%s.', self.mod_filepath) + real_mod_filepath = Path().joinpath(*mod_filepath[:-3], subdir, *mod_filepath[-3:]).as_posix() + self.mod_filepath = real_mod_filepath + self.log.info('[pre-fetch hook] Updated mod_filepath to %s', self.mod_filepath) def acquire_fetch_lock(self): From 41076e176cb3ca9517171ee3d9c5de66d404f6b7 Mon Sep 17 00:00:00 2001 From: sam Date: Fri, 4 Oct 2024 15:49:43 +0200 Subject: [PATCH 7/9] update bwrap tests --- tests/test_bwraptools.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/test_bwraptools.py b/tests/test_bwraptools.py index b203de4..6d83fec 100644 --- a/tests/test_bwraptools.py +++ b/tests/test_bwraptools.py @@ -20,31 +20,29 @@ job_options = { 'eb_installpath': '/apps/brussel/$VSC_OS_LOCAL/skylake', - 'tc_gen': '2022a', } def test_bwrap_prefix(mock_realpath_apps_brussel): prefix = bwraptools.bwrap_prefix(job_options, 'HPL', 'skylake') - ref_prefix = 'mkdir -p /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL && mkdir -p /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL && bwrap --bind / / --bind /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL /vscmnt/brussel_pixiu_apps/_apps_brussel/$VSC_OS_LOCAL/skylake/software/HPL --bind /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL /vscmnt/brussel_pixiu_apps/_apps_brussel/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL --dev /dev --bind /dev/log /dev/log' # noqa: E501 + ref_prefix = 'mkdir -p "/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL" && mkdir -p "/vscmnt/brussel_pixiu_apps/_apps_brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL" && bwrap --bind / / --bind "/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL" "/vscmnt/brussel_pixiu_apps/_apps_brussel/$VSC_OS_LOCAL/skylake/software/HPL" --dev /dev --bind /dev/log /dev/log' # noqa: E501 assert prefix == ref_prefix def test_rsync_copy(): rsync_cmds = bwraptools.rsync_copy(job_options, 'HPL', '2.3-foss-2022a', 'skylake') - ref_rsync_cmds = 
"""\ -echo "bwrap install dir: /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/" + ref_rsync_cmds = '''\ +dest_mod_file=$(<"/apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL/2.3-foss-2022a_fp.txt") +echo "source install dir: /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/" echo "destination install dir: /apps/brussel/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/" -echo "bwrap module file: /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL/2.3-foss-2022a.lua" -echo "destination module file: /apps/brussel/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL/2.3-foss-2022a.lua" -if [ ! -d /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ ]; then echo "ERROR: bwrap install dir does not exist"; exit 1; fi -if [ ! "$(ls -A /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/)" ]; then echo "ERROR: bwrap install dir empty"; exit 1; fi -if [ ! -s /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL/2.3-foss-2022a.lua ]; then echo "ERROR: bwrap module file does not exist or empty"; exit 1; fi -rsync -a --link-dest=/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ /apps/brussel/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ -if [ $? -ne 0 ]; then echo "ERROR: failed to copy bwrap install dir"; exit 1; fi -rsync -a --link-dest=/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL/2.3-foss-2022a.lua /apps/brussel/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL/2.3-foss-2022a.lua -if [ $? 
-ne 0 ]; then echo "ERROR: failed to copy bwrap module file"; exit 1; fi -rm -rf /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/modules/2022a/all/HPL/2.3-foss-2022a.lua""" # noqa: E501 +echo "source module file: /apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL/2.3-foss-2022a.lua" +echo "destination module file: $dest_mod_file" +test -d "/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/" || { echo "ERROR: source install dir does not exist"; exit 1; } +test -n "$(ls -A /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/)" || { echo "ERROR: source install dir is empty"; exit 1; } +test -s "/apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL/2.3-foss-2022a.lua" || { echo "ERROR: source module file does not exist or is empty"; exit 1; } +rsync -a --link-dest="/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/" /apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ /apps/brussel/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/ || { echo "ERROR: failed to copy source install dir"; exit 1; } +rsync -a --link-dest="/apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL" /apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL/2.3-foss-2022a.lua "$dest_mod_file" || { echo "ERROR: failed to copy source module file"; exit 1; } +rm -rf "/apps/brussel/bwrap/$VSC_OS_LOCAL/skylake/software/HPL/2.3-foss-2022a/" "/apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL/2.3-foss-2022a.lua" "/apps/brussel/$VSC_OS_LOCAL/skylake/.modules_bwrap/all/HPL/2.3-foss-2022a_fp.txt"''' # noqa: E501 assert rsync_cmds == ref_rsync_cmds From e358ee7c38b3c0b79213df258385ef3b2687dcf5 Mon Sep 17 00:00:00 2001 From: sam Date: Fri, 4 Oct 2024 16:09:39 +0200 Subject: [PATCH 8/9] remove unnecessary loop --- src/build_tools/hooks_hydra.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/build_tools/hooks_hydra.py 
b/src/build_tools/hooks_hydra.py index 684080c..5c00ba3 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -103,14 +103,6 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): log_msg = f"Determined toolchain generation {toolcgen} for {software}" return toolcgen, log_msg - # (software with) valid (sub)toolchain but invalid version - for toolcgen in VALID_TCGENS: - tcnames = [x['name'] for x in tc_versions[toolcgen]] - if toolchain['name'] in tcnames or name in tcnames: - log_msg = (f"Determined toolchain generation {toolcgen} for {software} is not valid." - f" Choose one of {VALID_TCGENS}.") - return False, log_msg - # invalid toolchains # all toolchains have 'system' toolchain, so we need to handle the invalid toolchains separately # all toolchains have 'Toolchain' easyblock, so checking the easyblock is sufficient From 8d38e4c0ed47b6697dd533217abd674dc88d770f Mon Sep 17 00:00:00 2001 From: sam Date: Sat, 5 Oct 2024 08:29:41 +0200 Subject: [PATCH 9/9] update log msg to account for deleted loop --- src/build_tools/hooks_hydra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/build_tools/hooks_hydra.py b/src/build_tools/hooks_hydra.py index 5c00ba3..b34d62e 100644 --- a/src/build_tools/hooks_hydra.py +++ b/src/build_tools/hooks_hydra.py @@ -115,7 +115,7 @@ def calc_tc_gen(name, version, tcname, tcversion, easyblock): log_msg = f"Determined toolchain {tcname} for {software}" return tcname, log_msg - log_msg = f"Invalid toolchain {tcname} for {software}" + log_msg = f"Invalid toolchain {tcname} and/or toolchain version {tcversion} for {software}" return False, log_msg