From 8dd0689d786897c9274361087dff9a9663fea86c Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 17 Sep 2024 13:26:26 +0200 Subject: [PATCH 1/3] Make sure ReFrame command only runs for 24h minus 10 minutes. That way, if it's on a daily schedule, jobs will never overlap, as this caused us to exceed vCPU limits on AWS --- CI/run_reframe.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 497c2a9b..3341146e 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -68,6 +68,11 @@ fi if [ -z "${RFM_PREFIX}" ]; then export RFM_PREFIX="${HOME}/reframe_CI_runs" fi +if [ -z "${REFRAME_TIMEOUT}" ]; then + # 10 minutes short of 1 day, since typically the test suite will be run daily. + # This will prevent multiple ReFrame runs from piling up and exceeding the quota on our Magic Castle clusters + export REFRAME_TIMEOUT=1430m +fi # Create virtualenv for ReFrame using system python python3 -m venv "${TEMPDIR}"/reframe_venv @@ -118,7 +123,7 @@ reframe ${REFRAME_ARGS} --list # Run echo "Run tests:" -reframe ${REFRAME_ARGS} --run +timeout -v --preserve-status -s SIGTERM ${REFRAME_TIMEOUT} reframe ${REFRAME_ARGS} --run # Cleanup rm -rf "${TEMPDIR}" From 57ed3de6aadad31dac66804c5a6c0ba922a878da Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 17 Sep 2024 13:30:34 +0200 Subject: [PATCH 2/3] Document new variable --- CI/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CI/README.md b/CI/README.md index 161c4a48..4ee16a50 100644 --- a/CI/README.md +++ b/CI/README.md @@ -36,6 +36,7 @@ It should define: - `RFM_CHECK_SEARCH_PATH` (optional): the search path where ReFrame should search for tests to run in this CI pipeline. Default: `${TEMPDIR}/test-suite/eessi/testsuite/tests/`. - `RFM_CHECK_SEARCH_RECURSIVE` (optional): whether ReFrame should search `RFM_CHECK_SEARCH_PATH` recursively. Default: `1`. - `RFM_PREFIX` (optional): the prefix in which ReFrame stores all the files. 
Default: `${HOME}/reframe_CI_runs`. +- `REFRAME_TIMEOUT` (optional): DURATION as passed to the `timeout` command in Unix. If the `reframe` command runs for longer than this, it will be killed by SIGTERM. The ReFrame runtime will then cancel all scheduled (and running) jobs. Can be used to make sure jobs don't pile up, e.g. if the test suite runs daily, but it takes longer than one day to process all jobs. ## Creating the `crontab` entry and specifying `EESSI_CI_SYSTEM_NAME` This line depends on how often you want to run the tests, and where the `run_reframe_wrapper.sh` is located exactly. We also define the EESSI_CI_SYSTEM_NAME in this entry, as cronjobs don't normally read your `.bashrc` (and thus we need a different way of specifying this environment variable). From 9ef6a625419c991eaba3d8e115d0dafd3f372c10 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Tue, 17 Sep 2024 15:12:04 +0200 Subject: [PATCH 3/3] Make git clone commands more verbose, so that it is clearer what is being used in terms of version --- CI/run_reframe.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CI/run_reframe.sh b/CI/run_reframe.sh index 3341146e..b50bd8c8 100755 --- a/CI/run_reframe.sh +++ b/CI/run_reframe.sh @@ -81,11 +81,15 @@ python3 -m pip install --upgrade pip python3 -m pip install reframe-hpc=="${REFRAME_VERSION}" # Clone reframe repo to have the hpctestlib: -git clone "${REFRAME_URL}" --branch "${REFRAME_BRANCH}" "${TEMPDIR}"/reframe +REFRAME_CLONE_ARGS="${REFRAME_URL} --branch ${REFRAME_BRANCH} ${TEMPDIR}/reframe" +echo "Cloning ReFrame repo: git clone ${REFRAME_CLONE_ARGS}" +git clone ${REFRAME_CLONE_ARGS} export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/reframe # Clone test suite repo -git clone "${EESSI_TESTSUITE_URL}" --branch "${EESSI_TESTSUITE_BRANCH}" "${TEMPDIR}"/test-suite +EESSI_CLONE_ARGS="${EESSI_TESTSUITE_URL} --branch ${EESSI_TESTSUITE_BRANCH} ${TEMPDIR}/test-suite" +echo "Cloning EESSI repo: git clone ${EESSI_CLONE_ARGS}" +git
clone ${EESSI_CLONE_ARGS} export PYTHONPATH="${PYTHONPATH}":"${TEMPDIR}"/test-suite/ # Start the EESSI environment @@ -105,7 +109,7 @@ echo "" echo "TEMPDIR: ${TEMPDIR}" echo "PYTHONPATH: ${PYTHONPATH}" echo "EESSI test suite URL: ${EESSI_TESTSUITE_URL}" -echo "EESSI test suite version: ${EESSI_TESTSUITE_VERSION}" +echo "EESSI test suite version: ${EESSI_TESTSUITE_BRANCH}" echo "HPCtestlib from ReFrame URL: ${REFRAME_URL}" echo "HPCtestlib from ReFrame branch: ${REFRAME_BRANCH}" echo "ReFrame executable: $(which reframe)"