diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml
index e15d0981aaa8..3f6ba7f24bd7 100644
--- a/.github/workflows/hvd-tests.yml
+++ b/.github/workflows/hvd-tests.yml
@@ -81,6 +81,7 @@ jobs:
           timeout_minutes: 25
           shell: bash
           command: bash tests/run_cpu_tests.sh
+          new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml
index 7716cb63611f..62020d0aba12 100644
--- a/.github/workflows/pytorch-version-tests.yml
+++ b/.github/workflows/pytorch-version-tests.yml
@@ -98,6 +98,7 @@ jobs:
           timeout_minutes: 25
           shell: bash
           command: bash tests/run_cpu_tests.sh "not test_time_profilers"
+          new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh "not test_time_profilers"
 
   # create-issue:
   #   runs-on: ubuntu-latest
diff --git a/.github/workflows/tpu-tests.yml b/.github/workflows/tpu-tests.yml
index 9fa2da7974e5..a7a52949114c 100644
--- a/.github/workflows/tpu-tests.yml
+++ b/.github/workflows/tpu-tests.yml
@@ -97,6 +97,7 @@ jobs:
           command: |
             python -c "import torch_xla; print('torch xla version:', torch_xla.__version__)"
             bash tests/run_tpu_tests.sh
+          new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_tpu_tests.sh
         env:
           LD_LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH }}:${{ env.Python_ROOT_DIR }}/lib
           XRT_DEVICE_MAP: "CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0"
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 9e12fd84acc0..fd91bc18d4a7 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -126,6 +126,7 @@ jobs:
          timeout_minutes: 25
          shell: bash
          command: SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh
+          new_command_on_retry: USE_LAST_FAILED=1 SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/tests/run_cpu_tests.sh b/tests/run_cpu_tests.sh
index 2297be94219d..35a9743a712e 100644
--- a/tests/run_cpu_tests.sh
+++ b/tests/run_cpu_tests.sh
@@ -5,12 +5,21 @@ set -xeu
 if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
     skip_distrib_opt=(-m "not distributed and not tpu and not multinode_distributed")
 else
-    skip_distrib_opt=(-m "")
+    skip_distrib_opt=()
 fi
 
 MATCH_TESTS_EXPRESSION=${1:-""}
 
-CUDA_VISIBLE_DEVICES="" pytest --tx 4*popen//python=python --cov ignite --cov-report term-missing --cov-report xml -vvv tests "${skip_distrib_opt[@]}" -k "$MATCH_TESTS_EXPRESSION"
+# Will catch exit code 5 when tests are deselected from previous passing run
+EXIT_CODE_ALL_TESTS_DESELECTED=5
+
+CACHE_DIR=.cpu-not-distrib
+echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+PYTEST_ARGS="--tx 4*popen//python=python --cov ignite --cov-report term-missing --cov-report xml -vvv tests ${skip_distrib_opt[@]} ${MATCH_TESTS_EXPRESSION}"
+if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+    PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+fi
+CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}" || { exit_code=$?; if [ "$exit_code" -eq 5 ]; then echo "All tests deselected"; else exit $exit_code; fi;}
 
 # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
 if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
@@ -18,5 +27,13 @@ if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
 fi
 
 export WORLD_SIZE=2
-CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx $WORLD_SIZE*popen//python=python tests -m distributed -vvv -k "$MATCH_TESTS_EXPRESSION"
+CACHE_DIR=.cpu-distrib
+echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+PYTEST_ARGS="--cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx ${WORLD_SIZE}*popen//python=python tests -m distributed -vvv ${MATCH_TESTS_EXPRESSION}"
+if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+    PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+fi
+CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}"
 unset WORLD_SIZE
+
+rm -f pytest.ini
diff --git a/tests/run_gpu_tests.sh b/tests/run_gpu_tests.sh
index 3146443a531d..84d659e9533a 100644
--- a/tests/run_gpu_tests.sh
+++ b/tests/run_gpu_tests.sh
@@ -14,22 +14,45 @@ else
     cuda_pattern="cuda and $MATCH_TESTS_EXPRESSION"
 fi
 
+# Will catch exit code 5 when tests are deselected from previous passing run
+EXIT_CODE_ALL_TESTS_DESELECTED=5
+
 set -xeu
 
-pytest --cov ignite --cov-report term-missing --cov-report xml -vvv tests/ -k "$cuda_pattern"
+CACHE_DIR=.gpu-cuda
+echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+PYTEST_ARGS="--cov ignite --cov-report term-missing --cov-report xml -vvv tests/ -k '${cuda_pattern}'"
+if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+    PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+fi
+CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}" || { exit_code=$?; if [ "$exit_code" -eq 5 ]; then echo "All tests deselected"; else exit $exit_code; fi;}
+
+
 # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
 if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
     exit 0
 fi
 
-pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ -m distributed -k "$MATCH_TESTS_EXPRESSION"
-
+CACHE_DIR=.gpu-distrib
+echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+PYTEST_ARGS="--cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ -m distributed -k '${MATCH_TESTS_EXPRESSION}'"
+if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+    PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+fi
+CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}" || { exit_code=$?; if [ "$exit_code" -eq 5 ]; then echo "All tests deselected"; else exit $exit_code; fi;}
 
 if [ ${ngpus} -gt 1 ]; then
 
     export WORLD_SIZE=${ngpus}
-    pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx ${WORLD_SIZE}*popen//python=python tests -m distributed -vvv -k "$MATCH_TESTS_EXPRESSION"
+    CACHE_DIR=.gpu-distrib-multi
+    echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+    PYTEST_ARGS="--cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx ${WORLD_SIZE}*popen//python=python tests -m distributed -vvv -k '${MATCH_TESTS_EXPRESSION}'"
+    if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+        PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+    fi
+    CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}"
     unset WORLD_SIZE
 
 fi
 
+rm -f pytest.ini
diff --git a/tests/run_tpu_tests.sh b/tests/run_tpu_tests.sh
index 0877de858aed..c4aa3d86e62a 100644
--- a/tests/run_tpu_tests.sh
+++ b/tests/run_tpu_tests.sh
@@ -1,10 +1,26 @@
 #!/bin/bash
+# Will catch exit code 5 when tests are deselected from previous passing run
+EXIT_CODE_ALL_TESTS_DESELECTED=5
 
 set -xeu
 
-pytest --cov ignite --cov-report term-missing --cov-report xml tests/ -vvv -m tpu
+CACHE_DIR=.tpu
+echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+PYTEST_ARGS="--cov ignite --cov-report term-missing --cov-report xml tests/ -vvv -m tpu"
+if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+    PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+fi
+CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}" || { exit_code=$?; if [ "$exit_code" -eq 5 ]; then echo "All tests deselected"; else exit $exit_code; fi;}
+
 
 if [ -z ${NUM_TPU_WORKERS+x} ]; then
     export NUM_TPU_WORKERS=1
-    pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml tests/ -vvv -m tpu
+    CACHE_DIR=.tpu-multi
+    echo [pytest] > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini
+    PYTEST_ARGS="--cov ignite --cov-append --cov-report term-missing --cov-report xml tests/ -vvv -m tpu"
+    if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
+        PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
+    fi
+    CUDA_VISIBLE_DEVICES="" eval "pytest ${PYTEST_ARGS}" || { exit_code=$?; if [ "$exit_code" -eq 5 ]; then echo "All tests deselected"; else exit $exit_code; fi;}
 fi
+rm -f pytest.ini
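
Note (not part of the patch): the retry mechanism used across the scripts above has three pieces: a per-suite pytest cache directory written into a throwaway pytest.ini, the --last-failed / --last-failed-no-failures none options added only when USE_LAST_FAILED=1 and that cache already exists, and treating pytest exit code 5 (no tests collected because everything passed previously and is now deselected) as success. A minimal standalone sketch of that pattern, with hypothetical names (.example-suite cache dir, plain "tests" path) rather than the ones used in the patch:

#!/bin/bash
set -xeu

# Hypothetical per-suite cache dir; each suite gets its own so --last-failed
# state from one pytest invocation does not leak into another.
CACHE_DIR=.example-suite
echo "[pytest]" > pytest.ini ; echo "cache_dir=${CACHE_DIR}" >> pytest.ini

PYTEST_ARGS="-vvv tests"
if [ "${USE_LAST_FAILED:-0}" -eq "1" ] && [ -d "${CACHE_DIR}" ]; then
    # On a retry, re-run only previously failed tests; if none failed, select nothing.
    PYTEST_ARGS="--last-failed --last-failed-no-failures none ${PYTEST_ARGS}"
fi

# pytest exits with code 5 when no tests were collected/selected, which is the
# expected outcome when retrying a suite that fully passed - treat it as success.
eval "pytest ${PYTEST_ARGS}" || { exit_code=$?; if [ "$exit_code" -eq 5 ]; then echo "All tests deselected"; else exit "$exit_code"; fi; }

rm -f pytest.ini

On CI the first attempt runs the plain command and the retry step's new_command_on_retry runs the same command with USE_LAST_FAILED=1, so only the failing subset is repeated.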