From 885a85d6bda22cf80863fc0ced3a889d3b73211f Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 17 Nov 2023 15:13:04 -0600 Subject: [PATCH 1/4] Check CMake return and cat log files if exists. --- .../github-actions/ci/run_step_ornl-sulfur-2.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh b/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh index afca4f6788..a1aa0d11d2 100755 --- a/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh +++ b/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh @@ -77,6 +77,7 @@ case "$1" in cmake -GNinja \ -DCMAKE_C_COMPILER=/usr/lib64/openmpi/bin/mpicc \ -DCMAKE_CXX_COMPILER=/usr/lib64/openmpi/bin/mpicxx \ + -DCMAKE_EXE_LINKER_FLAGS="-L $LLVM_DIR/lib" \ -DMPIEXEC_EXECUTABLE=/usr/lib64/openmpi/bin/mpirun \ -DBOOST_ROOT=$BOOST_DIR \ -DBUILD_AFQMC=ON \ @@ -87,7 +88,7 @@ case "$1" in -DQMC_MIXED_PRECISION=$IS_MIXED_PRECISION \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DQMC_DATA=$QMC_DATA_DIR \ - ${GITHUB_WORKSPACE} + ${GITHUB_WORKSPACE} || cmake_exit_code=$? ;; *"V100-GCC11-MPI-CUDA"*) @@ -112,9 +113,18 @@ case "$1" in -DQMC_MIXED_PRECISION=$IS_MIXED_PRECISION \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DQMC_DATA=$QMC_DATA_DIR \ - ${GITHUB_WORKSPACE} + ${GITHUB_WORKSPACE} || cmake_exit_code=$? ;; esac + if [ $cmake_exit_code -ne 0 ]; then + ls CMakeFiles + cmake --version + echo "int main() {}" > test.cpp + /usr/lib64/openmpi/bin/mpicxx -fopenmp --offload-arch=sm_70 -Wno-linker-warnings -Wno-unknown-cuda-version -fopenmp-assume-no-thread-state -fopenmp-assume-no-nested-parallelism test.cpp -v + if [ -f CMakeFiles/CMakeError.log ]; then cat CMakeFiles/CMakeError.log; fi + if [ -f CMakeFiles/CMakeOutput.log ]; then cat CMakeFiles/CMakeOutput.log; fi + fi + exit $cmake_exit_code ;; build) From 78b187e9eaff0cae88eb9873bc92af614d5e28e6 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 17 Nov 2023 15:53:33 -0600 Subject: [PATCH 2/4] Fix library search path at linking. Prevent OS pollution. --- .../github-actions/ci/run_step_ornl-sulfur-2.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh b/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh index a1aa0d11d2..8509f1d9a7 100755 --- a/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh +++ b/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh @@ -117,10 +117,7 @@ case "$1" in ;; esac if [ $cmake_exit_code -ne 0 ]; then - ls CMakeFiles - cmake --version - echo "int main() {}" > test.cpp - /usr/lib64/openmpi/bin/mpicxx -fopenmp --offload-arch=sm_70 -Wno-linker-warnings -Wno-unknown-cuda-version -fopenmp-assume-no-thread-state -fopenmp-assume-no-nested-parallelism test.cpp -v + # for debugging purpose if [ -f CMakeFiles/CMakeError.log ]; then cat CMakeFiles/CMakeError.log; fi if [ -f CMakeFiles/CMakeOutput.log ]; then cat CMakeFiles/CMakeOutput.log; fi fi From 2fa152d54238c4feae22f72a722e1f7d365d851d Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 17 Nov 2023 15:55:04 -0600 Subject: [PATCH 3/4] Disable fast-fail. Restore previous setting. --- .github/workflows/ci-github-actions-self-hosted.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-github-actions-self-hosted.yaml b/.github/workflows/ci-github-actions-self-hosted.yaml index 6a6bad04b4..e71db54b3b 100644 --- a/.github/workflows/ci-github-actions-self-hosted.yaml +++ b/.github/workflows/ci-github-actions-self-hosted.yaml @@ -17,7 +17,7 @@ jobs: GH_JOBNAME: ${{matrix.jobname}} GH_OS: Linux strategy: - fail-fast: true + fail-fast: false matrix: jobname: [ GCC11-NoMPI-MKL-Real-Mixed, # mixed precision @@ -112,7 +112,7 @@ jobs: GH_JOBNAME: ${{matrix.jobname}} GH_OS: Linux strategy: - fail-fast: true + fail-fast: false matrix: jobname: [ V100-Clang16-MPI-CUDA-AFQMC-Offload-Real-Mixed, # auxiliary field, offload From 3af77b6613ad2d1c6669e9087813710f73224d66 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 17 Nov 2023 16:26:55 -0600 Subject: [PATCH 4/4] Revert CUDA to 11.2 --- .../github-actions/ci/run_step_ornl-sulfur-2.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh b/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh index 8509f1d9a7..ca7f2563a7 100755 --- a/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh +++ b/tests/test_automation/github-actions/ci/run_step_ornl-sulfur-2.sh @@ -60,8 +60,8 @@ case "$1" in LLVM_DIR=$HOME/opt/spack/linux-rhel9-cascadelake/gcc-9.4.0/llvm-16.0.2-ltjkfjdu6p2cfcyw3zalz4x5sz5do3cr - echo "Set PATHs to cuda-12.1" - export PATH=/usr/local/cuda-12.1/bin:$PATH + echo "Set PATHs to cuda-11.2" + export PATH=$HOME/opt/cuda/11.2/bin:$PATH export OMPI_CC=$LLVM_DIR/bin/clang export OMPI_CXX=$LLVM_DIR/bin/clang++ @@ -71,7 +71,7 @@ case "$1" in echo "OMPI_CC=$OMPI_CC" >> $GITHUB_ENV echo "OMPI_CXX=$OMPI_CXX" >> $GITHUB_ENV - # Confirm that cuda 12.1 gets picked up by the compiler + # Confirm that cuda 11.2 gets picked up by the compiler $OMPI_CXX -v cmake -GNinja \