diff --git a/ci/.gitlab-ci.yml b/ci/.gitlab-ci.yml
index e01f75ccad..dc1237783e 100644
--- a/ci/.gitlab-ci.yml
+++ b/ci/.gitlab-ci.yml
@@ -8,9 +8,9 @@ include:
 #  - local: 'ci/cpu/gcc11_release_stdexec.yml'
 #  - local: 'ci/cpu/gcc11_debug_stdexec.yml'
 #  - local: 'ci/cpu/gcc12_release_cxx20.yml'
-  - local: 'ci/cpu/gcc13_codecov.yml'
+#  - local: 'ci/cpu/gcc13_codecov.yml'
 #  - local: 'ci/cpu/gcc13_release.yml'
-  - local: 'ci/cuda/gcc11_release.yml'
+  - local: 'ci/cuda/gcc13_release.yml'
 #  - local: 'ci/cuda/gcc11_release_scalapack.yml'
 #  - local: 'ci/cuda/gcc11_codecov.yml'
 #  - local: 'ci/cuda/gcc11_debug_scalapack.yml'
diff --git a/ci/common-ci.yml b/ci/common-ci.yml
index f98f0c713c..f7e69ac481 100644
--- a/ci/common-ci.yml
+++ b/ci/common-ci.yml
@@ -10,8 +10,7 @@ stages:
 
 ## BUILDS ##
 
-.build_deps_common:
-  extends: .container-builder
+.build_deps_common_base:
   stage: build_deps
   timeout: 6 hours
   before_script:
@@ -61,7 +60,16 @@ stages:
     COMMON_SPACK_ENVIRONMENT: ci/docker/common.yaml
     USE_CODECOV: "false"
 
-.build_common:
-  extends: .container-builder
+.build_deps_common:
+  extends:
+    - .container-builder-cscs-zen2
+    - .build_deps_common_base
+
+.build_deps_common_gh200:
+  extends:
+    - .container-builder-cscs-gh200
+    - .build_deps_common_base
+
+.build_common_base:
   stage: build
   timeout: 2 hours
@@ -91,6 +99,16 @@ stages:
     paths:
       - pipeline.yml
 
+.build_common:
+  extends:
+    - .container-builder-cscs-zen2
+    - .build_common_base
+
+.build_common_gh200:
+  extends:
+    - .container-builder-cscs-gh200
+    - .build_common_base
+
 .build_for_daint-mc:
   variables:
     RUNNER: ".container-runner-daint"
@@ -112,6 +130,14 @@ stages:
     THREADS_MAX_PER_TASK: 32
     THREADS_PER_NODE: 256
 
+.build_for_alps_gh200:
+  variables:
+    RUNNER: ".container-runner-todi"
+    SLURM_CONSTRAINT: gpu
+    # 64 / 2 to avoid ranks on multiple sockets for RANK6
+    THREADS_MAX_PER_TASK: 32
+    THREADS_PER_NODE: 256
+
 .run_common:
   stage: test
   trigger:
diff --git a/ci/cuda/gcc11_release.yml b/ci/cuda/gcc11_release.yml
deleted file mode 100644
index 7d47f16d09..0000000000
--- a/ci/cuda/gcc11_release.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-include:
-  - local: 'ci/common-ci.yml'
-
-cuda gcc11 release deps:
-  extends: .build_deps_common
-  variables:
-    BASE_IMAGE: docker.io/nvidia/cuda:11.7.1-devel-ubuntu22.04
-    COMPILER: gcc@11
-    CXXSTD: 17
-    SPACK_ENVIRONMENT: ci/docker/release-cuda.yaml
-    USE_MKL: "ON"
-    BUILD_IMAGE: $CSCS_REGISTRY_PATH/cuda-gcc11-release/build
-
-cuda gcc11 release build:
-  extends:
-    - .build_common
-    - .build_for_daint-gpu
-  needs:
-    - cuda gcc11 release deps
-  variables:
-    DEPLOY_BASE_IMAGE: docker.io/ubuntu:22.04
-    DEPLOY_IMAGE: $CSCS_REGISTRY_PATH/cuda-gcc11-release/deploy:$CI_COMMIT_SHA
-
-cuda gcc11 release test:
-  extends: .run_common
-  needs:
-    - cuda gcc11 release build
-  trigger:
-    include:
-      - artifact: pipeline.yml
-        job: cuda gcc11 release build
diff --git a/ci/cuda/gcc13_release.yml b/ci/cuda/gcc13_release.yml
new file mode 100644
index 0000000000..cbf1694323
--- /dev/null
+++ b/ci/cuda/gcc13_release.yml
@@ -0,0 +1,31 @@
+include:
+  - local: 'ci/common-ci.yml'
+
+cuda gcc13 release deps:
+  extends: .build_deps_common_gh200
+  variables:
+    BASE_IMAGE: docker.io/nvidia/cuda:12.6.1-devel-ubuntu24.04
+    COMPILER: gcc@13
+    SPACK_ENVIRONMENT: ci/docker/release-cuda-gh200.yaml
+    # Override the default ci/docker/common.yaml inherited from the base job.
+    COMMON_SPACK_ENVIRONMENT: ci/docker/common-gh200.yaml
+    BUILD_IMAGE: $CSCS_REGISTRY_PATH/cuda-gh200-gcc13-release/build
+
+cuda gcc13 release build:
+  extends:
+    - .build_common_gh200
+    - .build_for_alps_gh200
+  needs:
+    - cuda gcc13 release deps
+  variables:
+    DEPLOY_BASE_IMAGE: docker.io/ubuntu:24.04
+    DEPLOY_IMAGE: $CSCS_REGISTRY_PATH/cuda-gh200-gcc13-release/deploy:$CI_COMMIT_SHA
+
+cuda gcc13 release test:
+  extends: .run_common
+  needs:
+    - cuda gcc13 release build
+  trigger:
+    include:
+      - artifact: pipeline.yml
+        job: cuda gcc13 release build
diff --git a/ci/docker/common-gh200.yaml b/ci/docker/common-gh200.yaml
new file mode 100644
index 0000000000..54f2f38783
--- /dev/null
+++ b/ci/docker/common-gh200.yaml
@@ -0,0 +1,52 @@
+#
+# Distributed Linear Algebra with Future (DLAF)
+#
+# Copyright (c) 2018-2024, ETH Zurich
+# All rights reserved.
+#
+# Please, refer to the LICENSE file in the root directory.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+packages:
+  all:
+    # GH200 hosts use Grace (Arm Neoverse V2) CPUs; NVPL is Arm-only.
+    target: [neoverse_v2]
+  # Set NVPL as default blas and lapack provider, netlib-scalapack as
+  # default scalapack provider. Can be overwritten in environments if needed.
+  blas:
+    require: 'nvpl-blas'
+  lapack:
+    require: 'nvpl-lapack'
+  scalapack:
+    require: 'netlib-scalapack'
+  mpi:
+    require: 'mpich'
+  blaspp:
+    variants:
+      - '~cuda'
+      - '~openmp'
+      - '~rocm'
+  nvpl-blas:
+    require:
+      - 'threads=openmp'
+  nvpl-lapack:
+    require:
+      - 'threads=openmp'
+  openblas:
+    variants:
+      - 'threads=openmp'
+  mpich:
+    # Fix version to have better control.
+    require:
+      - '@4.2.1'
+    variants:
+      - '~fortran'
+      - '~libxml2'
+  hwloc:
+    variants:
+      - '~libxml2'
+  git:
+    # Force git as non-buildable to allow deprecated versions in environments
+    # https://github.com/spack/spack/pull/30040
+    buildable: false