Skip to content

Commit

Permalink
gpu to gh200
Browse files Browse the repository at this point in the history
  • Loading branch information
rasolca committed Sep 23, 2024
1 parent 6eb17c8 commit 1cb6261
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 36 deletions.
4 changes: 2 additions & 2 deletions ci/.gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ include:
# - local: 'ci/cpu/gcc11_release_stdexec.yml'
# - local: 'ci/cpu/gcc11_debug_stdexec.yml'
# - local: 'ci/cpu/gcc12_release_cxx20.yml'
- local: 'ci/cpu/gcc13_codecov.yml'
# - local: 'ci/cpu/gcc13_codecov.yml'
# - local: 'ci/cpu/gcc13_release.yml'
- local: 'ci/cuda/gcc11_release.yml'
- local: 'ci/cuda/gcc13_release.yml'
# - local: 'ci/cuda/gcc11_release_scalapack.yml'
# - local: 'ci/cuda/gcc11_codecov.yml'
# - local: 'ci/cuda/gcc11_debug_scalapack.yml'
Expand Down
33 changes: 30 additions & 3 deletions ci/common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ stages:
## BUILDS
##

.build_deps_common:
extends: .container-builder
.build_deps_common_base:
stage: build_deps
timeout: 6 hours
before_script:
Expand Down Expand Up @@ -61,7 +60,17 @@ stages:
COMMON_SPACK_ENVIRONMENT: ci/docker/common.yaml
USE_CODECOV: "false"

.build_common:
# Dependency-build template for the zen2 builder: combines the
# .container-builder-cscs-zen2 template with the shared settings from
# .build_deps_common_base. With multiple `extends` parents, later entries
# override earlier ones on conflicting keys.
.build_deps_common:
  extends:
    - .container-builder-cscs-zen2
    - .build_deps_common_base

# GH200 counterpart of .build_deps_common: same shared settings from
# .build_deps_common_base, but layered on the .container-builder-cscs-gh200
# template instead of the zen2 one.
.build_deps_common_gh200:
  extends:
    - .container-builder-cscs-gh200
    - .build_deps_common_base

.build_common_base:
extends: .container-builder
stage: build
timeout: 2 hours
Expand Down Expand Up @@ -91,6 +100,16 @@ stages:
paths:
- pipeline.yml

# Build template for the zen2 builder: combines the
# .container-builder-cscs-zen2 template with the shared settings from
# .build_common_base. With multiple `extends` parents, later entries
# override earlier ones on conflicting keys.
.build_common:
  extends:
    - .container-builder-cscs-zen2
    - .build_common_base

# GH200 counterpart of .build_common: same shared settings from
# .build_common_base, but layered on the .container-builder-cscs-gh200
# template instead of the zen2 one.
.build_common_gh200:
  extends:
    - .container-builder-cscs-gh200
    - .build_common_base

.build_for_daint-mc:
variables:
RUNNER: ".container-runner-daint"
Expand All @@ -112,6 +131,14 @@ stages:
THREADS_MAX_PER_TASK: 32
THREADS_PER_NODE: 256

# Variables selecting the runner and the per-node thread layout for jobs
# targeting the GH200 system.
.build_for_alps_gh200:
  variables:
    RUNNER: ".container-runner-todi"
    SLURM_CONSTRAINT: gpu
    # 64 / 2 to avoid ranks on multiple sockets for RANK6
    # NOTE(review): these two values are identical to the ones in the
    # preceding .build_for_* template; confirm they match the GH200 node
    # topology rather than being carried over unchanged.
    THREADS_MAX_PER_TASK: 32
    THREADS_PER_NODE: 256

.run_common:
stage: test
trigger:
Expand Down
31 changes: 0 additions & 31 deletions ci/cuda/gcc11_release.yml

This file was deleted.

29 changes: 29 additions & 0 deletions ci/cuda/gcc13_release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
include:
- local: 'ci/common-ci.yml'

# Dependency job (build_deps stage, via .build_deps_common_gh200) for the
# CUDA / gcc 13 release configuration on GH200.
cuda gcc13 release deps:
  extends: .build_deps_common_gh200
  variables:
    # The CUDA images live in the `nvidia/cuda` repository with the version as
    # the tag; without `cuda:` the reference points at a non-existent
    # repository and the image pull fails.
    BASE_IMAGE: docker.io/nvidia/cuda:12.6.1-devel-ubuntu24.04
    COMPILER: gcc@13
    SPACK_ENVIRONMENT: ci/docker/release-cuda-gh200.yaml
    # The shared template defaults this to ci/docker/common.yaml; point it at
    # the GH200 package settings so that file is actually used.
    # NOTE(review): confirm the base template consumes this variable unchanged.
    COMMON_SPACK_ENVIRONMENT: ci/docker/common-gh200.yaml
    BUILD_IMAGE: $CSCS_REGISTRY_PATH/cuda-gh200-gcc13-release/build

# Build job for the CUDA / gcc 13 release configuration on GH200.
# Combines the GH200 build template with the GH200 runner variables and
# waits for the matching deps job.
cuda gcc13 release build:
  extends:
    - .build_common_gh200
    - .build_for_alps_gh200
  needs:
    - cuda gcc13 release deps
  variables:
    DEPLOY_BASE_IMAGE: docker.io/ubuntu:24.04
    # Tagged with the commit SHA, so each commit gets its own deploy image tag.
    DEPLOY_IMAGE: $CSCS_REGISTRY_PATH/cuda-gh200-gcc13-release/deploy:$CI_COMMIT_SHA

# Test job for the CUDA / gcc 13 release configuration on GH200.
# Triggers a child pipeline from the pipeline.yml artifact produced by the
# matching build job.
cuda gcc13 release test:
  extends: .run_common
  needs:
    - cuda gcc13 release build
  trigger:
    include:
      - artifact: pipeline.yml
        job: cuda gcc13 release build
51 changes: 51 additions & 0 deletions ci/docker/common-gh200.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#
# Distributed Linear Algebra with Future (DLAF)
#
# Copyright (c) 2018-2024, ETH Zurich
# All rights reserved.
#
# Please, refer to the LICENSE file in the root directory.
# SPDX-License-Identifier: BSD-3-Clause
#

# Spack package preferences shared by the GH200 (Grace Hopper) CI
# environments.
packages:
  all:
    # GH200 pairs the GPU with an Arm (aarch64) Grace CPU, and the NVPL
    # providers required below are Arm-only, so an x86_64 target cannot
    # concretize here.
    target: [aarch64]
  # Set NVPL as default blas and lapack provider, and netlib-scalapack as
  # default scalapack provider.
  # Can be overwritten in environments if needed.
  blas:
    require: 'nvpl-blas'
  lapack:
    require: 'nvpl-lapack'
  scalapack:
    require: 'netlib-scalapack'
  mpi:
    require: 'mpich'
  blaspp:
    variants:
      - '~cuda'
      - '~openmp'
      - '~rocm'
  nvpl-blas:
    require:
      - 'threads=openmp'
  nvpl-lapack:
    require:
      - 'threads=openmp'
  openblas:
    variants:
      - 'threads=openmp'
  mpich:
    # Fix version to have better control.
    require:
      - '@4.2.1'
    variants:
      - '~fortran'
      - '~libxml2'
  hwloc:
    variants:
      - '~libxml2'
  git:
    # Force git as non-buildable to allow deprecated versions in environments
    # https://github.com/spack/spack/pull/30040
    buildable: false

0 comments on commit 1cb6261

Please sign in to comment.