Skip to content

Commit

Permalink
Add GHA build-hpc running on ECMWF GPU partition
Browse files Browse the repository at this point in the history
  • Loading branch information
wdeconinck committed Feb 14, 2025
1 parent af64837 commit 04a0a25
Showing 1 changed file with 176 additions and 0 deletions.
176 changes: 176 additions & 0 deletions .github/workflows/build-hpc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
name: build-hpc

# Events that start this workflow
on:
  # Pushes to the main, develop and ci branches; pushes of tags are ignored
  push:
    branches: [main, develop, ci]
    tags-ignore: ['**']

  # Pull requests, with no activity-type filter
  pull_request:

  # On-demand (manual) dispatch
  workflow_dispatch:

  # A label being added to a pull request; this is how a public PR gets
  # approved for CI
  pull_request_target:
    types:
      - labeled

# Environment variables shared by every job in this workflow
env:
  ATLAS_TOOLS: "${{ github.workspace }}/tools"
  CTEST_PARALLEL_LEVEL: "1"
  # Increase the suffix to force a new cache to be created
  CACHE_SUFFIX: v1

jobs:
  # Submits a build-and-test script for this repository through the
  # ecmwf-actions ci-hpc-generic action, once per matrix entry.
  ci-hpc:
    name: ci-hpc
    # Run when the event is not from a fork and is not a "labeled" action,
    # or when the label carried by the event is 'approved-for-ci'.
    if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}

    strategy:
      fail-fast: false  # false: try to complete all jobs

      matrix:
        name:
          - ac-gpu nvhpc

        # Per-configuration settings, matched to the entries above by name
        include:
          - name: ac-gpu nvhpc
            site: ac-batch
            # Name of the repository secret holding the user passed as
            # troika_user
            troika_user_secret: HPC_CI_GPU_SSH_USER
            sbatch_options: |
              #SBATCH --time=00:20:00
              #SBATCH --nodes=1
              #SBATCH --ntasks=4
              #SBATCH --cpus-per-task=32
              #SBATCH --gpus-per-task=1
              #SBATCH --mem=200G
              #SBATCH --qos=dg
            # Environment modules loaded at the start of the job script
            modules:
              - cmake
              - ninja
              - prgenv/nvidia
              - hpcx-openmpi/2.14.0-cuda
              - fftw
              - qhull
              - eigen
            # Appended to the CMake options of this repository and of every
            # dependency that declares cmake_options
            cmake_options: -DMPI_SLOTS=4

    runs-on: [self-hosted, linux, hpc]
    env:
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: ecmwf-actions/reusable-workflows/ci-hpc-generic@v2
        with:
          site: ${{ matrix.site }}
          troika_user: ${{ secrets[matrix.troika_user_secret] }}
          sbatch_options: ${{ matrix.sbatch_options }}
          output_dir: ${{ matrix.output_dir || '' }}
          workdir: ${{ matrix.workdir || '' }}
          # Values made available to the template below: the CMake options
          # for this repository, extra ctest options, and the dependencies
          # (repository, ref to fetch, CMake options) built before it.
          template_data: |
            cmake_options:
              - -DENABLE_MPI=ON
              - -DENABLE_ACC=ON
              - -DENABLE_CUDA=ON
              - -DSKIP_TEST_atlas_test_field_foreach=TRUE
              - ${{ matrix.cmake_options || '' }}
            ctest_options: ${{ matrix.ctest_options || '' }}
            dependencies:
              ecmwf/ecbuild:
                version: develop
              ecmwf/eckit:
                version: develop
                cmake_options:
                  - -DENABLE_MPI=ON
                  - -DENABLE_CUDA=OFF
                  - -DENABLE_TESTS=OFF
                  - -DENABLE_ECKIT_SQL=OFF
                  - -DENABLE_ECKIT_CMD=OFF
                  - -DENABLE_AIO=OFF
                  - -DENABLE_WARNINGS=OFF
                  - ${{ matrix.cmake_options || '' }}
              ecmwf/fckit:
                version: develop
                cmake_options:
                  - -DENABLE_TESTS=OFF
                  - ${{ matrix.cmake_options || '' }}
              ecmwf-ifs/fiat:
                version: develop
                cmake_options:
                  - -DENABLE_MPI=ON
                  - -DENABLE_TESTS=OFF
                  - ${{ matrix.cmake_options || '' }}
              ecmwf-ifs/ectrans:
                version: develop
                cmake_options:
                  - -DENABLE_MPI=ON
                  - -DENABLE_ACC=ON
                  - -DENABLE_GPU=ON
                  - -DENABLE_TESTS=OFF
                  - ${{ matrix.cmake_options || '' }}
          # Job script, written as a Jinja-style template over template_data.
          # Steps, in order:
          #   1. load the matrix modules and list them;
          #   2. for each dependency: fetch the requested ref, configure with
          #      Ninja, build (timed), install under $BASEDIR/install/<name>
          #      and prepend its bin directory to PATH;
          #   3. fetch the commit under test (PR head if present, otherwise
          #      the pushed commit), configure, build (timed), run ctest,
          #      install, and run `atlas --info`;
          #   4. remove the dependency checkouts, the repository checkout,
          #      the build tree and the install tree.
          # The timing message in step 3 prints $REPO: at that point $name
          # still holds the basename of the last dependency, so printing it
          # would mislabel the build.
          # NOTE(review): `ctest_options or ''` guards against an empty
          # ctest_options value being rendered as the literal text "None";
          # this assumes template_data is loaded as YAML and rendered with
          # Jinja2 - confirm against the ci-hpc-generic action.
          template: |
            set +x
            {% for module in "${{ join(matrix.modules, ',') }}".split(',') %}
            module load {{module}}
            {% endfor %}
            echo "+ module list"
            module list
            set -x
            BASEDIR=$PWD
            export CMAKE_TEST_LAUNCHER="srun;-n;1"
            export CMAKE_PREFIX_PATH=$BASEDIR/install:$CMAKE_PREFIX_PATH
            {% for repo_name, options in dependencies.items() %}
            name=$(basename {{repo_name}})
            mkdir -p $name
            pushd $name
            git init
            git remote add origin ${{ github.server_url }}/{{repo_name}}
            git fetch origin {{options['version']}}
            git reset --hard FETCH_HEAD
            cmake -G Ninja -S . -B build \
              {{ options['cmake_options']|join(' ') }}
            start=$(date +%s)
            cmake --build build
            end=$(date +%s)
            runtime=$((end-start))
            echo "Build $name took $runtime seconds"
            cmake --install build --prefix $BASEDIR/install/$name
            export PATH=$BASEDIR/install/$name/bin:$PATH
            popd
            {% endfor %}
            REPO=${{ github.event.pull_request.head.repo.full_name || github.repository }}
            SHA=${{ github.event.pull_request.head.sha || github.sha }}
            mkdir -p $REPO
            pushd $REPO
            git init
            git remote add origin ${{ github.server_url }}/$REPO
            git fetch origin $SHA
            git reset --hard FETCH_HEAD
            popd
            cmake -G Ninja -S $REPO -B build \
              {{ cmake_options|join(' ') }}
            start=$(date +%s)
            cmake --build build
            end=$(date +%s)
            runtime=$((end-start))
            echo "Build $REPO took $runtime seconds"
            export ATLAS_FINALISES_MPI=1
            ctest --test-dir build --output-on-failure {{ ctest_options or '' }}
            cmake --install build --prefix $BASEDIR/install/$REPO
            export PATH=$BASEDIR/install/$REPO/bin:$PATH
            atlas --info
            {% for repo_name in dependencies.keys() %}
            name=$(basename {{repo_name}})
            rm -r $name
            {% endfor %}
            rm -r $REPO
            rm -r build
            rm -r $BASEDIR/install

0 comments on commit 04a0a25

Please sign in to comment.