# Compile on AWS #278
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2024 OGL authors
#
# SPDX-License-Identifier: GPL-3.0-or-later
# Workflow title and the name given to every individual run.
name: Compile on AWS
run-name: Compile on AWS

# Trigger: only when new commits are pushed to an already open pull request.
# (Whether anything actually runs is decided by the label check on the first
# job.)
on:
  pull_request:
    types:
      - synchronize
# Three chained jobs: launch an EC2 instance as a self-hosted runner, build and
# test on it, then terminate it again.
jobs:
  # Launches the EC2 instance and registers it as a self-hosted runner.
  start-runner:
    # Only pull requests carrying the `full_ci` label start an instance.
    if: contains(github.event.pull_request.labels.*.name, 'full_ci')
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    # Handed to the following jobs so they can target and later stop the runner.
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308634587211:role/Github-OIDC-Role-29bocUD8VBZr
          aws-region: us-east-1
      - name: Start EC2 runner
        id: start-ec2-runner
        # NOTE(review): the action reference was garbled in the scraped source
        # ("HendriceH/[email protected]"); restored as the HendriceH fork of
        # ec2-github-runner - confirm the exact tag before merging.
        uses: HendriceH/ec2-github-runner@v1.10  # Starts 60GB Root + 30 GB Share volume
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ami-03af087024bfdbbee  # Deep learning AMI
          ec2-instance-type: g4dn.xlarge
          iam-role-name: Role4Github
          subnet-id: subnet-b5d2adbb
          security-group-id: sg-559f8967
          aws-resource-tags: >  # optional, requires additional permissions
            [
              {"Key": "ucfd-project", "Value": "BMBF_2022_EXASIM"},
              {"Key": "ucfd-client", "Value": "UCFD-RD"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
            ]
          # Provisioning script passed to the action: installs base packages,
          # formats and mounts the share volume at /share, then runs the
          # post-install script fetched from S3.
          pre-runner-script: |
            #!/bin/bash
            sudo yum update -y && \
            sudo yum install docker git libicu ninja-build libasan10 -y
            sudo amazon-linux-extras install epel -y
            sudo yum install Lmod -y
            sudo systemctl enable docker
            sudo curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash
            sudo mkfs -t xfs /dev/sda1
            sudo mkdir -p /share
            sudo mount /dev/sda1 /share
            aws s3 cp s3://ucfd-share/pcluster/3.x/alinux2/x86_64/postinstall_github .
            chmod +x postinstall_github
            sudo ./postinstall_github > ~/install.log
            ln -s /share/software/cmake/3.27.8/share/cmake-3.27 /usr/share/cmake-3.27
            mkdir -p /share/ec2-user
            export USER=ec2-user

  # Configures, builds and tests the project on the freshly started runner.
  do-the-job:
    name: Do the job on the runner
    needs: start-runner  # required to start the main job when the runner is ready
    runs-on: ${{ needs.start-runner.outputs.label }}  # run the job on the newly created runner
    defaults:
      run:
        # Interactive bash, shared by every `run` step of this job.
        # NOTE(review): this custom shell string has no `-e`, so a script only
        # fails on the exit status of its last command - confirm intended.
        shell: bash -o pipefail -i {0}
    steps:
      - name: Checkout OGL
        # NOTE(review): checkout@v2 is an old major version, and `ref: dev`
        # checks out the `dev` branch rather than the pull request head -
        # confirm both are intended.
        uses: actions/checkout@v2
        with:
          ref: dev
      - name: Test env
        # Loads the toolchain modules and dumps the environment for debugging.
        run: |
          export HOME=/share/ec2-user
          module load gnu/10
          module load libfabric-aws
          module load openmpi
          module load OpenFOAM
          module load cmake
          env
      - name: Install OBR
        run: |
          module load conda
          conda activate
          python3 -m pip install --upgrade pip
          pip install setuptools --upgrade
          git clone https://github.com/exasim-project/OBR
          cd OBR
          pip install .
          obr --version
      - name: Config
        # always(): configure even when an earlier step (e.g. the OBR install)
        # failed.
        if: always()
        run: |
          export HOME=/share/ec2-user
          module load gnu/10
          module load libfabric-aws
          module load openmpi
          module load OpenFOAM
          module load cmake
          cmake --list-presets
          cmake -DOGL_CUDA_ARCHITECTURES=52 --preset ninja-cuda-release
      - name: Build and install
        run: |
          export HOME=/share/ec2-user
          module load gnu/10
          module load libfabric-aws
          module load openmpi
          module load OpenFOAM
          module load cmake
          cmake --build --preset ninja-cuda-release --target install
      - name: Run integration tests
        run: |
          export HOME=/share/ec2-user
          module load conda
          conda activate
          module load gnu/10
          module load libfabric-aws
          module load openmpi
          module load OpenFOAM
          export GINKGO_EXECUTOR=cuda
          obr init -g --config test/integration.yaml
          obr run -o runParallelSolver
          obr status

  # Terminates the EC2 instance again, whatever the outcome of the build.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner  # required to get output from the start-runner job
      - do-the-job  # required to wait when the main job is done
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    # always(): stop the runner even if an error happened in the previous jobs.
    # The extra check skips this job when start-runner itself was skipped
    # (pull request without the `full_ci` label), because then there is no
    # label or instance id to stop and the action would only fail.
    if: ${{ always() && needs.start-runner.result != 'skipped' }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308634587211:role/Github-OIDC-Role-29bocUD8VBZr
          aws-region: us-east-1
      - name: Stop EC2 runner
        # NOTE(review): reference restored from a garbled source line; keep it
        # identical to the one used in start-runner and confirm the tag.
        uses: HendriceH/ec2-github-runner@v1.10
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}