Run SOMISANA forecast models on MIMS #404
Workflow file for this run

name: Run SOMISANA forecast models on MIMS
on:
schedule:
# the workflow is designed to be run every 6 hours, but it can optionally be run every 12 or 24 hours
# initialise once a day
- cron: '0 0 * * *'
# initialise twice a day
# - cron: '0 0,12 * * *'
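# initialise every 6 hours (a hypothetical alternative, not currently active; note that the
# run_date calculation in the envs job below floors the hour to a multiple of 12, so it would
# also need adjusting for a 6-hourly schedule)
# - cron: '0 */6 * * *'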
workflow_dispatch:
inputs:
run_date:
description: 'Date and time for T0 for the run in format YYYYMMDD_HH'
required: false
default: ''
type: string
#build_images:
# description: 'Run the image builds?'
# required: true
# default: 'true'
# type: boolean
jobs:
# note that the jobs aren't necessarily executed in the order they are written below;
# the execution order is determined by the 'needs' attribute of each job
build_cli:
# do we still need this to be conditional?
# if: ${{ github.event.schedule || github.event.inputs.build_images == 'true' }}
uses: ./.github/workflows/build_images.yml # Path to your reusable workflow
with:
IMAGE_ID: cli
# compute some common variables (exposed as job outputs) for use by the jobs below
envs:
runs-on: ubuntu-latest
outputs:
BRANCH_REF: ${{ steps.BRANCH_REF.outputs.value }}
RUN_DATE: ${{ steps.calculate_date.outputs.value }}
steps:
- name: Calculate run_date
id: calculate_date
run: |
input_run_date=${{ github.event.inputs.run_date || 'unspecified' }}
if [[ ${{ github.event_name }} == 'workflow_dispatch' && ${input_run_date} != 'unspecified' ]]
then
run_date="${{ github.event.inputs.run_date }}" # Use provided run_date
else
# automatically set the run_date by finding an appropriate time stamp in the past (corresponding to our cron schedule)
# Get the current time in UTC
current_time=$(date -u +'%Y%m%d_%H')
# Extract the hour and calculate the nearest multiple of 12 in the past (as per our cron schedule above)
hour=$(echo ${current_time:9:2} | awk '{print int($1 - ($1%12))}')
# Correct hour formatting (ensure leading zero)
hour=$(printf "%02d" $hour)
# Assemble the run_date
run_date=$(echo ${current_time:0:8}_${hour})
fi
echo "value=$run_date" >> $GITHUB_OUTPUT
# Dynamically set the branch ref to the currently executing branch
- name: Set the BRANCH_REF
id: BRANCH_REF
run: |
echo "value=${GITHUB_REF##*/}" >> $GITHUB_OUTPUT
# everything below here runs using the `mims1` self-hosted runner
# This is a server with 128 CPUs and 256 GB of RAM, dedicated to running SOMISANA's operational models
# We may want to use an additional modelling server in future (which will be a separate node on MIMS)
# In that event, we could put all the code below here in a new reusable workflow called run_ops_mims1.yml
# and then add another one called run_ops_mims2.yml, set up in the same way but running different models/domains (a hypothetical sketch follows below)
# (note you'll also have to include another `git pull` command at the end of build_images.yml to make sure the latest images are available on the new server)
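# a hypothetical sketch of what such a split could look like (neither reusable workflow exists yet;
# the job names and the mims2 runner name are illustrative only):
#   run_mims1:
#     needs: [envs,build_cli]
#     uses: ./.github/workflows/run_ops_mims1.yml
#     with:
#       RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
#       BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
#       RUNNER_NAME: mims1
#     secrets: inherit
#   run_mims2:
#     needs: [envs,build_cli]
#     uses: ./.github/workflows/run_ops_mims2.yml
#     with:
#       RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
#       BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
#       RUNNER_NAME: mims2
#     secrets: inherit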
#
cleanup:
needs: [envs,build_cli]
uses: ./.github/workflows/cleanup.yml # Path to your reusable workflow
with:
RUNNER_NAME: mims1
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
# download all the data we'll need to force the models
# no need to provide any model-specific inputs as we hard-code the extent to cover the whole EEZ
get_forcing:
needs: [envs, build_cli]
uses: ./.github/workflows/get_forcing.yml # Path to your reusable workflow
with:
RUNNER_NAME: mims1
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
HDAYS: 5
FDAYS: 5
secrets: inherit
# prepare croco config dirs for each domain
prep_domains:
needs: [envs,get_forcing]
strategy:
matrix:
# running as a matrix strategy allows us to prepare different domains in parallel inside a single job
domain: ['sa_southeast_01','sa_west_02']
uses: ./.github/workflows/prep_domain.yml # Path to your reusable workflow
with:
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
RUNNER_NAME: mims1
MODEL: croco_v1.3.1
DOMAIN: ${{ matrix.domain }}
COMP: C03
# in future we may want to use different compile options for each domain
# one way of handling that would be to append the compile option to each string in the domain variable under the matrix strategy
# and then tease out the domain and compile option inside prep_domain.yml (see the commented sketch below)
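# a hypothetical sketch of that approach (the pairing of compile options to domains is illustrative only):
#   strategy:
#     matrix:
#       # each entry encodes 'domain,compile_option'; prep_domain.yml would split it on the comma
#       domain: ['sa_southeast_01,C03','sa_west_02,C04']
#   with:
#     DOMAIN: ${{ matrix.domain }}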
# prepare croco boundary forcing for each domain
make_forcing:
needs: [envs,get_forcing,prep_domains]
strategy:
matrix:
# running as a matrix strategy allows us to prepare different domains in parallel inside a single job
domain: ['sa_southeast_01','sa_west_02']
ogcm: ['MERCATOR']
uses: ./.github/workflows/make_forcing.yml # Path to your reusable workflow
with:
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
RUNNER_NAME: mims1
MODEL: croco_v1.3.1
DOMAIN: ${{ matrix.domain }}
OGCM: ${{ matrix.ogcm }}
HDAYS: 5
# run the model, do the postprocessing, archive the output
#
# we considered using GitHub's 'matrix' strategy to run the different domains in parallel,
# but opted against it because if one of the domains fails, it cancels all the others running in parallel...
# so separating the jobs makes it more robust... and also easier to follow in the actions log on GitHub
# here's an idea...
# when we end up with 4 domains, each needing 30-ish processors, we'll be able to run all 4 in parallel
# we could put those 4 jobs in a callable workflow, run_all_domains.yml, which takes the atmospheric and boundary forcings as inputs and runs the 4 models in parallel
#
# then we would line up different calls to run_all_domains.yml in this workflow, each with a different combination of surface and boundary forcings (a hypothetical sketch follows below)
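# a hypothetical sketch of such a call (run_all_domains.yml does not exist yet; the inputs shown are illustrative only):
#   run_all_domains_gfs:
#     needs: [envs,get_forcing,prep_domains,make_forcing]
#     uses: ./.github/workflows/run_all_domains.yml
#     with:
#       RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
#       BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
#       RUNNER_NAME: mims1
#       BLK: GFS
#       OGCM: MERCATOR
#       HDAYS: 5
#       FDAYS: 5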
run_sa_west_gfs:
needs: [envs,get_forcing,prep_domains,make_forcing]
if: ${{ always() }}
uses: ./.github/workflows/run_croco.yml # Path to your reusable workflow
with:
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
RUNNER_NAME: mims1
MODEL: croco_v1.3.1
DOMAIN: sa_west_02
VERSION: v1.0
COMP: C03
INP: I99
BLK: GFS
FRC: ''
OGCM: MERCATOR
HDAYS: 5
FDAYS: 5
run_sa_southeast_gfs:
needs: [envs,get_forcing,prep_domains,make_forcing]
if: ${{ always() }}
uses: ./.github/workflows/run_croco.yml # Path to your reusable workflow
with:
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
RUNNER_NAME: mims1
MODEL: croco_v1.3.1
DOMAIN: sa_southeast_01
VERSION: v1.0
COMP: C03
INP: I99
BLK: GFS
FRC: ''
OGCM: MERCATOR
HDAYS: 5
FDAYS: 5
# Now the SAWS-forced runs
# We only execute the SAWS-forced runs if needs.get_forcing.outputs.SAWS_OK == 1, i.e. if we were able to find
# a SAWS file that was initialised no more than 12 hours ago. This is important, since our FDAYS parameter
# must be less than the number of forecast days in the SAWS forcing file. The SAWS files contain 3 days of
# forecast data, but since it takes 12 hours to get to us, we can only run our model for 2.5 days. We do,
# however, set FDAYS to 2.45 (just over an hour less), otherwise CROCO would run out of forcing data in the
# final hour of the run (the arithmetic is spelled out below).
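# the arithmetic, spelled out (assuming the 12-hour delivery latency stated above):
#   3.0 days of SAWS forecast - 0.5 days latency = 2.5 days usable
#   FDAYS = 2.45, leaving 2.5 - 2.45 = 0.05 days = 1.2 hours of safety margin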
run_sa_west_saws:
needs: [envs,get_forcing,prep_domains,make_forcing]
if: ${{ always() && needs.get_forcing.outputs.SAWS_OK == 1 }}
uses: ./.github/workflows/run_croco.yml # Path to your reusable workflow
with:
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
RUNNER_NAME: mims1
MODEL: croco_v1.3.1
DOMAIN: sa_west_02
VERSION: v1.0
COMP: C03
INP: I99
BLK: SAWS
FRC: ''
OGCM: MERCATOR
HDAYS: 5
FDAYS: 2.45
run_sa_southeast_saws:
needs: [envs,get_forcing,prep_domains,make_forcing]
if: ${{ always() && needs.get_forcing.outputs.SAWS_OK == 1 }}
uses: ./.github/workflows/run_croco.yml # Path to your reusable workflow
with:
RUN_DATE: ${{ needs.envs.outputs.RUN_DATE }}
BRANCH_REF: ${{ needs.envs.outputs.BRANCH_REF }}
RUNNER_NAME: mims1
MODEL: croco_v1.3.1
DOMAIN: sa_southeast_01
VERSION: v1.0
COMP: C03
INP: I99
BLK: SAWS
FRC: ''
OGCM: MERCATOR
HDAYS: 5
FDAYS: 2.45