diff --git a/env/AZUREPW.env b/env/AZUREPW.env
new file mode 100755
index 0000000000..706c659e95
--- /dev/null
+++ b/env/AZUREPW.env
@@ -0,0 +1,66 @@
+#! /usr/bin/env bash
+#
+# Runtime environment settings for jobs on the AZUREPW host.
+# Usage: source AZUREPW.env <step>
+#
+# ntasks, max_tasks_per_node and tasks_per_node must already be set (the error
+# message below points at config.resources); otherwise this exits with status 2.
+# launcher, mpmd_opt, OMP_STACKSIZE and NTHSTACK are exported for every step;
+# fcst/efcs additionally export APRUN_UFS; post exports the *_NP and *_DWN values.
+
+if [[ $# -ne 1 ]]; then
+
+    echo "Must specify an input argument to set runtime environment variables!" >&2
+    exit 1
+
+fi
+
+step=$1
+
+export launcher="srun -l --export=ALL"
+export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"
+
+# Configure MPI environment
+export OMP_STACKSIZE=2048000
+export NTHSTACK=1024000000
+
+ulimit -s unlimited
+ulimit -a
+
+# Calculate common variables
+# Check first if the dependent variables are set
+if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then
+    # Threads available to each task when tasks_per_node tasks share one node
+    max_threads_per_task=$((max_tasks_per_node / tasks_per_node))
+    NTHREADSmax=${threads_per_task:-${max_threads_per_task}}
+    NTHREADS1=${threads_per_task:-1}
+    # Cap both thread counts at max_threads_per_task
+    [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task}
+    [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task}
+    APRUN="${launcher} -n ${ntasks}"
+else
+    echo "ERROR config.resources must be sourced before sourcing AZUREPW.env" >&2
+    exit 2
+fi
+
+if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then
+
+    export launcher="srun --mpi=pmi2 -l"
+
+    # Round the task count up so that it fills a whole number of nodes
+    (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node ))
+    (( ufs_ntasks = nnodes*tasks_per_node ))
+    # With ESMF threading, the model wants to use the full node
+    export APRUN_UFS="${launcher} -n ${ufs_ntasks}"
+    unset nnodes ufs_ntasks
+
+elif [[ "${step}" = "post" ]]; then
+
+    export NTHREADS_NP=${NTHREADS1}
+    export APRUN_NP="${APRUN}"
+
+    export NTHREADS_DWN=${threads_per_task_dwn:-1}
+    [[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
+    export APRUN_DWN="${launcher} -n ${ntasks_dwn}"
+
+fi
diff --git a/parm/config/gfs/config.base b/parm/config/gfs/config.base
index 66d2a51df2..544113f942 100644
--- a/parm/config/gfs/config.base
+++ b/parm/config/gfs/config.base
@@ -482,9 +482,9 @@ export OFFSET_START_HOUR=0
 # Number of regional collectives to create soundings for
 export NUM_SND_COLLECTIVES=${NUM_SND_COLLECTIVES:-9}
 
-# The tracker, genesis, and METplus jobs are not supported on AWS yet
-# TODO: we should place these in workflow/hosts/awspw.yaml as part of AWS setup, not for general.
-if [[ "${machine}" == "AWSPW" ]]; then
+# The tracker, genesis, and METplus jobs are not supported on CSPs yet
+# TODO: we should place these in workflow/hosts/awspw.yaml and workflow/hosts/azurepw.yaml as part of AWS/AZURE setup, not for general.
+if [[ "${machine}" =~ "PW" ]]; then
   export DO_TRACKER="NO"
   export DO_GENESIS="NO"
   export DO_METP="NO"
diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources
index cec2aef238..719b31342a 100644
--- a/parm/config/gfs/config.resources
+++ b/parm/config/gfs/config.resources
@@ -112,6 +112,14 @@ case ${machine} in
     # shellcheck disable=SC2034
     mem_node_max=""
     ;;
+  "AZUREPW")
+    export PARTITION_BATCH="compute"
+    npe_node_max=24
+    max_tasks_per_node=24
+    # TODO Supply a max mem/node value for AZURE
+    # shellcheck disable=SC2034
+    mem_node_max=""
+    ;;
   "CONTAINER")
     max_tasks_per_node=1
     # TODO Supply a max mem/node value for a container
diff --git a/parm/config/gfs/config.resources.AWSPW b/parm/config/gfs/config.resources.AWSPW
index 8649713bb7..2bb5f35e76 100644
--- a/parm/config/gfs/config.resources.AWSPW
+++ b/parm/config/gfs/config.resources.AWSPW
@@ -3,6 +3,7 @@
 # AWS-specific job resources
 
 export is_exclusive="True"
+export memory=None
 
 # shellcheck disable=SC2312
 for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
diff --git a/parm/config/gfs/config.resources.AZUREPW b/parm/config/gfs/config.resources.AZUREPW
new file mode 100644
index 0000000000..96303139d8
--- /dev/null
+++ b/parm/config/gfs/config.resources.AZUREPW
@@ -0,0 +1,12 @@
+#! /usr/bin/env bash
+
+# AZURE-specific job resources
+
+export is_exclusive="True"
+unset memory
+
+# Remove every environment variable whose name starts with "memory_"
+# shellcheck disable=SC2312
+for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
+  unset "${mem_var}"
+done
diff --git a/workflow/hosts.py b/workflow/hosts.py
index eced460fd1..6244cf564e 100644
--- a/workflow/hosts.py
+++ b/workflow/hosts.py
@@ -16,7 +16,8 @@ class Host:
     """
 
     SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET', 'HERCULES',
-                       'WCOSS2', 'S4', 'CONTAINER', 'AWSPW', 'GAEA']
+                       'WCOSS2', 'S4', 'CONTAINER', 'GAEA',
+                       'AWSPW', 'AZUREPW']
 
     def __init__(self, host=None):
 
diff --git a/workflow/hosts/azurepw.yaml b/workflow/hosts/azurepw.yaml
new file mode 100644
index 0000000000..2155c67dea
--- /dev/null
+++ b/workflow/hosts/azurepw.yaml
@@ -0,0 +1,26 @@
+BASE_GIT: '' # TODO: This does not yet exist.
+DMPDIR: '' # TODO: This does not yet exist.
+PACKAGEROOT: '' # TODO: This does not yet exist.
+COMINsyn: '' # TODO: This does not yet exist.
+HOMEDIR: '/contrib/${USER}'
+STMP: '/lustre/${USER}/stmp/'
+PTMP: '/lustre/${USER}/ptmp/'
+NOSCRUB: '${HOMEDIR}'
+ACCOUNT: '${USER}'
+SCHEDULER: slurm
+QUEUE: batch
+QUEUE_SERVICE: batch
+PARTITION_BATCH: compute
+PARTITION_SERVICE: compute
+RESERVATION: ''
+CLUSTERS: ''
+CHGRP_RSTPROD: 'YES'
+CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported.
+HPSSARCH: 'NO'
+HPSS_PROJECT: emc-global # TODO: See `ATARDIR` below.
+BASE_CPLIC: '/bucket/global-workflow-shared-data/ICSDIR/prototype_ICs'
+LOCALARCH: 'NO'
+ATARDIR: '' # TODO: This will not yet work from AZURE.
+MAKE_NSSTBUFR: 'NO'
+MAKE_ACFTBUFR: 'NO'
+SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions.