diff --git a/env/AWSPW.env b/env/AWSPW.env
new file mode 100755
index 0000000000..79ad52d460
--- /dev/null
+++ b/env/AWSPW.env
@@ -0,0 +1,139 @@
+#! /usr/bin/env bash
+
+if [[ $# -ne 1 ]]; then
+
+    echo "Must specify an input argument to set runtime environment variables!"
+    echo "argument can be any one of the following:"
+    echo "atmanlrun atmensanlrun aeroanlrun landanlrun"
+    echo "anal sfcanl fcst post vrfy metp"
+    echo "eobs eupd ecen efcs epos"
+    echo "postsnd awips gempak"
+    exit 1
+
+fi
+
+step=$1
+
+export npe_node_max=36
+export launcher="mpiexec.hydra"
+export mpmd_opt=""
+
+# Configure MPI environment
+export OMP_STACKSIZE=2048000
+export NTHSTACK=1024000000
+
+ulimit -s unlimited
+ulimit -a
+
+if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then
+
+    if [[ "${CDUMP}" =~ "gfs" ]]; then
+        nprocs="npe_${step}_gfs"
+        ppn="npe_node_${step}_gfs"
+        # A plain assignment always exits 0, so an "|| fallback" never runs; test explicitly.
+        [[ -z "${!ppn+0}" ]] && ppn="npe_node_${step}"
+    else
+        nprocs="npe_${step}"
+        ppn="npe_node_${step}"
+    fi
+    (( nnodes = (${!nprocs}+${!ppn}-1)/${!ppn} ))
+    (( ntasks = nnodes*${!ppn} ))
+    # With ESMF threading, the model wants to use the full node
+    export APRUN_UFS="${launcher} -n ${ntasks}"
+    unset nprocs ppn nnodes ntasks
+
+elif [[ "${step}" = "post" ]]; then
+
+    nth_max=$((npe_node_max / npe_node_post))
+
+    export NTHREADS_NP=${nth_np:-1}
+    [[ ${NTHREADS_NP} -gt ${nth_max} ]] && export NTHREADS_NP=${nth_max}
+    export APRUN_NP="${launcher} -n ${npe_post}"
+
+    export NTHREADS_DWN=${nth_dwn:-1}
+    [[ ${NTHREADS_DWN} -gt ${nth_max} ]] && export NTHREADS_DWN=${nth_max}
+    export APRUN_DWN="${launcher} -n ${npe_dwn}"
+
+elif [[ "${step}" = "ecen" ]]; then
+
+    nth_max=$((npe_node_max / npe_node_ecen))
+
+    export NTHREADS_ECEN=${nth_ecen:-${nth_max}}
+    [[ ${NTHREADS_ECEN} -gt ${nth_max} ]] && export NTHREADS_ECEN=${nth_max}
+    export APRUN_ECEN="${launcher} -n ${npe_ecen}"
+
+    export NTHREADS_CHGRES=${nth_chgres:-12}
+    [[ ${NTHREADS_CHGRES} -gt ${npe_node_max} ]] && export NTHREADS_CHGRES=${npe_node_max}
+    export APRUN_CHGRES="time"
+
+    export NTHREADS_CALCINC=${nth_calcinc:-1}
+    [[ ${NTHREADS_CALCINC} -gt ${nth_max} ]] && export NTHREADS_CALCINC=${nth_max}
+    export APRUN_CALCINC="${launcher} -n ${npe_ecen}"
+
+elif [[ "${step}" = "esfc" ]]; then
+
+    nth_max=$((npe_node_max / npe_node_esfc))
+
+    export NTHREADS_ESFC=${nth_esfc:-${nth_max}}
+    [[ ${NTHREADS_ESFC} -gt ${nth_max} ]] && export NTHREADS_ESFC=${nth_max}
+    export APRUN_ESFC="${launcher} -n ${npe_esfc}"
+
+    export NTHREADS_CYCLE=${nth_cycle:-14}
+    [[ ${NTHREADS_CYCLE} -gt ${npe_node_max} ]] && export NTHREADS_CYCLE=${npe_node_max}
+    export APRUN_CYCLE="${launcher} -n ${npe_esfc}"
+
+elif [[ "${step}" = "epos" ]]; then
+
+    nth_max=$((npe_node_max / npe_node_epos))
+
+    export NTHREADS_EPOS=${nth_epos:-${nth_max}}
+    [[ ${NTHREADS_EPOS} -gt ${nth_max} ]] && export NTHREADS_EPOS=${nth_max}
+    export APRUN_EPOS="${launcher} -n ${npe_epos}"
+
+elif [[ "${step}" = "postsnd" ]]; then
+
+    export CFP_MP="YES"
+
+    nth_max=$((npe_node_max / npe_node_postsnd))
+
+    export NTHREADS_POSTSND=${nth_postsnd:-1}
+    [[ ${NTHREADS_POSTSND} -gt ${nth_max} ]] && export NTHREADS_POSTSND=${nth_max}
+    export APRUN_POSTSND="${launcher} -n ${npe_postsnd}"
+
+    export NTHREADS_POSTSNDCFP=${nth_postsndcfp:-1}
+    [[ ${NTHREADS_POSTSNDCFP} -gt ${nth_max} ]] && export NTHREADS_POSTSNDCFP=${nth_max}
+    export APRUN_POSTSNDCFP="${launcher} -n ${npe_postsndcfp} ${mpmd_opt}"
+
+elif [[ "${step}" = "awips" ]]; then
+
+    nth_max=$((npe_node_max / npe_node_awips))
+
+    export NTHREADS_AWIPS=${nth_awips:-2}
+    [[ ${NTHREADS_AWIPS} -gt ${nth_max} ]] && export NTHREADS_AWIPS=${nth_max}
+    export APRUN_AWIPSCFP="${launcher} -n ${npe_awips} ${mpmd_opt}"
+
+elif [[ "${step}" = "gempak" ]]; then
+
+    export CFP_MP="YES"
+
+    if [[ ${CDUMP} == "gfs" ]]; then
+        npe_gempak=${npe_gempak_gfs}
+        npe_node_gempak=${npe_node_gempak_gfs}
+    fi
+
+    nth_max=$((npe_node_max / npe_node_gempak))
+
+    export NTHREADS_GEMPAK=${nth_gempak:-1}
+    [[ ${NTHREADS_GEMPAK} -gt ${nth_max} ]] && export NTHREADS_GEMPAK=${nth_max}
+    export APRUN="${launcher} -n ${npe_gempak} ${mpmd_opt}"
+
+
+elif [[ "${step}" = "fit2obs" ]]; then
+
+    nth_max=$((npe_node_max / npe_node_fit2obs))
+
+    export NTHREADS_FIT2OBS=${nth_fit2obs:-1}
+    [[ ${NTHREADS_FIT2OBS} -gt ${nth_max} ]] && export NTHREADS_FIT2OBS=${nth_max}
+    export MPIRUN="${launcher} -n ${npe_fit2obs}"
+
+fi
diff --git a/jobs/rocoto/fcst.sh b/jobs/rocoto/fcst.sh
index 512bee127f..5512a0cfa7 100755
--- a/jobs/rocoto/fcst.sh
+++ b/jobs/rocoto/fcst.sh
@@ -12,9 +12,13 @@ source "${HOMEgfs}/ush/preamble.sh"
 source "${HOMEgfs}/ush/detect_machine.sh"
 set +x
 source "${HOMEgfs}/ush/module-setup.sh"
-module use "${HOMEgfs}/sorc/ufs_model.fd/tests"
-module load modules.ufs_model.lua
-module load prod_util
+# Keep the UFS module environment on HPC hosts; noaacloud sets up its own stack below.
+if [[ "${MACHINE_ID}" != "noaacloud" ]]; then
+    module use "${HOMEgfs}/sorc/ufs_model.fd/tests"
+    module load modules.ufs_model.lua
+    module load prod_util
+fi
+
 if [[ "${MACHINE_ID}" = "wcoss2" ]]; then
     module load cray-pals
 fi
@@ -30,6 +34,25 @@ if [[ "${MACHINE_ID}" = "hera" ]]; then
 #elif [[ "${MACHINE_ID}" = "wcoss2" ]]; then
 #  module load "python/3.7.5"
 fi
+if [[ "${MACHINE_ID}" == "noaacloud" ]]; then
+    if [[ "${PW_CSP:-}" = "aws" ]]; then
+
+        # TODO: This can be cleaned-up; most of this is a hack for now.
+        module use "/contrib/spack-stack/envs/ufswm/install/modulefiles/Core"
+        module load "stack-intel"
+        module load "stack-intel-oneapi-mpi"
+        module use -a "/contrib/spack-stack/miniconda/modulefiles/miniconda/"
+        module load "py39_4.12.0"
+        module load "ufs-weather-model-env/1.0.0"
+        export NETCDF="/contrib/spack-stack/miniconda/apps/miniconda/py39_4.12.0"
+        # TODO: Are there plans for EPIC to maintain this package or should GW provide support?
+        export UTILROOT="/contrib/global-workflow/NCEPLIBS-prod_util"
+        export PATH="${PATH}:/contrib/global-workflow/bin"
+        ndate_path="$(command -v ndate)"
+        export NDATE="${ndate_path}"
+    fi
+fi
+
 module list
 unset MACHINE_ID
 set_trace
diff --git a/sorc/build_ufs.sh b/sorc/build_ufs.sh
index bb619162a4..00653cb1e3 100755
--- a/sorc/build_ufs.sh
+++ b/sorc/build_ufs.sh
@@ -32,10 +32,28 @@ COMPILE_NR=0
 CLEAN_BEFORE=YES
 CLEAN_AFTER=NO
 
-./tests/compile.sh "${MACHINE_ID}" "${MAKE_OPT}" "${COMPILE_NR}" "intel" "${CLEAN_BEFORE}" "${CLEAN_AFTER}"
-mv "./tests/fv3_${COMPILE_NR}.exe" ./tests/ufs_model.x
-mv "./tests/modules.fv3_${COMPILE_NR}.lua" ./tests/modules.ufs_model.lua
-cp "./modulefiles/ufs_common.lua" ./tests/ufs_common.lua
-cp "./modulefiles/ufs_common_spack.lua" ./tests/ufs_common_spack.lua
+if [[ "${MACHINE_ID}" != "noaacloud" ]]; then
+    ./tests/compile.sh "${MACHINE_ID}" "${MAKE_OPT}" "${COMPILE_NR}" "intel" "${CLEAN_BEFORE}" "${CLEAN_AFTER}"
+    mv "./tests/fv3_${COMPILE_NR}.exe" ./tests/ufs_model.x
+    mv "./tests/modules.fv3_${COMPILE_NR}.lua" ./tests/modules.ufs_model.lua
+    cp "./modulefiles/ufs_common.lua" ./tests/ufs_common.lua
+else
+
+    if [[ "${PW_CSP}" == "aws" ]]; then
+        # TODO: This will need to be addressed further when the EPIC stacks are available/supported.
+        module use /contrib/spack-stack/envs/ufswm/install/modulefiles/Core
+        module load stack-intel
+        module load stack-intel-oneapi-mpi
+        module load ufs-weather-model-env/1.0.0
+        # TODO: It is still uncertain why this is the only module that is
+        # missing; check the spack build as this needed to be added manually.
+        module load w3emc/2.9.2 # TODO: This has similar issues for the EPIC stack.
+        module list
+    fi
+
+    export CMAKE_FLAGS="${MAKE_OPT}"
+    ./build.sh
+    mv "${cwd}/ufs_model.fd/build/ufs_model" "${cwd}/ufs_model.fd/tests/ufs_model.x"
+fi
 
 exit 0
diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh
index 647722b7a3..f7e4a9d4f3 100755
--- a/ush/detect_machine.sh
+++ b/ush/detect_machine.sh
@@ -38,6 +38,13 @@ case $(hostname -f) in
   *) MACHINE_ID=UNKNOWN ;; # Unknown platform
 esac
 
+if [[ ${MACHINE_ID} == "UNKNOWN" ]]; then
+  case ${PW_CSP:-} in
+    "aws" | "google" | "azure") MACHINE_ID=noaacloud ;;
+    *) PW_CSP="UNKNOWN" ;;
+  esac
+fi
+
 # Overwrite auto-detect with MACHINE if set
 MACHINE_ID=${MACHINE:-${MACHINE_ID}}
 
diff --git a/ush/module-setup.sh b/ush/module-setup.sh
index 9c27ab4f7c..16aa92cc06 100755
--- a/ush/module-setup.sh
+++ b/ush/module-setup.sh
@@ -102,6 +102,14 @@ elif [[ ${MACHINE_ID} = discover* ]]; then
     export PATH=${PATH}:${SPACK_ROOT}/bin
     . "${SPACK_ROOT}"/share/spack/setup-env.sh
 
+# TODO: This can likely be made more general once other cloud
+# platforms come online.
+elif [[ ${MACHINE_ID} = "noaacloud" ]]; then
+
+    export SPACK_ROOT=/contrib/global-workflow/spack-stack/spack
+    export PATH=${PATH}:${SPACK_ROOT}/bin
+    . "${SPACK_ROOT}"/share/spack/setup-env.sh
+
 else
     echo WARNING: UNKNOWN PLATFORM 1>&2
 fi
diff --git a/workflow/hosts.py b/workflow/hosts.py
index b97ac67d89..fa1362ef2c 100644
--- a/workflow/hosts.py
+++ b/workflow/hosts.py
@@ -15,14 +15,15 @@ class Host:
     """
 
     SUPPORTED_HOSTS = ['HERA', 'ORION', 'JET',
-                       'WCOSS2', 'S4', 'CONTAINER']
+                       'WCOSS2', 'S4', 'CONTAINER', 'AWSPW']
 
     def __init__(self, host=None):
 
         detected_host = self.detect()
 
         if host is not None and host != detected_host:
-            raise ValueError(f'detected host: "{detected_host}" does not match host: "{host}"')
+            raise ValueError(
+                f'detected host: "{detected_host}" does not match host: "{host}"')
 
         self.machine = detected_host
         self.info = self._get_info
@@ -57,7 +58,8 @@ def detect(cls):
 
     @property
     def _get_info(self) -> dict:
-        hostfile = Path(os.path.join(os.path.dirname(__file__), f'hosts/{self.machine.lower()}.yaml'))
+        hostfile = Path(os.path.join(os.path.dirname(__file__),
+                                     f'hosts/{self.machine.lower()}.yaml'))
         try:
             info = YAMLFile(path=hostfile)
         except FileNotFoundError:
diff --git a/workflow/hosts/awspw.yaml b/workflow/hosts/awspw.yaml
new file mode 100644
index 0000000000..8acbf87fcf
--- /dev/null
+++ b/workflow/hosts/awspw.yaml
@@ -0,0 +1,24 @@
+BASE_GIT: '/scratch1/NCEPDEV/global/glopara/git' #TODO: This does not yet exist.
+DMPDIR: '/scratch1/NCEPDEV/global/glopara/dump' # TODO: This does not yet exist.
+PACKAGEROOT: '/scratch1/NCEPDEV/global/glopara/nwpara' #TODO: This does not yet exist.
+COMROOT: '/scratch1/NCEPDEV/global/glopara/com' #TODO: This does not yet exist.
+COMINsyn: '${COMROOT}/gfs/prod/syndat' #TODO: This does not yet exist.
+HOMEDIR: '/contrib/${USER}'
+STMP: '/lustre/${USER}/stmp2/'
+PTMP: '/lustre/${USER}/stmp4/'
+NOSCRUB: $HOMEDIR
+ACCOUNT: hwufscpldcld
+SCHEDULER: slurm
+QUEUE: batch
+QUEUE_SERVICE: batch
+PARTITION_BATCH: compute
+PARTITION_SERVICE: compute
+CHGRP_RSTPROD: 'YES'
+CHGRP_CMD: 'chgrp rstprod' # TODO: This is not yet supported.
+HPSSARCH: 'YES'
+HPSS_PROJECT: emc-global #TODO: See `ATARDIR` below.
+LOCALARCH: 'NO'
+ATARDIR: '/NCEPDEV/${HPSS_PROJECT}/1year/${USER}/${machine}/scratch/${PSLOT}' # TODO: This will not yet work from AWS.
+MAKE_NSSTBUFR: 'NO'
+MAKE_ACFTBUFR: 'NO'
+SUPPORTED_RESOLUTIONS: ['C48'] # TODO: Test and support all cubed-sphere resolutions.