From b3f12958acc49ade48990e56f084b9752dd090c5 Mon Sep 17 00:00:00 2001 From: Michael Lueken <63728921+MichaelLueken@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:54:42 -0500 Subject: [PATCH] [develop] Enable UPP 2d decomposition (#917) Changes to enable 2d decomposition include: * parm/model_configure - Added itasks to the model_configure file (values greater than 1 enable 2d decomposition in inline post). * scripts/exregional_run_post.sh - Added numx to the end of the &NAMPGB namelist options (values of numx greater than 1 enable 2d decomposition in offline post). * ush/create_model_configure_file.py - Added itasks to the list of variables to be added to the model_configure file. * tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2.yaml - Added ITASKS: 2 to enable inline post 2d decomposition. * tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta.yaml - Added NUMX: 2 to enable offline post 2d decomposition. The ufs-weather-model (020e783), UPP (fae617b), and UFS_UTILS (dc0e4a6) hashes have been updated in this work. 
--------- Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- Externals.cfg | 6 +++--- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 12 +++++++++-- parm/model_configure | 1 + scripts/exregional_run_post.sh | 20 +++++++++---------- tests/WE2E/run_WE2E_tests.py | 9 +++++++++ ...cs_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2.yaml | 2 ++ ...m_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta.yaml | 4 +++- .../test_create_model_configure_file.py | 1 + ush/config_defaults.yaml | 20 +++++++++++++++++-- ush/create_model_configure_file.py | 1 + 10 files changed, 58 insertions(+), 18 deletions(-) diff --git a/Externals.cfg b/Externals.cfg index 15301c921f..4bae74b316 100644 --- a/Externals.cfg +++ b/Externals.cfg @@ -3,7 +3,7 @@ protocol = git repo_url = https://github.com/ufs-community/UFS_UTILS # Specify either a branch name or a hash but not both. #branch = develop -hash = 6a7d534 +hash = dc0e4a6 local_path = sorc/UFS_UTILS required = True @@ -12,7 +12,7 @@ protocol = git repo_url = https://github.com/ufs-community/ufs-weather-model # Specify either a branch name or a hash but not both. #branch = develop -hash = 788897d +hash = 020e783 local_path = sorc/ufs-weather-model required = True @@ -21,7 +21,7 @@ protocol = git repo_url = https://github.com/NOAA-EMC/UPP # Specify either a branch name or a hash but not both. #branch = develop -hash = baa7751 +hash = fae617b local_path = sorc/UPP required = True diff --git a/docs/UsersGuide/source/CustomizingTheWorkflow/ConfigWorkflow.rst b/docs/UsersGuide/source/CustomizingTheWorkflow/ConfigWorkflow.rst index 08370c5e2d..b0b0301973 100644 --- a/docs/UsersGuide/source/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/docs/UsersGuide/source/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1139,6 +1139,10 @@ These parameters set values in the Weather Model's ``model_configure`` file. 
``WRITE_DOPOST``: (Default: false) Flag that determines whether to use the inline post option, which calls the Unified Post Processor (:term:`UPP`) from within the UFS Weather Model. The default ``WRITE_DOPOST: false`` does not use the inline post functionality, and the ``run_post`` tasks are called from outside of the UFS Weather Model. If ``WRITE_DOPOST: true``, the ``WRITE_DOPOST`` flag in the ``model_configure`` file will be set to true, and the post-processing (:term:`UPP`) tasks will be called from within the Weather Model. This means that the post-processed files (in :term:`grib2` format) are output by the Weather Model at the same time that it outputs the ``dynf###.nc`` and ``phyf###.nc`` files. Setting ``WRITE_DOPOST: true`` turns off the separate ``run_post`` task in ``setup.py`` to avoid unnecessary computations. Valid values: ``True`` | ``False`` +``ITASKS``: (Default: 1) + Variable denoting the number of write tasks in the ``i`` direction in the current group. Used for inline post 2D decomposition. Setting this variable to a value greater than 1 will enable 2D decomposition. + Note that 2D decomposition does not yet work with GNU compilers, so this value will be reset to 1 automatically when using GNU compilers (i.e., when ``COMPILER: gnu``). + .. _CompParams: Computational Parameters @@ -1280,10 +1284,10 @@ Customized Post Configuration Parameters Set parameters for customizing the :term:`UPP`. -``USE_CUSTOM_POST_CONFIG_FILE``: (Default: false) +``USE_CUSTOM_POST_CONFIG_FILE``: (Default: true) Flag that determines whether a user-provided custom configuration file should be used for post-processing the model data. If this is set to true, then the workflow will use the custom post-processing (:term:`UPP`) configuration file specified in ``CUSTOM_POST_CONFIG_FP``. Otherwise, a default configuration file provided in the UPP repository will be used. 
Valid values: ``True`` | ``False`` -``CUSTOM_POST_CONFIG_FP``: (Default: "") +``CUSTOM_POST_CONFIG_FP``: (Default: ``"{{ user.SORCdir }}/ufs-weather-model/tests/parm/postxconfig-NT-fv3lam.txt"``) The full path to the custom post flat file, including filename, to be used for post-processing. This is only used if ``CUSTOM_POST_CONFIG_FILE`` is set to true. ``POST_OUTPUT_DOMAIN_NAME``: (Default: ``'{{ workflow.PREDEF_GRID_NAME }}'``) @@ -1298,6 +1302,10 @@ Set parameters for customizing the :term:`UPP`. ``TESTBED_FIELDS_FN``: (Default: "") The file that lists grib2 fields to be extracted for testbed files. An empty string means no need to generate testbed files. +``NUMX``: (Default: 1) + The number of ``i`` regions in a 2D decomposition. Each ``i`` row is distributed to ``NUMX`` ranks. Used for offline post 2D decomposition. Set ``NUMX`` to a value greater than 1 to enable 2D decomposition. + Note that 2D decomposition does not yet work with GNU compilers, so this value will be reset to 1 automatically when using GNU compilers (i.e., when ``COMPILER: gnu``). + RUN_PRDGEN Configuration Parameters ===================================== diff --git a/parm/model_configure b/parm/model_configure index 58ea7378d4..d22adf3f3a 100644 --- a/parm/model_configure +++ b/parm/model_configure @@ -21,6 +21,7 @@ jchunk2d: -1 ichunk3d: -1 jchunk3d: -1 kchunk3d: -1 +itasks: {{ itasks }} quilting: {{ quilting }} {% if quilting %} # diff --git a/scripts/exregional_run_post.sh b/scripts/exregional_run_post.sh index edf35ef2a4..60f87c3eaf 100755 --- a/scripts/exregional_run_post.sh +++ b/scripts/exregional_run_post.sh @@ -108,15 +108,15 @@ fi cp_vrfy ${post_config_fp} ./postxconfig-NT.txt cp_vrfy ${PARMdir}/upp/params_grib2_tbl_new . 
if [ ${USE_CRTM} = "TRUE" ]; then - cp_vrfy ${CRTM_DIR}/fix/EmisCoeff/IR_Water/Big_Endian/Nalli.IRwater.EmisCoeff.bin ./ - cp_vrfy ${CRTM_DIR}/fix/EmisCoeff/MW_Water/Big_Endian/FAST*.bin ./ - cp_vrfy ${CRTM_DIR}/fix/EmisCoeff/IR_Land/SEcategory/Big_Endian/NPOESS.IRland.EmisCoeff.bin ./ - cp_vrfy ${CRTM_DIR}/fix/EmisCoeff/IR_Snow/SEcategory/Big_Endian/NPOESS.IRsnow.EmisCoeff.bin ./ - cp_vrfy ${CRTM_DIR}/fix/EmisCoeff/IR_Ice/SEcategory/Big_Endian/NPOESS.IRice.EmisCoeff.bin ./ - cp_vrfy ${CRTM_DIR}/fix/AerosolCoeff/Big_Endian/AerosolCoeff.bin ./ - cp_vrfy ${CRTM_DIR}/fix/CloudCoeff/Big_Endian/CloudCoeff.bin ./ - cp_vrfy ${CRTM_DIR}/fix/SpcCoeff/Big_Endian/*.bin ./ - cp_vrfy ${CRTM_DIR}/fix/TauCoeff/ODPS/Big_Endian/*.bin ./ + cp_vrfy ${CRTM_DIR}/Nalli.IRwater.EmisCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/FAST*.bin ./ + cp_vrfy ${CRTM_DIR}/NPOESS.IRland.EmisCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/NPOESS.IRsnow.EmisCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/NPOESS.IRice.EmisCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/AerosolCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/CloudCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/*.SpcCoeff.bin ./ + cp_vrfy ${CRTM_DIR}/*.TauCoeff.bin ./ print_info_msg " ==================================================================== Copying the external CRTM fix files from CRTM_DIR to the temporary @@ -201,7 +201,7 @@ fileNameFlux='${phy_file}' / &NAMPGB - KPO=47,PO=1000.,975.,950.,925.,900.,875.,850.,825.,800.,775.,750.,725.,700.,675.,650.,625.,600.,575.,550.,525.,500.,475.,450.,425.,400.,375.,350.,325.,300.,275.,250.,225.,200.,175.,150.,125.,100.,70.,50.,30.,20.,10.,7.,5.,3.,2.,1.,${post_itag_add} + KPO=47,PO=1000.,975.,950.,925.,900.,875.,850.,825.,800.,775.,750.,725.,700.,675.,650.,625.,600.,575.,550.,525.,500.,475.,450.,425.,400.,375.,350.,325.,300.,275.,250.,225.,200.,175.,150.,125.,100.,70.,50.,30.,20.,10.,7.,5.,3.,2.,1.,${post_itag_add},numx=${NUMX} / EOF # diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index 3faca0902e..5c720e7d93 100755 --- 
a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -228,6 +228,15 @@ def run_we2e_tests(homedir, args) -> None: test_aqm_input_basedir = machine_defaults['platform']['TEST_AQM_INPUT_BASEDIR'] test_cfg['cpl_aqm_parm']['DCOMINfire_default'] = f"{test_aqm_input_basedir}/RAVE_fire" + if args.compiler == "gnu": + # 2D decomposition doesn't work with GNU compilers. Deactivate 2D decomposition for GNU + if 'task_run_post' in test_cfg: + test_cfg['task_run_post'].update({"NUMX": 1}) + logging.info(f"NUMX has been reset to 1 due to issues encountered with GNU compilers") + if 'task_run_fcst' in test_cfg: + test_cfg['task_run_fcst'].update({"ITASKS": 1}) + logging.info(f"ITASKS has been reset to 1 due to issues encountered with GNU compilers") + logging.debug(f"Writing updated config.yaml for test {test_name}\n"\ "based on specified command-line arguments:\n") logging.debug(cfg_to_yaml_str(test_cfg)) diff --git a/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2.yaml b/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2.yaml index d0d184d288..13e7e5f427 100644 --- a/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2.yaml +++ b/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2.yaml @@ -4,6 +4,7 @@ metadata: completes successfully on the RRFS_CONUS_25km grid using the GFS_v15p2 physics suite with ICs and LBCs derived from the FV3GFS. In addition, this tests the "inline post" option (WRITE_DOPOST: true) + and enables UPP inline 2D decomposition. 
user: RUN_ENVIR: community workflow: @@ -22,3 +23,4 @@ task_get_extrn_lbcs: USE_USER_STAGED_EXTRN_FILES: true task_run_fcst: WRITE_DOPOST: true + ITASKS: 2 diff --git a/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta.yaml b/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta.yaml index 2a5124e9ae..dd5f5a464a 100644 --- a/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta.yaml +++ b/tests/WE2E/test_configs/grids_extrn_mdls_suites_community/config.grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta.yaml @@ -3,7 +3,7 @@ metadata: This test is to ensure that the workflow running in community mode completes successfully on the RRFS_CONUScompact_25km grid using the RRFS_v1beta physics suite with ICs derived from the HRRR and LBCs derived from the RAP. - Also tests the "DOT_OR_USCORE" option + It also tests the "DOT_OR_USCORE" option and enables offline UPP 2D decomposition. 
user: RUN_ENVIR: community workflow: @@ -25,3 +25,5 @@ task_get_extrn_lbcs: USE_USER_STAGED_EXTRN_FILES: true EXTRN_MDL_FILES_LBCS: - '{yy}{jjj}{hh}00{fcst_hr:02d}00' +task_run_post: + NUMX: 2 diff --git a/tests/test_python/test_create_model_configure_file.py b/tests/test_python/test_create_model_configure_file.py index d98f2d56ca..9475028505 100644 --- a/tests/test_python/test_create_model_configure_file.py +++ b/tests/test_python/test_create_model_configure_file.py @@ -47,6 +47,7 @@ def setUp(self): set_env_var("FHROT", 0) set_env_var("DT_ATMOS", 1) set_env_var("RESTART_INTERVAL", 4) + set_env_var("ITASKS", 1) set_env_var("WRTCMP_write_groups", 1) set_env_var("WRTCMP_write_tasks_per_group", 2) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 9b044e3524..b35b6108c7 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -1787,12 +1787,21 @@ task_run_fcst: # weather model]. If this is set to true, the the run_post task will # be deactivated. # + # ITASKS: + # Variable denoting the number of write tasks in the i direction in the + # current group. Used for inline post 2D decomposition. Setting this + # variable to a value greater than 1 will enable 2D decomposition. + # Default setting is 1. + # Note that 2D decomposition does not yet work with GNU compilers, so this value + # will be reset to 1 automatically when using GNU compilers (i.e., when COMPILER: gnu). + # #----------------------------------------------------------------------- # DT_ATMOS: "" FHROT: 0 RESTART_INTERVAL: 0 WRITE_DOPOST: false + ITASKS: 1 # #----------------------------------------------------------------------- # @@ -2067,12 +2076,19 @@ task_run_post: # The file that lists grib2 fields to be extracted for testbed files. # An empty string means no need to generate testbed files. # + # NUMX: + # The number of i regions in a 2D decomposition. Each i row is + # distributed to numx ranks. Default value of numx is 1. 
+ # Note that 2D decomposition does not yet work with GNU compilers, so this value + # will be reset to 1 automatically when using GNU compilers (i.e., when COMPILER: gnu). + # #----------------------------------------------------------------------- # - USE_CUSTOM_POST_CONFIG_FILE: false - CUSTOM_POST_CONFIG_FP: "" + USE_CUSTOM_POST_CONFIG_FILE: true + CUSTOM_POST_CONFIG_FP: "{{ user.SORCdir }}/ufs-weather-model/tests/parm/postxconfig-NT-fv3lam.txt" POST_OUTPUT_DOMAIN_NAME: '{{ workflow.PREDEF_GRID_NAME }}' TESTBED_FIELDS_FN: "" + NUMX: 1 #---------------------------- # RUN PRDGEN config parameters diff --git a/ush/create_model_configure_file.py b/ush/create_model_configure_file.py index 79994aa695..c2778f1be5 100644 --- a/ush/create_model_configure_file.py +++ b/ush/create_model_configure_file.py @@ -79,6 +79,7 @@ def create_model_configure_file( "fhrot": fhrot, "dt_atmos": DT_ATMOS, "restart_interval": RESTART_INTERVAL, + "itasks": ITASKS, "write_dopost": f".{lowercase(str(WRITE_DOPOST))}.", "quilting": f".{lowercase(str(QUILTING))}.", "output_grid": WRTCMP_output_grid,