Skip to content

Commit

Permalink
Add latent_heat views to temporary workspace instead of allocating ev…
Browse files Browse the repository at this point in the history
…ery timestep
  • Loading branch information
tcclevenger committed Sep 16, 2024
1 parent 6e6c956 commit fd86a15
Show file tree
Hide file tree
Showing 13 changed files with 62 additions and 238 deletions.
13 changes: 6 additions & 7 deletions components/eamxx/src/physics/p3/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ if (NOT EAMXX_ENABLE_GPU OR Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR Kokkos
eti/p3_ice_relaxation_timescale.cpp
eti/p3_ice_nucleation.cpp
eti/p3_ice_cldliq_wet_growth.cpp
eti/p3_get_latent_heat.cpp
eti/p3_check_values.cpp
eti/p3_incloud_mixingratios.cpp
eti/p3_subgrid_variance_scaling.cpp
Expand All @@ -63,13 +62,13 @@ endif()

# List of dispatch source files if monolithic kernels are off
set(P3_SK_SRCS
disp/p3_check_values_impl_disp.cpp
disp/p3_ice_sed_impl_disp.cpp
disp/p3_main_impl_part1_disp.cpp
disp/p3_check_values_impl_disp.cpp
disp/p3_ice_sed_impl_disp.cpp
disp/p3_main_impl_part1_disp.cpp
disp/p3_main_impl_part3_disp.cpp
disp/p3_cloud_sed_impl_disp.cpp
disp/p3_main_impl_disp.cpp
disp/p3_main_impl_part2_disp.cpp
disp/p3_cloud_sed_impl_disp.cpp
disp/p3_main_impl_disp.cpp
disp/p3_main_impl_part2_disp.cpp
disp/p3_rain_sed_impl_disp.cpp
)

Expand Down
15 changes: 11 additions & 4 deletions components/eamxx/src/physics/p3/disp/p3_main_impl_disp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ ::p3_main_init_disp(
const uview_2d<Spack>& diag_eff_radius_qr,
const uview_2d<Spack>& inv_cld_frac_i, const uview_2d<Spack>& inv_cld_frac_l, const uview_2d<Spack>& inv_cld_frac_r,
const uview_2d<Spack>& exner, const uview_2d<Spack>& T_atm, const uview_2d<Spack>& qv, const uview_2d<Spack>& inv_dz,
const uview_2d<Spack>& latent_heat_vapor, const uview_2d<Spack>& latent_heat_sublim, const uview_2d<Spack>& latent_heat_fusion,
const uview_1d<Scalar>& precip_liq_surf, const uview_1d<Scalar>& precip_ice_surf,
const uview_2d<Spack>& mu_r, const uview_2d<Spack>& lamr, const uview_2d<Spack>& logn0r, const uview_2d<Spack>& nu,
const uview_2d<Spack>& cdist, const uview_2d<Spack>& cdist1, const uview_2d<Spack>& cdistr,
Expand All @@ -43,6 +44,9 @@ ::p3_main_init_disp(
precip_liq_surf(i) = 0;
precip_ice_surf(i) = 0;

constexpr Scalar latvap = C::LatVap;
constexpr Scalar latice = C::LatIce;

Kokkos::parallel_for(
Kokkos::TeamVectorRange(team, nk_pack), [&] (Int k) {
diag_equiv_reflectivity(i,k) = -99;
Expand All @@ -58,6 +62,9 @@ ::p3_main_init_disp(
T_atm(i,k) = th_atm(i,k) * exner(i,k);
qv(i,k) = max(qv(i,k), 0);
inv_dz(i,k) = 1 / dz(i,k);
latent_heat_vapor(i,k) = latvap;
latent_heat_sublim(i,k) = latvap+latice;
latent_heat_fusion(i,k) = latice;
mu_r(i,k) = 0.;
lamr(i,k) = 0.;
logn0r(i,k) = 0.;
Expand Down Expand Up @@ -115,10 +122,6 @@ ::p3_main_internal_disp(
{
using ExeSpace = typename KT::ExeSpace;

view_2d<Spack> latent_heat_sublim("latent_heat_sublim", nj, nk), latent_heat_vapor("latent_heat_vapor", nj, nk), latent_heat_fusion("latent_heat_fusion", nj, nk);

get_latent_heat(nj, nk, latent_heat_vapor, latent_heat_sublim, latent_heat_fusion);

const Int nk_pack = ekat::npack<Spack>(nk);

// load constants into local vars
Expand Down Expand Up @@ -225,6 +228,9 @@ ::p3_main_internal_disp(
auto flux_qit = temporaries.flux_qit;
auto v_qr = temporaries.v_qr;
auto v_nr = temporaries.v_nr;
auto latent_heat_vapor = temporaries.latent_heat_vapor;
auto latent_heat_sublim = temporaries.latent_heat_sublim;
auto latent_heat_fusion = temporaries.latent_heat_fusion;

// we do not want to measure init stuff
auto start = std::chrono::steady_clock::now();
Expand All @@ -234,6 +240,7 @@ ::p3_main_internal_disp(
nj, nk_pack, cld_frac_i, cld_frac_l, cld_frac_r, inv_exner, th, dz, diag_equiv_reflectivity,
ze_ice, ze_rain, diag_eff_radius_qc, diag_eff_radius_qi, diag_eff_radius_qr,
inv_cld_frac_i, inv_cld_frac_l, inv_cld_frac_r, exner, T_atm, qv, inv_dz,
latent_heat_vapor, latent_heat_sublim, latent_heat_fusion,
diagnostic_outputs.precip_liq_surf, diagnostic_outputs.precip_ice_surf,
mu_r, lamr, logn0r, nu, cdist, cdist1, cdistr,
qc_incld, qr_incld, qi_incld, qm_incld, nc_incld, nr_incld, ni_incld, bm_incld,
Expand Down
11 changes: 7 additions & 4 deletions components/eamxx/src/physics/p3/eamxx_p3_process_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ size_t P3Microphysics::requested_buffer_size_in_bytes() const

// Number of Reals needed by the WorkspaceManager passed to p3_main
const auto policy = ekat::ExeSpaceUtils<KT::ExeSpace>::get_default_team_policy(m_num_cols, nk_pack);
const size_t wsm_request = WSM::get_total_bytes_needed(nk_pack_p1, 52, policy);
const size_t wsm_request = WSM::get_total_bytes_needed(nk_pack_p1, 55, policy);

return interface_request + wsm_request;
}
Expand Down Expand Up @@ -188,7 +188,7 @@ void P3Microphysics::init_buffers(const ATMBufferManager &buffer_manager)
&m_buffer.ntend_ignore, &m_buffer.mu_c, &m_buffer.lamc, &m_buffer.qr_evap_tend, &m_buffer.v_qc,
&m_buffer.v_nc, &m_buffer.flux_qx, &m_buffer.flux_nx, &m_buffer.v_qit, &m_buffer.v_nit,
&m_buffer.flux_nit, &m_buffer.flux_bir, &m_buffer.flux_qir, &m_buffer.flux_qit, &m_buffer.v_qr,
&m_buffer.v_nr
&m_buffer.v_nr, &m_buffer.latent_heat_vapor, &m_buffer.latent_heat_sublim, &m_buffer.latent_heat_fusion
#endif
};
for (int i=0; i<Buffer::num_2d_vector; ++i) {
Expand All @@ -210,7 +210,7 @@ void P3Microphysics::init_buffers(const ATMBufferManager &buffer_manager)
// Compute workspace manager size to check used memory
// vs. requested memory
const auto policy = ekat::ExeSpaceUtils<KT::ExeSpace>::get_default_team_policy(m_num_cols, nk_pack);
const int wsm_size = WSM::get_total_bytes_needed(nk_pack_p1, 52, policy)/sizeof(Spack);
const int wsm_size = WSM::get_total_bytes_needed(nk_pack_p1, 55, policy)/sizeof(Spack);
s_mem += wsm_size;

size_t used_mem = (reinterpret_cast<Real*>(s_mem) - buffer_manager.get_memory())*sizeof(Real);
Expand Down Expand Up @@ -398,6 +398,9 @@ void P3Microphysics::initialize_impl (const RunType /* run_type */)
temporaries.flux_qit = m_buffer.flux_qit;
temporaries.v_qr = m_buffer.v_qr;
temporaries.v_nr = m_buffer.v_nr;
temporaries.latent_heat_vapor = m_buffer.latent_heat_vapor;
temporaries.latent_heat_sublim = m_buffer.latent_heat_sublim;
temporaries.latent_heat_fusion = m_buffer.latent_heat_fusion;
#endif

// -- Set values for the post-amble structure
Expand Down Expand Up @@ -427,7 +430,7 @@ void P3Microphysics::initialize_impl (const RunType /* run_type */)

// Setup WSM for internal local variables
const auto policy = ekat::ExeSpaceUtils<KT::ExeSpace>::get_default_team_policy(m_num_cols, nk_pack);
workspace_mgr.setup(m_buffer.wsm_data, nk_pack_p1, 52, policy);
workspace_mgr.setup(m_buffer.wsm_data, nk_pack_p1, 55, policy);
}

// =========================================================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ class P3Microphysics : public AtmosphereProcess
static constexpr int num_1d_scalar = 2; //no 2d vars now, but keeping 1d struct for future expansion
// 2d view packed, size (ncol, nlev_packs)
#ifdef SCREAM_P3_SMALL_KERNELS
static constexpr int num_2d_vector = 64;
static constexpr int num_2d_vector = 67;
#else
static constexpr int num_2d_vector = 8;
#endif
Expand Down Expand Up @@ -393,7 +393,7 @@ class P3Microphysics : public AtmosphereProcess
diag_diam_qi, pratot, prctot, qtend_ignore, ntend_ignore,
mu_c, lamc, qr_evap_tend, v_qc, v_nc, flux_qx, flux_nx,
v_qit, v_nit, flux_nit, flux_bir, flux_qir, flux_qit,
v_qr, v_nr;
v_qr, v_nr, latent_heat_vapor, latent_heat_sublim, latent_heat_fusion;
#endif

suview_2d col_location;
Expand Down
14 changes: 0 additions & 14 deletions components/eamxx/src/physics/p3/eti/p3_get_latent_heat.cpp

This file was deleted.

24 changes: 0 additions & 24 deletions components/eamxx/src/physics/p3/impl/p3_get_latent_heat_impl.hpp

This file was deleted.

39 changes: 23 additions & 16 deletions components/eamxx/src/physics/p3/impl/p3_main_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,16 @@ ::p3_main_init(
const uview_1d<Spack>& T_atm,
const uview_1d<Spack>& qv,
const uview_1d<Spack>& inv_dz,
const uview_1d<Spack>& latent_heat_vapor,
const uview_1d<Spack>& latent_heat_sublim,
const uview_1d<Spack>& latent_heat_fusion,
Scalar& precip_liq_surf,
Scalar& precip_ice_surf,
view_1d_ptr_array<Spack, 36>& zero_init)
{
constexpr Scalar latvap = C::LatVap;
constexpr Scalar latice = C::LatIce;

precip_liq_surf = 0;
precip_ice_surf = 0;

Expand All @@ -63,6 +69,10 @@ ::p3_main_init(
T_atm(k) = th_atm(k) * exner(k);
qv(k) = max(qv(k), 0);
inv_dz(k) = 1 / dz(k);
// TODO: use constants throughout P3 for latent_heat instead of views
latent_heat_vapor(k) = latvap;
latent_heat_sublim(k) = latvap+latice;
latent_heat_fusion(k) = latice;

for (size_t j = 0; j < zero_init.size(); ++j) {
(*zero_init[j])(k) = 0;
Expand All @@ -89,10 +99,6 @@ ::p3_main_internal(
using ExeSpace = typename KT::ExeSpace;
using ScratchViewType = Kokkos::View<bool*, typename ExeSpace::scratch_memory_space>;

view_2d<Spack> latent_heat_sublim("latent_heat_sublim", nj, nk), latent_heat_vapor("latent_heat_vapor", nj, nk), latent_heat_fusion("latent_heat_fusion", nj, nk);

get_latent_heat(nj, nk, latent_heat_vapor, latent_heat_sublim, latent_heat_fusion);

const Int nk_pack = ekat::npack<Spack>(nk);
const auto scratch_size = ScratchViewType::shmem_size(2);
const auto policy = ekat::ExeSpaceUtils<ExeSpace>::get_default_team_policy(nj, nk_pack).set_scratch_size(0, Kokkos::PerTeam(scratch_size));
Expand Down Expand Up @@ -141,9 +147,12 @@ ::p3_main_internal(
qtend_ignore, ntend_ignore,

// Variables still used in F90 but removed from C++ interface
mu_c, lamc, qr_evap_tend;
mu_c, lamc, qr_evap_tend,

// TODO: use constants instead of WSM vars for these values
latent_heat_vapor, latent_heat_sublim, latent_heat_fusion;

workspace.template take_many_and_reset<44>(
workspace.template take_many_and_reset<47>(
{
"mu_r", "T_atm", "lamr", "logn0r", "nu", "cdist", "cdist1", "cdistr",
"inv_cld_frac_i", "inv_cld_frac_l", "inv_cld_frac_r", "qc_incld", "qr_incld", "qi_incld", "qm_incld",
Expand All @@ -152,7 +161,7 @@ ::p3_main_internal(
"rhofacr", "rhofaci", "acn", "qv_sat_l", "qv_sat_i", "sup", "qv_supersat_i",
"tmparr1", "exner", "diag_equiv_reflectivity", "diag_vm_qi", "diag_diam_qi",
"pratot", "prctot", "qtend_ignore", "ntend_ignore",
"mu_c", "lamc", "qr_evap_tend"
"mu_c", "lamc", "qr_evap_tend", "latent_heat_vapor", "latent_heat_sublim", "latent_heat_fusion"
},
{
&mu_r, &T_atm, &lamr, &logn0r, &nu, &cdist, &cdist1, &cdistr,
Expand All @@ -162,7 +171,7 @@ ::p3_main_internal(
&rhofacr, &rhofaci, &acn, &qv_sat_l, &qv_sat_i, &sup, &qv_supersat_i,
&tmparr1, &exner, &diag_equiv_reflectivity, &diag_vm_qi, &diag_diam_qi,
&pratot, &prctot, &qtend_ignore, &ntend_ignore,
&mu_c, &lamc, &qr_evap_tend
&mu_c, &lamc, &qr_evap_tend, &latent_heat_vapor, &latent_heat_sublim, &latent_heat_fusion
});

// Get single-column subviews of all inputs, shouldn't need any i-indexing
Expand Down Expand Up @@ -201,9 +210,6 @@ ::p3_main_internal(
const auto oliq_ice_exchange = ekat::subview(history_only.liq_ice_exchange, i);
const auto ovap_liq_exchange = ekat::subview(history_only.vap_liq_exchange, i);
const auto ovap_ice_exchange = ekat::subview(history_only.vap_ice_exchange, i);
const auto olatent_heat_vapor = ekat::subview(latent_heat_vapor, i);
const auto olatent_heat_sublim = ekat::subview(latent_heat_sublim, i);
const auto olatent_heat_fusion = ekat::subview(latent_heat_fusion, i);
const auto oqv_prev = ekat::subview(diagnostic_inputs.qv_prev, i);
const auto ot_prev = ekat::subview(diagnostic_inputs.t_prev, i);

Expand All @@ -228,12 +234,13 @@ ::p3_main_internal(
ocld_frac_i, ocld_frac_l, ocld_frac_r, oinv_exner, oth, odz, diag_equiv_reflectivity,
ze_ice, ze_rain, odiag_eff_radius_qc, odiag_eff_radius_qi, odiag_eff_radius_qr,
inv_cld_frac_i, inv_cld_frac_l, inv_cld_frac_r, exner, T_atm, oqv, inv_dz,
latent_heat_vapor, latent_heat_sublim, latent_heat_fusion,
diagnostic_outputs.precip_liq_surf(i), diagnostic_outputs.precip_ice_surf(i), zero_init);

p3_main_part1(
team, nk, infrastructure.predictNc, infrastructure.prescribedCCN, infrastructure.dt,
opres, odpres, odz, onc_nuceat_tend, onccn_prescribed, oinv_exner, exner, inv_cld_frac_l, inv_cld_frac_i,
inv_cld_frac_r, olatent_heat_vapor, olatent_heat_sublim, olatent_heat_fusion,
inv_cld_frac_r, latent_heat_vapor, latent_heat_sublim, latent_heat_fusion,
T_atm, rho, inv_rho, qv_sat_l, qv_sat_i, qv_supersat_i, rhofacr,
rhofaci, acn, oqv, oth, oqc, onc, oqr, onr, oqi, oni, oqm,
obm, qc_incld, qr_incld, qi_incld, qm_incld, nc_incld, nr_incld,
Expand All @@ -252,8 +259,8 @@ ::p3_main_internal(
lookup_tables.dnu_table_vals, lookup_tables.ice_table_vals, lookup_tables.collect_table_vals, lookup_tables.revap_table_vals, opres, odpres, odz, onc_nuceat_tend, oinv_exner,
exner, inv_cld_frac_l, inv_cld_frac_i, inv_cld_frac_r, oni_activated, oinv_qc_relvar, ocld_frac_i,
ocld_frac_l, ocld_frac_r, oqv_prev, ot_prev, T_atm, rho, inv_rho, qv_sat_l, qv_sat_i, qv_supersat_i, rhofacr, rhofaci, acn,
oqv, oth, oqc, onc, oqr, onr, oqi, oni, oqm, obm, olatent_heat_vapor,
olatent_heat_sublim, olatent_heat_fusion, qc_incld, qr_incld, qi_incld, qm_incld, nc_incld,
oqv, oth, oqc, onc, oqr, onr, oqi, oni, oqm, obm, latent_heat_vapor,
latent_heat_sublim, latent_heat_fusion, qc_incld, qr_incld, qi_incld, qm_incld, nc_incld,
nr_incld, ni_incld, bm_incld, mu_c, nu, lamc, cdist, cdist1, cdistr,
mu_r, lamr, logn0r, oqv2qi_depos_tend, oprecip_total_tend, onevapr, qr_evap_tend,
ovap_liq_exchange, ovap_ice_exchange, oliq_ice_exchange,
Expand Down Expand Up @@ -296,7 +303,7 @@ ::p3_main_internal(

// homogeneous freezing of cloud and rain
homogeneous_freezing(
T_atm, oinv_exner, olatent_heat_fusion, team, nk, ktop, kbot, kdir, oqc, onc, oqr, onr, oqi,
T_atm, oinv_exner, latent_heat_fusion, team, nk, ktop, kbot, kdir, oqc, onc, oqr, onr, oqi,
oni, oqm, obm, oth);

//
Expand All @@ -306,7 +313,7 @@ ::p3_main_internal(
p3_main_part3(
team, nk_pack, runtime_options.max_total_ni, lookup_tables.dnu_table_vals, lookup_tables.ice_table_vals, oinv_exner, ocld_frac_l, ocld_frac_r, ocld_frac_i,
rho, inv_rho, rhofaci, oqv, oth, oqc, onc, oqr, onr, oqi, oni,
oqm, obm, olatent_heat_vapor, olatent_heat_sublim, mu_c, nu, lamc, mu_r, lamr,
oqm, obm, latent_heat_vapor, latent_heat_sublim, mu_c, nu, lamc, mu_r, lamr,
ovap_liq_exchange, ze_rain, ze_ice, diag_vm_qi, odiag_eff_radius_qi, diag_diam_qi,
orho_qi, diag_equiv_reflectivity, odiag_eff_radius_qc, odiag_eff_radius_qr, p3constants);

Expand Down
14 changes: 9 additions & 5 deletions components/eamxx/src/physics/p3/p3_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,9 @@ struct Functions
view_2d<Spack> flux_qir, flux_qit;
// rain sedimentation
view_2d<Spack> v_qr, v_nr;
// latent heat
// TODO: Remove and just use constants
view_2d<Spack> latent_heat_vapor, latent_heat_sublim, latent_heat_fusion;
};
#endif

Expand Down Expand Up @@ -899,9 +902,6 @@ struct Functions
Smask& log_wetgrowth, Spack& qr2qi_collect_tend, Spack& qc2qi_collect_tend, Spack& qc_growth_rate,
Spack& nr_ice_shed_tend, Spack& qc2qr_ice_shed_tend, const Smask& context = Smask(true));

// Note: not a kernel function
static void get_latent_heat(const Int& nj, const Int& nk, view_2d<Spack>& v, view_2d<Spack>& s, view_2d<Spack>& f);

KOKKOS_FUNCTION
static void check_values(const uview_1d<const Spack>& qv, const uview_1d<const Spack>& temp, const Int& ktop, const Int& kbot,
const Int& timestepcount, const bool& force_abort, const Int& source_ind, const MemberType& team,
Expand Down Expand Up @@ -949,6 +949,9 @@ struct Functions
const uview_1d<Spack>& T_atm,
const uview_1d<Spack>& qv,
const uview_1d<Spack>& inv_dz,
const uview_1d<Spack>& latent_heat_vapor,
const uview_1d<Spack>& latent_heat_sublim,
const uview_1d<Spack>& latent_heat_fusion,
Scalar& precip_liq_surf,
Scalar& precip_ice_surf,
view_1d_ptr_array<Spack, 36>& zero_init);
Expand All @@ -964,7 +967,9 @@ struct Functions
const uview_2d<Spack>& diag_eff_radius_qi, const uview_2d<Spack>& diag_eff_radius_qr, const uview_2d<Spack>& inv_cld_frac_i,
const uview_2d<Spack>& inv_cld_frac_l, const uview_2d<Spack>& inv_cld_frac_r,
const uview_2d<Spack>& exner, const uview_2d<Spack>& T_atm, const uview_2d<Spack>& qv,
const uview_2d<Spack>& inv_dz, const uview_1d<Scalar>& precip_liq_surf, const uview_1d<Scalar>& precip_ice_surf,
const uview_2d<Spack>& inv_dz,
const uview_2d<Spack>& latent_heat_vapor, const uview_2d<Spack>& latent_heat_sublim, const uview_2d<Spack>& latent_heat_fusion,
const uview_1d<Scalar>& precip_liq_surf, const uview_1d<Scalar>& precip_ice_surf,
const uview_2d<Spack>& mu_r, const uview_2d<Spack>& lamr, const uview_2d<Spack>& logn0r, const uview_2d<Spack>& nu,
const uview_2d<Spack>& cdist, const uview_2d<Spack>& cdist1, const uview_2d<Spack>& cdistr,
const uview_2d<Spack>& qc_incld, const uview_2d<Spack>& qr_incld, const uview_2d<Spack>& qi_incld,
Expand Down Expand Up @@ -1438,7 +1443,6 @@ void init_tables_from_f90_c(Real* vn_table_vals_data, Real* vm_table_vals_data,
# include "p3_ice_melting_impl.hpp"
# include "p3_calc_liq_relaxation_timescale_impl.hpp"
# include "p3_ice_cldliq_wet_growth_impl.hpp"
# include "p3_get_latent_heat_impl.hpp"
# include "p3_check_values_impl.hpp"
# include "p3_incloud_mixingratios_impl.hpp"
# include "p3_subgrid_variance_scaling_impl.hpp"
Expand Down
Loading

0 comments on commit fd86a15

Please sign in to comment.