diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index 500716be3a0..c2851d49959 100644 --- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -49,6 +49,10 @@ foreach(D IN LISTS AMReX_SPACEDIM) MLMG/AMReX_MLEBNodeFDLaplacian.cpp MLMG/AMReX_MLEBNodeFDLap_K.H MLMG/AMReX_MLEBNodeFDLap_${D}D_K.H + MLMG/AMReX_MLNodeABecLaplacian.H + MLMG/AMReX_MLNodeABecLaplacian.cpp + MLMG/AMReX_MLNodeABecLap_K.H + MLMG/AMReX_MLNodeABecLap_${D}D_K.H ) if (D EQUAL 3) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H new file mode 100644 index 00000000000..34a2ddda6f7 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_1D_K.H @@ -0,0 +1,30 @@ +#ifndef AMREX_MLNODEABECLAP_1D_K_H_ +#define AMREX_MLNODEABECLAP_1D_K_H_ + +namespace amrex { + +inline void +mlndabeclap_gauss_seidel_aa (Box const& /*bx*/, Array4 const& /*sol*/, + Array4 const& /*rhs*/, + Real /*alpha*/, Real /*beta*/, + Array4 const& /*acf*/, + Array4 const& /*bcf*/, + Array4 const& /*msk*/, + GpuArray const& /*dxinv*/) noexcept +{} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +mlndabeclap_jacobi_aa (int /*i*/, int /*j*/, int /*k*/, + Array4 const& /*sol*/, + Real /*lap*/, + Array4 const& /*rhs*/, + Real /*alpha*/, Real /*beta*/, + Array4 const& /*acf*/, + Array4 const& /*bcf*/, + Array4 const& /*msk*/, + GpuArray const& /*dxinv*/) noexcept +{} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H new file mode 100644 index 00000000000..3418b19d279 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_2D_K.H @@ -0,0 +1,67 @@ +#ifndef AMREX_MLNODEABECLAP_2D_K_H_ +#define AMREX_MLNODEABECLAP_2D_K_H_ + +namespace amrex { + +inline void +mlndabeclap_gauss_seidel_aa (Box const& bx, Array4 const& sol, + Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + Real facx = Real(1.0/6.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/6.0)*dxinv[1]*dxinv[1]; + Real fxy = facx + facy; + Real f2xmy = Real(2.0)*facx - facy; + Real fmx2y = Real(2.0)*facy - facx; + + amrex::Loop(bx, [=] (int i, int j, int k) noexcept + { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real s0 = (-Real(2.0))*fxy*(bcf(i-1,j-1,k)+bcf(i,j-1,k)+bcf(i-1,j,k)+bcf(i,j,k)); + Real lap = sol(i-1,j-1,k)*fxy*bcf(i-1,j-1,k) + + sol(i+1,j-1,k)*fxy*bcf(i ,j-1,k) + + sol(i-1,j+1,k)*fxy*bcf(i-1,j ,k) + + sol(i+1,j+1,k)*fxy*bcf(i ,j ,k) + + sol(i-1,j,k)*f2xmy*(bcf(i-1,j-1,k)+bcf(i-1,j,k)) + + sol(i+1,j,k)*f2xmy*(bcf(i ,j-1,k)+bcf(i ,j,k)) + + sol(i,j-1,k)*fmx2y*(bcf(i-1,j-1,k)+bcf(i,j-1,k)) + + sol(i,j+1,k)*fmx2y*(bcf(i-1,j ,k)+bcf(i,j ,k)) + + sol(i,j,k)*s0; + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += (rhs(i,j,k) - Ax) / (alpha*acf(i,j,k)-beta*s0); + } + }); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +mlndabeclap_jacobi_aa (int i, int j, int k, Array4 const& sol, + Real lap, Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real fac = -Real(2.0/6.0)*(dxinv[0]*dxinv[0] + dxinv[1]*dxinv[1]); + Real s0 = fac*(bcf(i-1,j-1,k)+bcf(i,j-1,k)+bcf(i-1,j,k)+bcf(i,j,k)); + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax) + / (alpha*acf(i,j,k)-beta*s0); + } + +} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H new file mode 100644 index 00000000000..5ddb93a958c --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_3D_K.H @@ -0,0 +1,93 @@ +#ifndef AMREX_MLNODEABECLAP_3D_K_H_ +#define AMREX_MLNODEABECLAP_3D_K_H_ + +namespace amrex { + +inline void +mlndabeclap_gauss_seidel_aa (Box const& bx, Array4 const& sol, + Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + Real facx = Real(1.0/36.0)*dxinv[0]*dxinv[0]; + Real facy = Real(1.0/36.0)*dxinv[1]*dxinv[1]; + Real facz = Real(1.0/36.0)*dxinv[2]*dxinv[2]; + Real fxyz = facx + facy + facz; + Real fmx2y2z = -facx + Real(2.0)*facy + Real(2.0)*facz; + Real f2xmy2z = Real(2.0)*facx - facy + Real(2.0)*facz; + Real f2x2ymz = Real(2.0)*facx + Real(2.0)*facy - facz; + Real f4xm2ym2z = Real(4.0)*facx - Real(2.0)*facy - Real(2.0)*facz; + Real fm2x4ym2z = -Real(2.0)*facx + Real(4.0)*facy - Real(2.0)*facz; + Real fm2xm2y4z = -Real(2.0)*facx - Real(2.0)*facy + Real(4.0)*facz; + + amrex::LoopOnCpu(bx, [=] (int i, int j, int k) noexcept + { + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real s0 = Real(-4.0)*fxyz*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i,j,k-1) + +bcf(i-1,j-1,k )+bcf(i,j-1,k )+bcf(i-1,j,k )+bcf(i,j,k )); + Real lap = sol(i,j,k)*s0 + + fxyz*(sol(i-1,j-1,k-1)*bcf(i-1,j-1,k-1) + + sol(i+1,j-1,k-1)*bcf(i ,j-1,k-1) + + sol(i-1,j+1,k-1)*bcf(i-1,j ,k-1) + + sol(i+1,j+1,k-1)*bcf(i ,j ,k-1) + + sol(i-1,j-1,k+1)*bcf(i-1,j-1,k ) + + sol(i+1,j-1,k+1)*bcf(i ,j-1,k ) + + sol(i-1,j+1,k+1)*bcf(i-1,j ,k ) + + sol(i+1,j+1,k+1)*bcf(i ,j ,k )) + + fmx2y2z*(sol(i ,j-1,k-1)*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)) + + sol(i ,j+1,k-1)*(bcf(i-1,j ,k-1)+bcf(i,j ,k-1)) + + sol(i ,j-1,k+1)*(bcf(i-1,j-1,k )+bcf(i,j-1,k )) + + sol(i ,j+1,k+1)*(bcf(i-1,j ,k )+bcf(i,j ,k ))) + + f2xmy2z*(sol(i-1,j ,k-1)*(bcf(i-1,j-1,k-1)+bcf(i-1,j,k-1)) + + sol(i+1,j ,k-1)*(bcf(i ,j-1,k-1)+bcf(i ,j,k-1)) + + sol(i-1,j ,k+1)*(bcf(i-1,j-1,k )+bcf(i-1,j,k )) + + sol(i+1,j ,k+1)*(bcf(i ,j-1,k )+bcf(i ,j,k ))) + + f2x2ymz*(sol(i-1,j-1,k )*(bcf(i-1,j-1,k-1)+bcf(i-1,j-1,k)) + + sol(i+1,j-1,k )*(bcf(i ,j-1,k-1)+bcf(i ,j-1,k)) + + sol(i-1,j+1,k )*(bcf(i-1,j ,k-1)+bcf(i-1,j ,k)) + + sol(i+1,j+1,k )*(bcf(i ,j ,k-1)+bcf(i ,j ,k))) + + f4xm2ym2z*(sol(i-1,j,k)*(bcf(i-1,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i-1,j-1,k)+bcf(i-1,j,k)) + + sol(i+1,j,k)*(bcf(i ,j-1,k-1)+bcf(i ,j,k-1)+bcf(i ,j-1,k)+bcf(i ,j,k))) + + fm2x4ym2z*(sol(i,j-1,k)*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j-1,k)+bcf(i,j-1,k)) + + sol(i,j+1,k)*(bcf(i-1,j ,k-1)+bcf(i,j ,k-1)+bcf(i-1,j ,k)+bcf(i,j ,k))) + + fm2xm2y4z*(sol(i,j,k-1)*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i,j,k-1)) + + sol(i,j,k+1)*(bcf(i-1,j-1,k )+bcf(i,j-1,k )+bcf(i-1,j,k )+bcf(i,j,k ))); + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += (rhs(i,j,k) - Ax) / (alpha*acf(i,j,k)-beta*s0); + } + }); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE void +mlndabeclap_jacobi_aa (int i, int j, int k, Array4 const& sol, + Real lap, Array4 const& rhs, + Real alpha, Real beta, + Array4 const& acf, + Array4 const& bcf, + Array4 const& msk, + GpuArray const& dxinv) noexcept +{ + if (msk(i,j,k)) { + sol(i,j,k) = Real(0.0); + } else { + Real fxyz = Real(-4.0 / 36.0)*(dxinv[0]*dxinv[0] + + dxinv[1]*dxinv[1] + + dxinv[2]*dxinv[2]); + Real s0 = fxyz*(bcf(i-1,j-1,k-1)+bcf(i,j-1,k-1)+bcf(i-1,j,k-1)+bcf(i,j,k-1) + +bcf(i-1,j-1,k )+bcf(i,j-1,k )+bcf(i-1,j,k )+bcf(i,j,k)); + Real Ax = alpha*acf(i,j,k)*sol(i,j,k) - beta*lap; + + sol(i,j,k) += Real(2.0/3.0) * (rhs(i,j,k) - Ax) + / (alpha*acf(i,j,k)-beta*s0); + } +} + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H new file mode 100644 index 00000000000..fd744bacd1e --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLap_K.H @@ -0,0 +1,13 @@ +#ifndef AMREX_MLNODEABECLAP_K_H_ +#define AMREX_MLNODEABECLAP_K_H_ +#include + +#if (AMREX_SPACEDIM == 1) +#include +#elif (AMREX_SPACEDIM == 2) +#include +#else +#include +#endif + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H new file mode 100644 index 00000000000..b261c5bb3f2 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.H @@ -0,0 +1,82 @@ +#ifndef AMREX_MLNODEABECLAPLACIAN_H_ +#define AMREX_MLNODEABECLAPLACIAN_H_ +#include + +#include + +namespace amrex { + +// (alpha * a - beta * (del dot b grad)) phi = rhs +// a, phi and rhs are nodal. b is cell-centered. + +class MLNodeABecLaplacian + : public MLNodeLinOp +{ +public: + + MLNodeABecLaplacian () = default; + MLNodeABecLaplacian (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo(), + const Vector const*>& a_factory = {}); + ~MLNodeABecLaplacian () override = default; + + MLNodeABecLaplacian (const MLNodeABecLaplacian&) = delete; + MLNodeABecLaplacian (MLNodeABecLaplacian&&) = delete; + MLNodeABecLaplacian& operator= (const MLNodeABecLaplacian&) = delete; + MLNodeABecLaplacian& operator= (MLNodeABecLaplacian&&) = delete; + + void define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo(), + const Vector const*>& a_factory = {}); + + std::string name () const override { return std::string("MLNodeABecLaplacian"); } + + void setScalars (Real a, Real b) { + m_a_scalar = a; + m_b_scalar = b; + } + + void setACoeffs (int amrlev, Real a_acoef); + void setACoeffs (int amrlev, const MultiFab& a_acoef); + + void setBCoeffs (int amrlev, Real a_bcoef); + void setBCoeffs (int amrlev, const MultiFab& a_bcoef); + + void Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const final; + void Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs) const final; + + void fixUpResidualMask (int amrlev, iMultiFab& resmsk) final; + + bool isSingular (int /*amrlev*/) const final { return false; } + bool isBottomSingular () const final { return false; } + + void restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const final; + void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const final; + void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, + const MultiFab& fine_sol, const MultiFab& fine_rhs) final; + + void reflux (int crse_amrlev, + MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, + MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const final; + + void prepareForSolve () final; + + void averageDownCoeffs (); + void averageDownCoeffsToCoarseAmrLevel (int flev); + void averageDownCoeffsSameAmrLevel (int amrlev); + +private: + + Real m_a_scalar = std::numeric_limits::quiet_NaN(); + Real m_b_scalar = std::numeric_limits::quiet_NaN(); + Vector > m_a_coeffs; + Vector > m_b_coeffs; +}; + +} + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp new file mode 100644 index 00000000000..c58fb0e6da9 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp @@ -0,0 +1,334 @@ +#include +#include +#include + +namespace amrex { + +MLNodeABecLaplacian::MLNodeABecLaplacian (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info, + const Vector const*>& a_factory) +{ + define(a_geom, a_grids, a_dmap, a_info, a_factory); +} + +void +MLNodeABecLaplacian::define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info, + const Vector const*>& a_factory) +{ +#ifdef AMREX_USE_EB + amrex::Abort("MLNodeABecLaplacian does not support EB"); +#endif + + BL_PROFILE("MLNodeABecLaplacian::define()"); + + // This makes sure grids are cell-centered; + Vector cc_grids = a_grids; + for (auto& ba : cc_grids) { + ba.enclosedCells(); + } + + MLNodeLinOp::define(a_geom, cc_grids, a_dmap, a_info, a_factory); + + const int ncomp = getNComp(); + + m_a_coeffs.resize(m_num_amr_levels); + m_b_coeffs.resize(m_num_amr_levels); + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + m_a_coeffs[amrlev].resize(m_num_mg_levels[amrlev]); + m_b_coeffs[amrlev].resize(m_num_mg_levels[amrlev]); + for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { + m_a_coeffs[amrlev][mglev].define + (amrex::convert(m_grids[amrlev][mglev], IntVect::TheNodeVector()), + m_dmap[amrlev][mglev], ncomp, 0); + m_b_coeffs[amrlev][mglev].define + (m_grids[amrlev][mglev], m_dmap[amrlev][mglev], ncomp, 1); + } + } +} + +void +MLNodeABecLaplacian::setACoeffs (int amrlev, Real a_acoef) +{ + m_a_coeffs[amrlev][0].setVal(a_acoef); +} + +void +MLNodeABecLaplacian::setACoeffs (int amrlev, const MultiFab& a_acoef) +{ + const int ncomp = getNComp(); + m_a_coeffs[amrlev][0].LocalCopy(a_acoef, 0, 0, ncomp, IntVect(0)); +} + +void +MLNodeABecLaplacian::setBCoeffs (int amrlev, Real a_bcoef) +{ + m_b_coeffs[amrlev][0].setVal(a_bcoef); +} + +void +MLNodeABecLaplacian::setBCoeffs (int amrlev, const MultiFab& a_bcoef) +{ + const int ncomp = getNComp(); + m_b_coeffs[amrlev][0].LocalCopy(a_bcoef, 0, 0, ncomp, IntVect(0)); +} + +void +MLNodeABecLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const +{ + BL_PROFILE("MLNodeLaplacian::Fapply()"); + + AMREX_ALWAYS_ASSERT(getNComp() == 1); + + auto const alpha = m_a_scalar; + auto const beta = m_b_scalar; + const auto dxinvarr = m_geom[amrlev][mglev].InvCellSizeArray(); + + auto const& acoef_ma = m_a_coeffs[amrlev][mglev].const_arrays(); + auto const& bcoef_ma = m_b_coeffs[amrlev][mglev].const_arrays(); + auto const& dmskarr_ma = m_dirichlet_mask[amrlev][mglev]->const_arrays(); + + auto const& xarr_ma = in.const_arrays(); + auto const& yarr_ma = out.arrays(); + + ParallelFor(out, [=] AMREX_GPU_DEVICE(int box_no, int i, int j, int k) noexcept + { + auto lap = mlndlap_adotx_aa(i,j,k,xarr_ma[box_no],bcoef_ma[box_no],dmskarr_ma[box_no], +#if (AMREX_SPACEDIM == 2) + false, +#endif + dxinvarr); + yarr_ma[box_no](i,j,k) = (dmskarr_ma[box_no](i,j,k)) ? Real(0.0) + : alpha*acoef_ma[box_no](i,j,k)*xarr_ma[box_no](i,j,k) - beta*lap; + }); + Gpu::streamSynchronize(); +} + +void +MLNodeABecLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs) const +{ + BL_PROFILE("MLNodeABecLaplacian::Fsmooth()"); + + auto const alpha = m_a_scalar; + auto const beta = m_b_scalar; + const auto dxinvarr = m_geom[amrlev][mglev].InvCellSizeArray(); + + auto const& acoef = m_a_coeffs[amrlev][mglev]; + auto const& bcoef = m_b_coeffs[amrlev][mglev]; + auto const& dmsk = *(m_dirichlet_mask[amrlev][mglev]); + +#ifdef AMREX_USE_GPU + + auto const& acoef_ma = acoef.const_arrays(); + auto const& bcoef_ma = bcoef.const_arrays(); + auto const& dmskarr_ma = dmsk.const_arrays(); + auto const& solarr_ma = sol.arrays(); + auto const& rhsarr_ma = rhs.const_arrays(); + + for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) { + ParallelFor(sol, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + auto lap = mlndlap_adotx_aa(i,j,k,solarr_ma[box_no],bcoef_ma[box_no],dmskarr_ma[box_no], +#if (AMREX_SPACEDIM == 2) + false, +#endif + dxinvarr); + mlndabeclap_jacobi_aa(i,j,k, solarr_ma[box_no], lap, rhsarr_ma[box_no], alpha, beta, + acoef_ma[box_no], bcoef_ma[box_no], + dmskarr_ma[box_no], dxinvarr); + }); + Gpu::streamSynchronize(); + if (m_smooth_num_sweeps > 1) { nodalSync(amrlev, mglev, sol); } + } +#else + +#ifdef AMREX_USE_OMP +#pragma omp parallel +#endif + for (MFIter mfi(sol); mfi.isValid(); ++mfi) { + const Box& bx = mfi.validbox(); + Array4 const& aarr = acoef.array(mfi); + Array4 const& barr = bcoef.array(mfi); + Array4 const& solarr = sol.array(mfi); + Array4 const& rhsarr = rhs.const_array(mfi); + Array4 const& dmskarr = dmsk.const_array(mfi); + for (int ns = 0; ns < m_smooth_num_sweeps; ++ns) { + mlndabeclap_gauss_seidel_aa(bx, solarr, rhsarr, alpha, beta, + aarr, barr, dmskarr, dxinvarr); + } + } + nodalSync(amrlev, mglev, sol); +#endif +} + +void +MLNodeABecLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const +{ + BL_PROFILE("MLNodeABecLaplacian::restriction()"); + + applyBC(amrlev, cmglev-1, fine, BCMode::Homogeneous, StateMode::Solution); + + bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine); + MultiFab cfine; + if (need_parallel_copy) { + const BoxArray& ba = amrex::coarsen(fine.boxArray(), 2); + cfine.define(ba, fine.DistributionMap(), 1, 0); + } + + MultiFab* pcrse = (need_parallel_copy) ? &cfine : &crse; + + auto pcrse_ma = pcrse->arrays(); + auto fine_ma = fine.const_arrays(); + auto msk_ma = m_dirichlet_mask[amrlev][cmglev-1]->const_arrays(); + + ParallelFor(*pcrse, [=] AMREX_GPU_DEVICE(int box_no, int i, int j, int k) noexcept + { + mlndlap_restriction(i,j,k,pcrse_ma[box_no],fine_ma[box_no],msk_ma[box_no]); + }); + Gpu::streamSynchronize(); + + if (need_parallel_copy) { + crse.ParallelCopy(cfine); + } +} + +void +MLNodeABecLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const +{ + BL_PROFILE("MLNodeABecLaplacian::interpolation()"); + + bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine); + MultiFab cfine; + const MultiFab* cmf = &crse; + if (need_parallel_copy) { + const BoxArray& ba = amrex::coarsen(fine.boxArray(), 2); + cfine.define(ba, fine.DistributionMap(), 1, 0); + cfine.ParallelCopy(crse); + cmf = &cfine; + } + + auto const& fine_ma = fine.arrays(); + auto const& crse_ma = cmf->const_arrays(); + auto const& msk_ma = m_dirichlet_mask[amrlev][fmglev]->const_arrays(); + auto const& sig_ma = m_b_coeffs[amrlev][fmglev].const_arrays(); + + ParallelFor(fine, [=] AMREX_GPU_DEVICE(int box_no, int i, int j, int k) noexcept + { + mlndlap_interpadd_aa(i, j, k, fine_ma[box_no], crse_ma[box_no], + sig_ma[box_no], msk_ma[box_no]); + }); + Gpu::streamSynchronize(); +} + +void +MLNodeABecLaplacian::averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, + const MultiFab& fine_sol, const MultiFab& fine_rhs) +{ + amrex::ignore_unused(camrlev,crse_sol,crse_rhs,fine_sol,fine_rhs); + amrex::Abort("MLNodeABecLaplacian::averageDownSolutionRHS TODO"); +} + +void +MLNodeABecLaplacian::reflux (int crse_amrlev, + MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, + MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const +{ + amrex::ignore_unused(crse_amrlev,res,crse_sol,crse_rhs,fine_res,fine_sol,fine_rhs); + amrex::Abort("MLNodeABecLaplacian::reflux TODO"); +} + +void +MLNodeABecLaplacian::prepareForSolve () +{ + BL_PROFILE("MLNodeABecLaplacian::prepareForSolve()"); + + MLNodeLinOp::prepareForSolve(); + + buildMasks(); + + averageDownCoeffs(); +} + +void +MLNodeABecLaplacian::fixUpResidualMask (int amrlev, iMultiFab& resmsk) +{ + if (!m_masks_built) { buildMasks(); } + + auto const& fmsk = m_nd_fine_mask[amrlev]->const_arrays(); + auto const& rmsk = resmsk.arrays(); + + amrex::ParallelFor(resmsk, + [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + { + if (fmsk[bno](i,j,k) == crse_fine_node) { rmsk[bno](i,j,k) = 1; } + }); + Gpu::streamSynchronize(); +} + +void +MLNodeABecLaplacian::averageDownCoeffs () +{ + BL_PROFILE("MLNodeABecLaplacian::averageDownCoeffs()"); + + for (int amrlev = m_num_amr_levels-1; amrlev > 0; --amrlev) { + averageDownCoeffsSameAmrLevel(amrlev); + averageDownCoeffsToCoarseAmrLevel(amrlev); + } + + averageDownCoeffsSameAmrLevel(0); + + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { + m_b_coeffs[amrlev][mglev].FillBoundary(m_geom[amrlev][mglev].periodicity()); + + const Box& domain = m_geom[amrlev][mglev].Domain(); + const auto lobc = LoBC(); + const auto hibc = HiBC(); + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) { mfi_info.SetDynamic(true); } +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_b_coeffs[amrlev][mglev], mfi_info); mfi.isValid(); ++mfi) + { + Array4 const& sfab = m_b_coeffs[amrlev][mglev].array(mfi); + mlndlap_fillbc_cc(mfi.validbox(),sfab,domain,lobc,hibc); + } + } + } +} + +void +MLNodeABecLaplacian::averageDownCoeffsToCoarseAmrLevel (int flev) +{ + const int mglev = 0; + const int ncomp = getNComp(); + // xxxxx TODO: There is a potential issue of the coarse data not consistent + // across periodic boundaries. + amrex::average_down_nodal(m_a_coeffs[flev ][mglev], + m_a_coeffs[flev-1][mglev], + IntVect(m_amr_ref_ratio[flev-1])); + amrex::average_down(m_b_coeffs[flev ][mglev], + m_b_coeffs[flev-1][mglev], 0, ncomp, + m_amr_ref_ratio[flev-1]); +} + +void +MLNodeABecLaplacian::averageDownCoeffsSameAmrLevel (int amrlev) +{ + const int ncomp = getNComp(); + for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { + IntVect ratio(mg_coarsen_ratio); + amrex::average_down_nodal(m_a_coeffs[amrlev][mglev-1], + m_a_coeffs[amrlev][mglev ], ratio); + amrex::average_down(m_b_coeffs[amrlev][mglev-1], + m_b_coeffs[amrlev][mglev ], 0, ncomp, ratio); + } +} + +} diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H index 7ec9f13ce73..adbf00da231 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H @@ -84,10 +84,6 @@ public : if (m_const_sigma == Real(0.0)) { m_coarsening_strategy = cs; } } - void setSmoothNumSweeps (int nsweeps) noexcept { - m_smooth_num_sweeps = nsweeps; - } - BottomSolver getDefaultBottomSolver () const final { return (m_coarsening_strategy == CoarseningStrategy::RAP) ? BottomSolver::bicgcg : BottomSolver::bicgstab; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index 3c36989f79a..424d22f60c3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -33,6 +33,10 @@ public: const Vector const*>& a_factory = {}, int a_eb_limit_coarsening = -1); + void setSmoothNumSweeps (int nsweeps) noexcept { + m_smooth_num_sweeps = nsweeps; + } + void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, const MultiFab* = nullptr, const MultiFab* = nullptr, const MultiFab* = nullptr) final {} diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index 5e2389c1f93..0fb9e2ba33b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -368,7 +368,7 @@ MLNodeLinOp::buildMasks () MLNodeLinOp_set_dot_mask(m_bottom_dot_mask, omask, geom, lobc, hibc, m_coarsening_strategy); } - if (m_is_bottom_singular) + if (isBottomSingular()) { int amrlev = 0; int mglev = 0; diff --git a/Src/LinearSolvers/MLMG/Make.package b/Src/LinearSolvers/MLMG/Make.package index 22934a0d045..d66d64ec0eb 100644 --- a/Src/LinearSolvers/MLMG/Make.package +++ b/Src/LinearSolvers/MLMG/Make.package @@ -50,6 +50,9 @@ ifeq ($(USE_HYPRE),TRUE) CEXE_sources += AMReX_MLNodeLaplacian_hypre.cpp endif +CEXE_headers += AMReX_MLNodeABecLaplacian.H +CEXE_sources += AMReX_MLNodeABecLaplacian.cpp +CEXE_headers += AMReX_MLNodeABecLap_K.H AMReX_MLNodeABecLap_$(DIM)D_K.H CEXE_headers += AMReX_MLNodeTensorLaplacian.H CEXE_sources += AMReX_MLNodeTensorLaplacian.cpp diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H index afe97c9bea6..c1ed7ba4c3d 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.H @@ -20,6 +20,7 @@ public: void initProbPoisson (); void initProbABecLaplacian (); void initProbABecLaplacianInhomNeumann (); + void initProbNodeABecLaplacian (); private: @@ -28,6 +29,7 @@ private: void solvePoisson (); void solveABecLaplacian (); void solveABecLaplacianInhomNeumann (); + void solveNodeABecLaplacian (); int max_level = 1; int ref_ratio = 2; @@ -36,7 +38,9 @@ private: bool composite_solve = true; - int prob_type = 1; // 1. Poisson, 2. ABecLaplacian + // 1. Poisson, 2. ABecLaplacian, 3. ABecLaplacian w/ inhomNeumann + // 4. NodeABecLaplacian + int prob_type = 1; // For MLMG solver int verbose = 2; diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp index 0db9f5959d8..9900ce43d7f 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTest.cpp @@ -1,5 +1,6 @@ #include "MyTest.H" +#include #include #include #include @@ -22,6 +23,8 @@ MyTest::solve () solveABecLaplacian(); } else if (prob_type == 3) { solveABecLaplacianInhomNeumann(); + } else if (prob_type == 4) { + solveNodeABecLaplacian(); } else { amrex::Abort("Unknown prob_type"); } @@ -409,6 +412,54 @@ MyTest::solveABecLaplacianInhomNeumann () } } +void +MyTest::solveNodeABecLaplacian () +{ + LPInfo info; + info.setAgglomeration(agglomeration); + info.setConsolidation(consolidation); + info.setMaxCoarseningLevel(max_coarsening_level); + + const auto tol_rel = Real(1.e-10); + const auto tol_abs = Real(0.0); + + const auto nlevels = static_cast(geom.size()); + + if (composite_solve && nlevels > 1) + { + amrex::Abort("solveNodeABecLaplacian: TODO composite_solve"); + } + else + { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nlevels == 1, "solveNodeABecLaplacian: nlevels > 1 TODO"); + for (int ilev = 0; ilev < nlevels; ++ilev) + { + MLNodeABecLaplacian mlndabec({geom[ilev]}, {grids[ilev]}, {dmap[ilev]}, + info); + + mlndabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Neumann, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Neumann, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + + mlndabec.setScalars(ascalar, bscalar); + + mlndabec.setACoeffs(0, acoef[ilev]); + mlndabec.setBCoeffs(0, bcoef[ilev]); + + MLMG mlmg(mlndabec); + mlmg.setMaxIter(max_iter); + mlmg.setMaxFmgIter(max_fmg_iter); + mlmg.setVerbose(verbose); + mlmg.setBottomVerbose(bottom_verbose); + + mlmg.solve({&solution[ilev]}, {&rhs[ilev]}, tol_rel, tol_abs); + } + } +} + void MyTest::readParameters () { @@ -463,7 +514,7 @@ MyTest::initData () rhs.resize(nlevels); exact_solution.resize(nlevels); - if (prob_type == 2 || prob_type == 3) { + if (prob_type == 2 || prob_type == 3 || prob_type == 4) { acoef.resize(nlevels); bcoef.resize(nlevels); } @@ -491,12 +542,17 @@ MyTest::initData () for (int ilev = 0; ilev < nlevels; ++ilev) { dmap[ilev].define(grids[ilev]); - solution [ilev].define(grids[ilev], dmap[ilev], 1, 1); - rhs [ilev].define(grids[ilev], dmap[ilev], 1, 0); - exact_solution[ilev].define(grids[ilev], dmap[ilev], 1, 0); + BoxArray ba = grids[ilev]; + if (prob_type == 4) { + ba.surroundingNodes(); + } + solution [ilev].define(ba, dmap[ilev], 1, 1); + rhs [ilev].define(ba, dmap[ilev], 1, 0); + exact_solution[ilev].define(ba, dmap[ilev], 1, 0); if (!acoef.empty()) { - acoef[ilev].define(grids[ilev], dmap[ilev], 1, 0); - bcoef[ilev].define(grids[ilev], dmap[ilev], 1, 1); + acoef[ilev].define(ba , dmap[ilev], 1, 0); + const int ngb = (prob_type == 4) ? 0 : 1; + bcoef[ilev].define(grids[ilev], dmap[ilev], 1, ngb); } } @@ -506,6 +562,8 @@ MyTest::initData () initProbABecLaplacian(); } else if (prob_type == 3) { initProbABecLaplacianInhomNeumann(); + } else if (prob_type == 4) { + initProbNodeABecLaplacian(); } else { amrex::Abort("Unknown prob_type "+std::to_string(prob_type)); } diff --git a/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp b/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp index 707361a4e34..4473f978a85 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/MyTestPlotfile.cpp @@ -8,6 +8,19 @@ using namespace amrex; void MyTest::writePlotfile () const { + if (prob_type == 4) { + for (int ilev = 0; ilev <= max_level; ++ilev) { + VisMF::Write(solution[ilev], "solution-lev"+std::to_string(ilev)); + MultiFab errmf(solution[ilev].boxArray(), + solution[ilev].DistributionMap(), 1, 1); + MultiFab::Copy(errmf, solution[ilev], 0, 0, 1, 0); + MultiFab::Subtract(errmf, exact_solution[ilev], 0, 0, 1, 0); + auto error = errmf.norminf(); + amrex::Print() << "Level " << ilev << " max-norm error: " << error << std::endl; + } + return; + } + ParmParse pp; bool gpu_regtest = false; #ifdef AMREX_USE_GPU diff --git a/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp b/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp index a7b197adbed..ec102ae0601 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp +++ b/Tests/LinearSolvers/ABecLaplacian_C/initProb.cpp @@ -172,3 +172,40 @@ MyTest::initProbABecLaplacianInhomNeumann () solution[ilev].setVal(0.0,0,1,0); // set interior to 0 } } + +void +MyTest::initProbNodeABecLaplacian () +{ + for (int ilev = 0; ilev <= max_level; ++ilev) + { + solution[ilev].setVal(0.0); + + const auto prob_lo = geom[ilev].ProbLoArray(); + const auto prob_hi = geom[ilev].ProbHiArray(); + const auto dx = geom[ilev].CellSizeArray(); + auto a = ascalar; + auto b = bscalar; + Box const& nddom = amrex::surroundingNodes(geom[ilev].Domain()); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(rhs[ilev]); mfi.isValid(); ++mfi) + { + const Box& ndbx = mfi.validbox(); + + auto rhsfab = rhs[ilev].array(mfi); + auto exactfab = exact_solution[ilev].array(mfi); + auto solfab = solution[ilev].array(mfi); + auto acoeffab = acoef[ilev].array(mfi); + auto bcoeffab = bcoef[ilev].array(mfi); + + amrex::ParallelFor(ndbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + actual_init_nodeabeclap(i,j,k,rhsfab,exactfab,solfab, + acoeffab,bcoeffab,a,b, + nddom,prob_lo,prob_hi,dx); + }); + } + } +} diff --git a/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H b/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H index 4fcab046f0b..0b6fbe20f71 100644 --- a/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H +++ b/Tests/LinearSolvers/ABecLaplacian_C/initProb_K.H @@ -310,4 +310,74 @@ void actual_init_dphi_dz_hi (int i, int j, int k, amrex::Array4 con + .25 * std::cos(fpi*x) * std::sin(fpi*y) * (-fpi) * std::sin(fpi*z); } +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void actual_init_nodeabeclap(int i, int j, int k, + amrex::Array4 const& rhs, + amrex::Array4 const& exact, + amrex::Array4 const& sol, + amrex::Array4 const& acoef, + amrex::Array4 const& bcoef, + amrex::Real a, amrex::Real b, + amrex::Box const& nddom, + amrex::GpuArray const& prob_lo, + amrex::GpuArray const& prob_hi, + amrex::GpuArray const& dx) +{ + constexpr amrex::Real w = 0.05; + constexpr amrex::Real sigma = 10.; + const amrex::Real theta = 0.5*std::log(3.) / (w + 1.e-50); + + constexpr amrex::Real pi = 3.1415926535897932; + constexpr amrex::Real tpi = 2.*pi; + constexpr amrex::Real fpi = 4.*pi; + constexpr amrex::Real fac = static_cast(AMREX_SPACEDIM*4)*pi*pi; + + // bcoef is at cell center, whereas the rest at nodes. + if (bcoef.contains(i,j,k)) { + actual_init_bcoef(i,j,k, bcoef, prob_lo, prob_hi, dx); + } + + amrex::Real xc = (prob_hi[0] + prob_lo[0])*0.5; + amrex::Real yc = (prob_hi[1] + prob_lo[1])*0.5; +#if (AMREX_SPACEDIM == 2) + amrex::Real zc = 0.0; +#else + amrex::Real zc = (prob_hi[2] + prob_lo[2])*0.5; +#endif + + amrex::Real x = prob_lo[0] + dx[0] * (i); + amrex::Real y = prob_lo[1] + dx[1] * (j); +#if (AMREX_SPACEDIM == 2) + amrex::Real z = 0.0; +#else + amrex::Real z = prob_lo[2] + dx[2] * (k); +#endif + + amrex::Real r = std::sqrt((x-xc)*(x-xc) + (y-yc)*(y-yc) + (z-zc)*(z-zc)); + amrex::Real bcnd = (sigma-1.)/2.*std::tanh(theta*(r-0.25)) + (sigma+1.)/2.; + amrex::Real tmp = std::cosh(theta*(r-0.25)); + amrex::Real dbdrfac = (r == amrex::Real(0.0)) + ? amrex::Real(0.0) : (sigma-1.)/2./(tmp*tmp) * theta/r; + dbdrfac *= b; + + acoef(i,j,k) = 1.; + + exact(i,j,k) = std::cos(tpi*x) * std::cos(tpi*y) * std::cos(tpi*z) + + .25 * std::cos(fpi*x) * std::cos(fpi*y) * std::cos(fpi*z); + + rhs(i,j,k) = bcnd*b*fac*( std::cos(tpi*x) * std::cos(tpi*y) * std::cos(tpi*z) + + std::cos(fpi*x) * std::cos(fpi*y) * std::cos(fpi*z)) + + dbdrfac*((x-xc)*(tpi*std::sin(tpi*x) * std::cos(tpi*y) * std::cos(tpi*z) + + pi*std::sin(fpi*x) * std::cos(fpi*y) * std::cos(fpi*z)) + + (y-yc)*(tpi*std::cos(tpi*x) * std::sin(tpi*y) * std::cos(tpi*z) + + pi*std::cos(fpi*x) * std::sin(fpi*y) * std::cos(fpi*z)) + + (z-zc)*(tpi*std::cos(tpi*x) * std::cos(tpi*y) * std::sin(tpi*z) + + pi*std::cos(fpi*x) * std::cos(fpi*y) * std::sin(fpi*z))) + + a * exact(i,j,k); + + if (! nddom.strictly_contains(i,j,k)) { + sol(i,j,k) = exact(i,j,k); // domain boundary + } +} + #endif diff --git a/Tests/LinearSolvers/ABecLaplacian_C/inputs-node b/Tests/LinearSolvers/ABecLaplacian_C/inputs-node new file mode 100644 index 00000000000..2d96588d241 --- /dev/null +++ b/Tests/LinearSolvers/ABecLaplacian_C/inputs-node @@ -0,0 +1,16 @@ + +max_level = 0 +ref_ratio = 2 +n_cell = 128 +max_grid_size = 64 + +composite_solve = 0 # composite solve or level by level? + +prob_type = 4 # nodal ABecLaplacian + +# For MLMG +verbose = 2 +bottom_verbose = 0 +max_iter = 100 +agglomeration = 1 # Do agglomeration on AMR Level 0? +consolidation = 1 # Do consolidation?