Skip to content

Commit

Permalink
Refactor: new memory record interface (#1794)
Browse files Browse the repository at this point in the history
* Refactor: new memory record interface

* Fix: CUDA and ROCM compiler

* Fix: UT related to memory.cpp

Co-authored-by: dyzheng <[email protected]>
  • Loading branch information
dyzheng and dyzheng authored Jan 12, 2023
1 parent 6bf1ab7 commit 86bd28b
Show file tree
Hide file tree
Showing 56 changed files with 369 additions and 216 deletions.
3 changes: 2 additions & 1 deletion source/module_base/math_ylmreal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cassert>
#include "ylm.h"
#include "module_base/kernels/math_op.h"
#include "module_psi/kernels/memory_op.h"

namespace ModuleBase
{
Expand Down Expand Up @@ -323,7 +324,7 @@ void YlmReal::Ylm_Real(Device * ctx, const int lmax2, const int ng, const FPTYPE
ModuleBase::WARNING_QUIT("YLM_REAL","l>30 or l<0");
}
FPTYPE * p = nullptr, * phi = nullptr, * cost = nullptr;
resmem_var_op()(ctx, p, (lmax + 1) * (lmax + 1) * ng);
resmem_var_op()(ctx, p, (lmax + 1) * (lmax + 1) * ng, "YlmReal::Ylm_Real");

cal_ylm_real_op()(
ctx,
Expand Down
1 change: 0 additions & 1 deletion source/module_base/math_ylmreal.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include "vector3.h"
#include "matrix.h"
#include "module_psi/psi.h"

namespace ModuleBase
{
Expand Down
90 changes: 80 additions & 10 deletions source/module_base/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// DATE : 2008-11-18
//==========================================================
#include "memory.h"
#include "global_variable.h"
#include "src_parallel/parallel_reduce.h"

namespace ModuleBase
{
Expand All @@ -18,7 +20,7 @@ int Memory::bool_memory = sizeof(bool); // 1.0 Byte
int Memory::float_memory = sizeof(float); // 4.0 Byte
int Memory::short_memory = sizeof(short); // 2.0 Byte

int Memory::n_memory = 500;
int Memory::n_memory = 1000;
int Memory::n_now = 0;
bool Memory::init_flag = false;

Expand Down Expand Up @@ -140,10 +142,75 @@ double Memory::record
return consume[find];
}

/**
 * @brief Record the memory consumed by a named quantity.
 *
 * The size is given in bytes and stored internally in MB (bytes / 1024 / 1024).
 * Records are identified by name_in; the first call with a new name creates
 * the record, later calls update it.
 *
 * @param name_in    Label of the record.
 * @param n_in       Size in bytes.
 * @param accumulate If true, the size is added to the existing record and to
 *                   the total. If false, the record only grows: it is raised
 *                   to the new size when that size exceeds the stored one,
 *                   and the total is increased by the difference.
 */
void Memory::record
(
    const std::string &name_in,
    const size_t &n_in,
    const bool accumulate
)
{
    // Lazily allocate the bookkeeping tables on first use.
    if(!Memory::init_flag)
    {
        name = new std::string[n_memory];
        class_name = new std::string[n_memory];
        consume = new double[n_memory];
        for(int i=0;i<n_memory;i++)
        {
            consume[i] = 0.0;
        }
        Memory::init_flag = true;
    }

    // Linear search for an existing record with this name.
    int find = 0;
    for(find = 0; find < n_now; find++)
    {
        if( name_in == name[find] )
        {
            break;
        }
    }

    // find == n_now : this is a new record.
    if(find == n_now)
    {
        // Check capacity BEFORE touching the tables: writing name[find] with
        // find == n_memory would be an out-of-bounds write. Existing records
        // (find < n_now) are still updated when the table is full.
        if(n_now >= n_memory)
        {
            std::cout<<" Error! Too many memories has been recorded.";
            return;
        }
        name[find] = name_in;
        class_name[find] = "";
        n_now++;
    }

    // Convert bytes to MB.
    const double factor = 1.0/1024.0/1024.0;
    const double size_mb = n_in * factor;

    if(accumulate)
    {
        consume[find] += size_mb;
        Memory::total += size_mb;
    }
    else if(consume[find] < size_mb)
    {
        // Keep the maximum size seen for this record; only the increase
        // is added to the total.
        Memory::total += size_mb - consume[find];
        consume[find] = size_mb;
        // Report records larger than 5 MB as soon as they are set.
        if(consume[find] > 5)
        {
            print(find);
        }
    }

    return;
}

// Write the name and stored size (in MB) of record `find` to
// GlobalV::ofs_running. The index is not range-checked here.
void Memory::print(const int find)
{
    auto &out = GlobalV::ofs_running;
    out << "\n Warning_Memory_Consuming allocated: ";
    out << " " << name[find];
    out << " " << consume[find];
    out << " MB" << std::endl;
}

Expand All @@ -167,10 +234,12 @@ void Memory::print_all(std::ofstream &ofs)
if(!init_flag) return;

const double small = 1.0;
// std::cout<<"\n CLASS_NAME---------|NAME---------------|MEMORY(MB)--------";
ofs <<"\n CLASS_NAME---------|NAME---------------|MEMORY(MB)--------" << std::endl;
#ifdef __MPI
Parallel_Reduce::reduce_double_all(Memory::total);
#endif
ofs <<"\n NAME---------------|MEMORY(MB)--------" << std::endl;
// std::cout<<"\n"<<std::setw(41)<< " " <<std::setprecision(4)<<total;
ofs <<std::setw(41)<< " " <<std::setprecision(4)<<total << std::endl;
ofs <<std::setw(20)<< "total" << std::setw(15) <<std::setprecision(4)<< Memory::total << std::endl;

bool *print_flag = new bool[n_memory];
for(int i=0; i<n_memory; i++) print_flag[i] = false;
Expand All @@ -192,16 +261,16 @@ void Memory::print_all(std::ofstream &ofs)
}
}
print_flag[k] = true;

#ifdef __MPI
Parallel_Reduce::reduce_double_all(consume[k]);
#endif
if ( consume[k] < small )
{
continue;
}
else
{
ofs << " "
<< std::setw(20) << class_name[k]
<< std::setw(20) << name[k]
ofs << std::setw(20) << name[k]
<< std::setw(15) << consume[k] << std::endl;

// std::cout << "\n "
Expand All @@ -211,6 +280,7 @@ void Memory::print_all(std::ofstream &ofs)
}
}
// std::cout<<"\n ----------------------------------------------------------"<<std::endl;
ofs<<" ------------- < 1.0 MB has been ignored ----------------"<<std::endl;
ofs<<" ----------------------------------------------------------"<<std::endl;
delete[] print_flag; //mohan fix by valgrind at 2012-04-02
return;
Expand Down
13 changes: 13 additions & 0 deletions source/module_base/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@ class Memory
const std::string &type,
const bool accumulate = false);

/**
* @brief Record memory consumed during computation
*
* @param name_in The name (label) of the recorded quantity
* @param n_in The size of the quantity in bytes; the implementation
*             converts it to MB (n_in / 1024 / 1024) before storing
* @param accumulate If true, the size is added to the existing record and
*                   to the total; if false (default), the record is only
*                   raised when the new size exceeds the stored one
*/
static void record(
const std::string &name_in,
const size_t &n_in,
const bool accumulate = false
);

static double &get_total(void)
{
return total;
Expand Down
1 change: 1 addition & 0 deletions source/module_base/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ AddTest(
LIBS ${math_libs} device
SOURCES math_ylmreal_test.cpp ../math_ylmreal.cpp ../complexmatrix.cpp ../global_variable.cpp ../ylm.cpp ../realarray.cpp ../timer.cpp ../matrix.cpp ../vector3.h
../../src_parallel/parallel_reduce.cpp ../../src_parallel/parallel_kpoints.cpp ../../src_parallel/parallel_global.cpp ../../src_parallel/parallel_common.cpp
../memory.cpp
)
AddTest(
TARGET base_math_sphbes
Expand Down
5 changes: 5 additions & 0 deletions source/module_base/test/memory_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
#include <fstream>
#include <cstdio>

// Test-local definition of GlobalV::ofs_running.
// NOTE(review): presumably provided here so the Memory code under test, which
// writes to GlobalV::ofs_running, links without global_variable.cpp — confirm
// against this test's CMake source list.
namespace GlobalV
{
std::ofstream ofs_running;
}

/************************************************
* unit test of class Memory
***********************************************/
Expand Down
8 changes: 4 additions & 4 deletions source/module_deepks/test/klist_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,10 @@ namespace Test_Deepks
wk = new double[kpoint_number];
isk = new int[kpoint_number];

ModuleBase::Memory::record("K_Vectors","kvec_c",kpoint_number*3,"double");
ModuleBase::Memory::record("K_Vectors","kvec_d",kpoint_number*3,"double");
ModuleBase::Memory::record("K_Vectors","wk",kpoint_number*3,"double");
ModuleBase::Memory::record("K_Vectors","isk",kpoint_number*3,"int");
ModuleBase::Memory::record("KV::kvec_c",sizeof(double) * kpoint_number*3);
ModuleBase::Memory::record("KV::kvec_d",sizeof(double) * kpoint_number*3);
ModuleBase::Memory::record("KV::wk",sizeof(double) * kpoint_number*3);
ModuleBase::Memory::record("KV::isk",sizeof(int) * kpoint_number*3);

return;
}
Expand Down
4 changes: 2 additions & 2 deletions source/module_deepks/test/parallel_orbitals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ namespace Test_Deepks
trace_loc_col[i] = -1;
}

ModuleBase::Memory::record("Parallel_Orbitals","trace_loc_row",GlobalV::NLOCAL,"int");
ModuleBase::Memory::record("Parallel_Orbitals","trace_loc_col",GlobalV::NLOCAL,"int");
ModuleBase::Memory::record("PO::trace_loc_row",sizeof(int) * GlobalV::NLOCAL);
ModuleBase::Memory::record("PO::trace_loc_col",sizeof(int) * GlobalV::NLOCAL);

for (int i=0; i<GlobalV::NLOCAL; i++)
{
Expand Down
2 changes: 1 addition & 1 deletion source/module_dftu/dftu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ void DFTU::init(UnitCell& cell, // unitcell class
}
}

ModuleBase::Memory::record("DFTU", "locale", num_locale, "double");
ModuleBase::Memory::record("DFTU::locale", sizeof(double) * num_locale);
return;
}

Expand Down
4 changes: 2 additions & 2 deletions source/module_elecstate/elecstate_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ void ElecStatePW<FPTYPE, Device>::init_rho_data()
this->kin_r = reinterpret_cast<FPTYPE **>(this->charge->kin_r);
}
}
resmem_complex_op()(this->ctx, this->wfcr, this->basis->nmaxgr);
resmem_complex_op()(this->ctx, this->wfcr_another_spin, this->charge->nrxx);
resmem_complex_op()(this->ctx, this->wfcr, this->basis->nmaxgr, "ElecSPW::wfcr");
resmem_complex_op()(this->ctx, this->wfcr_another_spin, this->charge->nrxx, "ElecSPW::wfcr_a");
this->init_rho = true;
}

Expand Down
10 changes: 5 additions & 5 deletions source/module_elecstate/potentials/potential_new.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,15 @@ void Potential::allocate()
return;

this->v_effective_fixed.resize(nrxx);
ModuleBase::Memory::record("Potential", "v_effective_fixed", nrxx, "double");
ModuleBase::Memory::record("Pot::veff_fix", sizeof(double) * nrxx);

this->v_effective.create(GlobalV::NSPIN, nrxx);
ModuleBase::Memory::record("Potential", "vr_eff", GlobalV::NSPIN * nrxx, "double");
ModuleBase::Memory::record("Pot::veff", sizeof(double) * GlobalV::NSPIN * nrxx);

if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
{
this->vofk_effective.create(GlobalV::NSPIN, nrxx);
ModuleBase::Memory::record("Potential", "vofk", GlobalV::NSPIN * nrxx, "double");
ModuleBase::Memory::record("Pot::vofk", sizeof(double) * GlobalV::NSPIN * nrxx);
}
if (GlobalV::device_flag == "gpu") {
if (GlobalV::precision_flag == "single") {
Expand All @@ -168,8 +168,8 @@ void Potential::allocate()
}
else {
if (GlobalV::precision_flag == "single") {
resmem_sh_op()(cpu_ctx, s_v_effective, GlobalV::NSPIN * nrxx);
resmem_sh_op()(cpu_ctx, s_vofk_effective, GlobalV::NSPIN * nrxx);
resmem_sh_op()(cpu_ctx, s_v_effective, GlobalV::NSPIN * nrxx, "POT::sveff");
resmem_sh_op()(cpu_ctx, s_vofk_effective, GlobalV::NSPIN * nrxx, "POT::svofk");
}
else {
this->d_v_effective = this->v_effective.c;
Expand Down
5 changes: 5 additions & 0 deletions source/module_esolver/esolver_ks_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "module_hamilt/hamilt_pw.h"
#include "module_hsolver/diago_iter_assist.h"
#include "module_vdw/vdw.h"
#include "module_base/memory.h"

#include "module_io/write_wfc_realspace.h"
#include "module_io/winput.h"
Expand Down Expand Up @@ -134,6 +135,10 @@ namespace ModuleESolver
this->kspw_psi = GlobalV::device_flag == "gpu" || GlobalV::precision_flag == "single" ?
new psi::Psi<std::complex<FPTYPE>, Device>(this->psi[0]) :
reinterpret_cast<psi::Psi<std::complex<FPTYPE>, Device>*> (this->psi);
if(GlobalV::precision_flag == "single")
{
ModuleBase::Memory::record ("Psi_single", sizeof(std::complex<FPTYPE>) * this->psi[0].size());
}

ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT BASIS");
}
Expand Down
2 changes: 2 additions & 0 deletions source/module_esolver/esolver_of.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//-----------temporary-------------------------
#include "../src_pw/global.h"
#include "../module_base/global_function.h"
#include "module_base/memory.h"
#include "../module_symmetry/symmetry.h"
// #include "../src_pw/vdwd2.h"
// #include "../src_pw/vdwd3.h"
Expand Down Expand Up @@ -89,6 +90,7 @@ void ESolver_OF::Init(Input &inp, UnitCell &ucell)

// Initialize the "wavefunction", which is sqrt(rho)
this->psi = new psi::Psi<double>(1, GlobalV::NSPIN, this->nrxx);
ModuleBase::Memory::record("OFDFT::Psi", sizeof(double) * GlobalV::NSPIN * this->nrxx);
this->pphi = new double*[GlobalV::NSPIN];
for (int is = 0; is < GlobalV::NSPIN; ++is)
{
Expand Down
13 changes: 13 additions & 0 deletions source/module_esolver/esolver_sdft_pw_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "module_base/complexmatrix.h"
#include "module_base/global_variable.h"
#include "module_base/global_function.h"
#include "module_base/memory.h"
#include "src_pw/global.h"
#include "module_hamilt/ks_pw/velocity_pw.h"

Expand Down Expand Up @@ -201,16 +202,26 @@ void ESolver_SDFT_PW::sKG(const int nche_KG, const double fwhmin, const double w
//before loop

//|psi>
size_t memory_cost = totbands_per * npwx * sizeof(std::complex<double>);
psi::Psi<std::complex<double>> psi0(1,totbands_per,npwx,GlobalC::kv.ngk.data()); //|psi>
ModuleBase::Memory::record("SDFT::psi0", memory_cost);
psi::Psi<std::complex<double>> sfpsi0(1,totbands_per,npwx,GlobalC::kv.ngk.data()); //sqrt(f)|psi>
ModuleBase::Memory::record("SDFT::sfpsi0", memory_cost);
psi::Psi<std::complex<double>> hpsi0(1,totbands_per,npwx,GlobalC::kv.ngk.data()); //h|psi>
ModuleBase::Memory::record("SDFT::hpsi0", memory_cost);
psi::Psi<std::complex<double>> hsfpsi0(1,totbands_per,npwx,GlobalC::kv.ngk.data()); //h*sqrt(f)|psi>
ModuleBase::Memory::record("SDFT::hsfpsi0", memory_cost);
//j|psi> j1=p j2=(Hp+pH)/2 - mu*p
memory_cost = ndim * totbands_per * npwx * sizeof(std::complex<double>);
psi::Psi<std::complex<double>> j1psi(1,ndim*totbands_per,npwx,GlobalC::kv.ngk.data());
ModuleBase::Memory::record("SDFT::j1psi", memory_cost);
psi::Psi<std::complex<double>> j2psi(1,ndim*totbands_per,npwx,GlobalC::kv.ngk.data());
ModuleBase::Memory::record("SDFT::j2psi", memory_cost);
//(1-f)*j*sqrt(f)|psi>
psi::Psi<std::complex<double>> j1sfpsi(1,ndim*totbands_per,npwx,GlobalC::kv.ngk.data());
ModuleBase::Memory::record("SDFT::psi0", memory_cost);
psi::Psi<std::complex<double>> j2sfpsi(1,ndim*totbands_per,npwx,GlobalC::kv.ngk.data());
ModuleBase::Memory::record("SDFT::psi0", memory_cost);
double* en;
if(ksbandper > 0) en = new double [ksbandper];
for(int ib = 0 ; ib < ksbandper ; ++ib)
Expand Down Expand Up @@ -340,7 +351,9 @@ void ESolver_SDFT_PW::sKG(const int nche_KG, const double fwhmin, const double w

//loop of t
psi::Psi<std::complex<double>> exppsi(1,totbands_per,npwx);
ModuleBase::Memory::record("SDFT::exppsi", sizeof(std::complex<double>) * totbands_per * npwx);
psi::Psi<std::complex<double>> expsfpsi(1,totbands_per,npwx);
ModuleBase::Memory::record("SDFT::expsfpsi", sizeof(std::complex<double>) * totbands_per * npwx);
for(int ib = 0; ib < totbands_per; ++ib)
{
for(int ig = 0 ; ig < npw ; ++ig)
Expand Down
14 changes: 1 addition & 13 deletions source/module_gint/gint_k_pvpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,7 @@ void Gint_k::allocate_pvpR(void)
ModuleBase::GlobalFunc::ZEROS( pvpR_reduced[is], GlobalC::GridT.nnrg);
}

double mem = ModuleBase::Memory::record("allocate_pvpR", "pvpR_reduced", GlobalC::GridT.nnrg * GlobalV::NSPIN , "double");

if(GlobalV::OUT_LEVEL != "m")
{
GlobalV::ofs_running << " Memory of pvpR : " << mem << " MB" << std::endl;
}

if( mem > 800 )
{
GlobalV::ofs_warning << " memory for pvpR = " << mem << std::endl;
GlobalV::ofs_warning << " which is larger than 800 MB ! " << std::endl;
ModuleBase::WARNING_QUIT("Gint_k","allocate_pvpR");
}
ModuleBase::Memory::record("pvpR_reduced", sizeof(double) * GlobalC::GridT.nnrg * GlobalV::NSPIN);

this->pvpR_alloc_flag = true;
return;
Expand Down
2 changes: 1 addition & 1 deletion source/module_gint/grid_bigcell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ void Grid_BigCell::init_tau_in_bigcell(void)
delete[] index_atom;
this->index_atom = new int[GlobalC::ucell.nat];

ModuleBase::Memory::record("Grid_BigCell","tau_in_bigcell",GlobalC::ucell.nat*3,"double");
ModuleBase::Memory::record("tau_in_bigcell", sizeof(double) * GlobalC::ucell.nat*3);
}

// get the fraction number of (i,j,k)
Expand Down
Loading

0 comments on commit 86bd28b

Please sign in to comment.