diff --git a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf index ad632155b5..f5166de41b 100644 Binary files a/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf and b/amrex/docs_html/_downloads/008eb6dbfab802633dff40122ece848c/amrex.pdf differ diff --git a/amrex/docs_html/doxygen/AMReX__TinyProfiler_8H_source.html b/amrex/docs_html/doxygen/AMReX__TinyProfiler_8H_source.html index b6ee90c32f..b198adf4bb 100644 --- a/amrex/docs_html/doxygen/AMReX__TinyProfiler_8H_source.html +++ b/amrex/docs_html/doxygen/AMReX__TinyProfiler_8H_source.html @@ -270,9 +270,9 @@
AMReX_INT.H
AMReX_REAL.H
amrex::TinyProfileRegion
Definition: AMReX_TinyProfiler.H:155
-
amrex::TinyProfileRegion::~TinyProfileRegion
~TinyProfileRegion()
Definition: AMReX_TinyProfiler.cpp:943
+
amrex::TinyProfileRegion::~TinyProfileRegion
~TinyProfileRegion()
Definition: AMReX_TinyProfiler.cpp:940
amrex::TinyProfileRegion::TinyProfileRegion
TinyProfileRegion(TinyProfileRegion const &)=delete
-
amrex::TinyProfileRegion::TinyProfileRegion
TinyProfileRegion(std::string a_regname) noexcept
Definition: AMReX_TinyProfiler.cpp:927
+
amrex::TinyProfileRegion::TinyProfileRegion
TinyProfileRegion(std::string a_regname) noexcept
Definition: AMReX_TinyProfiler.cpp:924
amrex::TinyProfileRegion::operator=
TinyProfileRegion & operator=(TinyProfileRegion const &)=delete
amrex::TinyProfileRegion::TinyProfileRegion
TinyProfileRegion(TinyProfileRegion &&)=delete
amrex::TinyProfileRegion::regname
std::string regname
Definition: AMReX_TinyProfiler.H:165
@@ -280,12 +280,12 @@
amrex::TinyProfiler
A simple profiler that returns basic performance information (e.g. min, max, and average running time...
Definition: AMReX_TinyProfiler.H:31
amrex::TinyProfiler::memprof_enabled
static bool memprof_enabled
Definition: AMReX_TinyProfiler.H:143
amrex::TinyProfiler::regionstack
static std::vector< std::string > regionstack
Definition: AMReX_TinyProfiler.H:134
-
amrex::TinyProfiler::RegisterArena
static bool RegisterArena(const std::string &memory_name, std::map< std::string, MemStat > &memstats) noexcept
Definition: AMReX_TinyProfiler.cpp:495
+
amrex::TinyProfiler::RegisterArena
static bool RegisterArena(const std::string &memory_name, std::map< std::string, MemStat > &memstats) noexcept
Definition: AMReX_TinyProfiler.cpp:492
amrex::TinyProfiler::MemoryInitialize
static void MemoryInitialize() noexcept
Definition: AMReX_TinyProfiler.cpp:335
amrex::TinyProfiler::device_synchronize_around_region
static bool device_synchronize_around_region
Definition: AMReX_TinyProfiler.H:138
amrex::TinyProfiler::mem_stack
static std::deque< const TinyProfiler * > mem_stack
Definition: AMReX_TinyProfiler.H:121
-
amrex::TinyProfiler::get_output_file
static std::string const & get_output_file()
Definition: AMReX_TinyProfiler.cpp:961
-
amrex::TinyProfiler::PrintMemStats
static void PrintMemStats(std::map< std::string, MemStat > &memstats, std::string const &memname, double dt_max, double t_final, std::ostream *os)
Definition: AMReX_TinyProfiler.cpp:735
+
amrex::TinyProfiler::get_output_file
static std::string const & get_output_file()
Definition: AMReX_TinyProfiler.cpp:958
+
amrex::TinyProfiler::PrintMemStats
static void PrintMemStats(std::map< std::string, MemStat > &memstats, std::string const &memname, double dt_max, double t_final, std::ostream *os)
Definition: AMReX_TinyProfiler.cpp:732
amrex::TinyProfiler::operator=
TinyProfiler & operator=(TinyProfiler const &)=delete
amrex::TinyProfiler::Finalize
static void Finalize(bool bFlushing=false) noexcept
Definition: AMReX_TinyProfiler.cpp:354
amrex::TinyProfiler::MemoryFinalize
static void MemoryFinalize(bool bFlushing=false) noexcept
Definition: AMReX_TinyProfiler.cpp:447
@@ -296,21 +296,21 @@
amrex::TinyProfiler::stats
std::vector< Stats * > stats
Definition: AMReX_TinyProfiler.H:119
amrex::TinyProfiler::ttstack
static std::deque< std::tuple< double, double, std::string * > > ttstack
Definition: AMReX_TinyProfiler.H:135
amrex::TinyProfiler::TinyProfiler
TinyProfiler(TinyProfiler const &)=delete
-
amrex::TinyProfiler::PrintStats
static void PrintStats(std::map< std::string, Stats > &regstats, double dt_max, std::ostream *os)
Definition: AMReX_TinyProfiler.cpp:521
+
amrex::TinyProfiler::PrintStats
static void PrintStats(std::map< std::string, Stats > &regstats, double dt_max, std::ostream *os)
Definition: AMReX_TinyProfiler.cpp:518
amrex::TinyProfiler::memory_alloc
static MemStat * memory_alloc(std::size_t nbytes, std::map< std::string, MemStat > &memstats) noexcept
Definition: AMReX_TinyProfiler.cpp:267
amrex::TinyProfiler::stop
void stop() noexcept
Definition: AMReX_TinyProfiler.cpp:155
amrex::TinyProfiler::fname
std::string fname
Definition: AMReX_TinyProfiler.H:116
-
amrex::TinyProfiler::PrintCallStack
static void PrintCallStack(std::ostream &os)
Definition: AMReX_TinyProfiler.cpp:950
+
amrex::TinyProfiler::PrintCallStack
static void PrintCallStack(std::ostream &os)
Definition: AMReX_TinyProfiler.cpp:947
amrex::TinyProfiler::TinyProfiler
TinyProfiler(std::string funcname) noexcept
Definition: AMReX_TinyProfiler.cpp:64
amrex::TinyProfiler::in_parallel_region
bool in_parallel_region
Definition: AMReX_TinyProfiler.H:117
amrex::TinyProfiler::start
void start() noexcept
Definition: AMReX_TinyProfiler.cpp:94
amrex::TinyProfiler::t_init
static double t_init
Definition: AMReX_TinyProfiler.H:137
amrex::TinyProfiler::memory_start
void memory_start() const noexcept
Definition: AMReX_TinyProfiler.cpp:228
amrex::TinyProfiler::memory_stop
void memory_stop() const noexcept
Definition: AMReX_TinyProfiler.cpp:245
-
amrex::TinyProfiler::StartRegion
static void StartRegion(std::string regname) noexcept
Definition: AMReX_TinyProfiler.cpp:908
+
amrex::TinyProfiler::StartRegion
static void StartRegion(std::string regname) noexcept
Definition: AMReX_TinyProfiler.cpp:905
amrex::TinyProfiler::statsmap
static std::map< std::string, std::map< std::string, Stats > > statsmap
Definition: AMReX_TinyProfiler.H:136
amrex::TinyProfiler::~TinyProfiler
~TinyProfiler()
Definition: AMReX_TinyProfiler.cpp:88
-
amrex::TinyProfiler::StopRegion
static void StopRegion(const std::string &regname) noexcept
Definition: AMReX_TinyProfiler.cpp:918
+
amrex::TinyProfiler::StopRegion
static void StopRegion(const std::string &regname) noexcept
Definition: AMReX_TinyProfiler.cpp:915
amrex::TinyProfiler::memory_free
static void memory_free(std::size_t nbytes, MemStat *stat) noexcept
Definition: AMReX_TinyProfiler.cpp:296
amrex::TinyProfiler::all_memstats
static std::vector< std::map< std::string, MemStat > * > all_memstats
Definition: AMReX_TinyProfiler.H:131
amrex::TinyProfiler::Initialize
static void Initialize() noexcept
Definition: AMReX_TinyProfiler.cpp:311
@@ -318,7 +318,7 @@
amrex::TinyProfiler::TinyProfiler
TinyProfiler(TinyProfiler &&)=delete
amrex::TinyProfiler::output_file
static std::string output_file
Definition: AMReX_TinyProfiler.H:144
amrex::TinyProfiler::n_print_tabs
static int n_print_tabs
Definition: AMReX_TinyProfiler.H:139
-
amrex::TinyProfiler::DeregisterArena
static void DeregisterArena(std::map< std::string, MemStat > &memstats) noexcept
Definition: AMReX_TinyProfiler.cpp:506
+
amrex::TinyProfiler::DeregisterArena
static void DeregisterArena(std::map< std::string, MemStat > &memstats) noexcept
Definition: AMReX_TinyProfiler.cpp:503
amrex::detail::max
@ max
Definition: AMReX_ParallelReduce.H:17
amrex
Definition: AMReX_Amr.cpp:49
amrex::MemStat
Definition: AMReX_Arena.H:12
diff --git a/amrex/docs_xml/doxygen/AMReX__TinyProfiler_8cpp.xml b/amrex/docs_xml/doxygen/AMReX__TinyProfiler_8cpp.xml index 831ba06f91..733717ec99 100644 --- a/amrex/docs_xml/doxygen/AMReX__TinyProfiler_8cpp.xml +++ b/amrex/docs_xml/doxygen/AMReX__TinyProfiler_8cpp.xml @@ -1730,523 +1730,520 @@ std::ofstreamofs; std::ostream*os=nullptr; -std::streamsizeoldprec=0; -if(ParallelDescriptor::IOProcessor()){ -autoconst&ofile=get_output_file(); -if(ofile.empty()){ -os=&(amrex::OutStream()); -}elseif(ofile!="/dev/null"){ -ofs.open(ofile,std::ios_base::app); -if(!ofs.is_open()){ -amrex::Error("TinyProfilerfailedtoopen"+ofile); -} -os=static_cast<std::ostream*>(&ofs); -} -} - -for(std::size_ti=0;i<all_memstats.size();++i){ -PrintMemStats(*(all_memstats[i]),all_memnames[i],dt_max,t_final,os); -} - -if(!bFlushing){ -all_memstats.clear(); -all_memnames.clear(); -} +if(ParallelDescriptor::IOProcessor()){ +autoconst&ofile=get_output_file(); +if(ofile.empty()){ +os=&(amrex::OutStream()); +}elseif(ofile!="/dev/null"){ +ofs.open(ofile,std::ios_base::app); +if(!ofs.is_open()){ +amrex::Error("TinyProfilerfailedtoopen"+ofile); +} +os=static_cast<std::ostream*>(&ofs); +} +} + +for(std::size_ti=0;i<all_memstats.size();++i){ +PrintMemStats(*(all_memstats[i]),all_memnames[i],dt_max,t_final,os); +} + +if(!bFlushing){ +all_memstats.clear(); +all_memnames.clear(); +} +} -if(os){os->precision(oldprec);} -} - -bool -TinyProfiler::RegisterArena(conststd::string&memory_name, -std::map<std::string,MemStat>&memstats)noexcept -{ -if(!memprof_enabled){returnfalse;} - -all_memstats.push_back(&memstats); -all_memnames.push_back(memory_name); -returntrue; -} - -void -TinyProfiler::DeregisterArena(std::map<std::string,MemStat>&memstats)noexcept -{ -if(!memprof_enabled){return;} - -for(std::size_ti=0;i<all_memstats.size();){ -if(all_memstats[i]==&memstats){ -all_memstats.erase(all_memstats.begin()+i);//NOLINT -all_memnames.erase(all_memnames.begin()+i);//NOLINT -}else{ -++i; -} -} -} - -void -TinyProfiler::PrintStats(std::map<std::string,Stats>&regstats,doubledt_max, -std::ostream*os) -{ -//makesurethesetofprofiledfunctionsisthesameonallprocesses -{ -Vector<std::string>localStrings,syncedStrings; -boolalreadySynced; - -for(autoconst&kv:regstats){ -localStrings.push_back(kv.first); -} - -amrex::SyncStrings(localStrings,syncedStrings,alreadySynced); - -if(!alreadySynced){//addthenewname -for(autoconst&s:syncedStrings){ -if(regstats.find(s)==regstats.end()){ -regstats.insert(std::make_pair(s,Stats())); -} -} -} -} - -if(regstats.empty()){return;} +bool +TinyProfiler::RegisterArena(conststd::string&memory_name, +std::map<std::string,MemStat>&memstats)noexcept +{ +if(!memprof_enabled){returnfalse;} + +all_memstats.push_back(&memstats); +all_memnames.push_back(memory_name); +returntrue; +} + +void +TinyProfiler::DeregisterArena(std::map<std::string,MemStat>&memstats)noexcept +{ +if(!memprof_enabled){return;} + +for(std::size_ti=0;i<all_memstats.size();){ +if(all_memstats[i]==&memstats){ +all_memstats.erase(all_memstats.begin()+i);//NOLINT +all_memnames.erase(all_memnames.begin()+i);//NOLINT +}else{ +++i; +} +} +} + +void +TinyProfiler::PrintStats(std::map<std::string,Stats>&regstats,doubledt_max, +std::ostream*os) +{ +//makesurethesetofprofiledfunctionsisthesameonallprocesses +{ +Vector<std::string>localStrings,syncedStrings; +boolalreadySynced; + +for(autoconst&kv:regstats){ +localStrings.push_back(kv.first); +} + +amrex::SyncStrings(localStrings,syncedStrings,alreadySynced); + +if(!alreadySynced){//addthenewname +for(autoconst&s:syncedStrings){ +if(regstats.find(s)==regstats.end()){ +regstats.insert(std::make_pair(s,Stats())); +} +} +} +} + +if(regstats.empty()){return;} + +intnprocs=ParallelDescriptor::NProcs(); +intioproc=ParallelDescriptor::IOProcessorNumber(); -intnprocs=ParallelDescriptor::NProcs(); -intioproc=ParallelDescriptor::IOProcessorNumber(); - -std::vector<ProcStats>allprocstats; -intmaxfnamelen=0; -Longmaxncalls=0; - -//nowcollectglobaldataontotheioproc -for(constauto&regstat:regstats) -{ -Longn=regstat.second.n; -doubledts[2]={regstat.second.dtin,regstat.second.dtex}; +std::vector<ProcStats>allprocstats; +intmaxfnamelen=0; +Longmaxncalls=0; + +//nowcollectglobaldataontotheioproc +for(constauto&regstat:regstats) +{ +Longn=regstat.second.n; +doubledts[2]={regstat.second.dtin,regstat.second.dtex}; + +std::vector<Long>ncalls(nprocs); +std::vector<double>dtdt(2*nprocs); -std::vector<Long>ncalls(nprocs); -std::vector<double>dtdt(2*nprocs); - -if(ParallelDescriptor::NProcs()==1) -{ -ncalls[0]=n; -dtdt[0]=dts[0]; -dtdt[1]=dts[1]; -}else -{ -ParallelDescriptor::Gather(&n,1,ncalls.data(),1,ioproc); -ParallelDescriptor::Gather(dts,2,dtdt.data(),2,ioproc); -} - -if(ParallelDescriptor::IOProcessor()){ -ProcStatspst; -for(inti=0;i<nprocs;++i){ -pst.nmin=std::min(pst.nmin,ncalls[i]); -pst.navg+=ncalls[i]; -pst.nmax=std::max(pst.nmax,ncalls[i]); -pst.dtinmin=std::min(pst.dtinmin,dtdt[2*i]); -pst.dtinavg+=dtdt[2*i]; -pst.dtinmax=std::max(pst.dtinmax,dtdt[2*i]); -pst.dtexmin=std::min(pst.dtexmin,dtdt[2*i+1]); -pst.dtexavg+=dtdt[2*i+1]; -pst.dtexmax=std::max(pst.dtexmax,dtdt[2*i+1]); -} -pst.navg/=nprocs; -pst.dtinavg/=nprocs; -pst.dtexavg/=nprocs; -pst.fname=regstat.first; -allprocstats.push_back(pst); -maxfnamelen=std::max(maxfnamelen,int(pst.fname.size())); -maxncalls=std::max(maxncalls,pst.nmax); -} -} - -if(ParallelDescriptor::IOProcessor()&&os) -{ -IOFormatSaveriofmtsaver(*os); +if(ParallelDescriptor::NProcs()==1) +{ +ncalls[0]=n; +dtdt[0]=dts[0]; +dtdt[1]=dts[1]; +}else +{ +ParallelDescriptor::Gather(&n,1,ncalls.data(),1,ioproc); +ParallelDescriptor::Gather(dts,2,dtdt.data(),2,ioproc); +} + +if(ParallelDescriptor::IOProcessor()){ +ProcStatspst; +for(inti=0;i<nprocs;++i){ +pst.nmin=std::min(pst.nmin,ncalls[i]); +pst.navg+=ncalls[i]; +pst.nmax=std::max(pst.nmax,ncalls[i]); +pst.dtinmin=std::min(pst.dtinmin,dtdt[2*i]); +pst.dtinavg+=dtdt[2*i]; +pst.dtinmax=std::max(pst.dtinmax,dtdt[2*i]); +pst.dtexmin=std::min(pst.dtexmin,dtdt[2*i+1]); +pst.dtexavg+=dtdt[2*i+1]; +pst.dtexmax=std::max(pst.dtexmax,dtdt[2*i+1]); +} +pst.navg/=nprocs; +pst.dtinavg/=nprocs; +pst.dtexavg/=nprocs; +pst.fname=regstat.first; +allprocstats.push_back(pst); +maxfnamelen=std::max(maxfnamelen,int(pst.fname.size())); +maxncalls=std::max(maxncalls,pst.nmax); +} +} + +if(ParallelDescriptor::IOProcessor()&&os) +{ +IOFormatSaveriofmtsaver(*os); + +*os<<std::setfill('')<<std::setprecision(4); +intwt=9; -*os<<std::setfill('')<<std::setprecision(4); -intwt=9; - -intwnc=(int)std::log10((double)maxncalls)+1; -wnc=std::max(wnc,int(std::string("NCalls").size())); -wt=std::max(wt,int(std::string("Excl.Min").size())); -intwp=6; -wp=std::max(wp,int(std::string("Max%").size())); - -conststd::stringhline(maxfnamelen+wnc+2+(wt+2)*3+wp+2,'-'); +intwnc=(int)std::log10((double)maxncalls)+1; +wnc=std::max(wnc,int(std::string("NCalls").size())); +wt=std::max(wt,int(std::string("Excl.Min").size())); +intwp=6; +wp=std::max(wp,int(std::string("Max%").size())); + +conststd::stringhline(maxfnamelen+wnc+2+(wt+2)*3+wp+2,'-'); + +ProcStatsother_procstat; +boolprint_other_procstat=false; -ProcStatsother_procstat; -boolprint_other_procstat=false; - -//trytocombinelow-performanceimpactfunctionsinto"Other"tocleanuptheoutput -if(print_threshold>0.){ -//initializeother_procstattozero -other_procstat.nmin=0; -other_procstat.dtinmin=0.; -other_procstat.dtexmin=0.; -other_procstat.fname="Other"; -intnum_procstats_in_other=0; - -//sortbyexclusivetimeanditeratebackwardsovertheprofiledfunctions -std::sort(allprocstats.begin(),allprocstats.end(),ProcStats::compin); -for(Longi=static_cast<Long>(allprocstats.size())-1;i>=0;--i){ -//includefunctionin"Other"iftogethertheyarebelowthethreshold -if((other_procstat.dtinmax+allprocstats[i].dtinmax)*(100.0/dt_max) -<print_threshold){ -allprocstats[i].do_print=false; -++num_procstats_in_other; - -//addtimeforfunctionto"Other" -//forminandmaxthisisnotexactbutproducesanupperlimit -other_procstat.nmin+=allprocstats[i].nmin; -other_procstat.navg+=allprocstats[i].navg; -other_procstat.nmax+=allprocstats[i].nmax; - -other_procstat.dtinmin+=allprocstats[i].dtinmin; -other_procstat.dtinavg+=allprocstats[i].dtinavg; -other_procstat.dtinmax+=allprocstats[i].dtinmax; - -other_procstat.dtexmin+=allprocstats[i].dtexmin; -other_procstat.dtexavg+=allprocstats[i].dtexavg; -other_procstat.dtexmax+=allprocstats[i].dtexmax; -}else{ -break; -} -} - -if(num_procstats_in_other==1){ -//ifonlyonefunctionwouldbeincludedin"Other" -//theoutputwouldnotgetshorter -allprocstats.back().do_print=true; -}elseif(num_procstats_in_other>=2){ -print_other_procstat=true; -} -} - -//Exclusivetime -std::sort(allprocstats.begin(),allprocstats.end(),ProcStats::compex); -if(print_other_procstat){ -//makesure"Other"isprintedattheendofthelist -allprocstats.push_back(other_procstat); -} -*os<<"\n"<<hline<<"\n"; -*os<<std::left -<<std::setw(maxfnamelen)<<"Name" -<<std::right -<<std::setw(wnc+2)<<"NCalls" -<<std::setw(wt+2)<<"Excl.Min" -<<std::setw(wt+2)<<"Excl.Avg" -<<std::setw(wt+2)<<"Excl.Max" -<<std::setw(wp+2)<<"Max%" -<<"\n"<<hline<<"\n"; -for(constauto&allprocstat:allprocstats) -{ -if(!allprocstat.do_print){ -continue; -} -*os<<std::setprecision(4)<<std::left -<<std::setw(maxfnamelen)<<allprocstat.fname -<<std::right -<<std::setw(wnc+2)<<allprocstat.navg -<<std::setw(wt+2)<<allprocstat.dtexmin -<<std::setw(wt+2)<<allprocstat.dtexavg -<<std::setw(wt+2)<<allprocstat.dtexmax -<<std::setprecision(2)<<std::setw(wp+1)<<std::fixed -<<allprocstat.dtexmax*(100.0/dt_max)<<"%"; -os->unsetf(std::ios_base::fixed); -*os<<"\n"; -} -*os<<hline<<"\n"; -if(print_other_procstat){ -allprocstats.pop_back(); -} - -//Inclusivetime -std::sort(allprocstats.begin(),allprocstats.end(),ProcStats::compin); -if(print_other_procstat){ -//makesure"Other"isprintedattheendofthelist -allprocstats.push_back(other_procstat); -} -*os<<"\n"<<hline<<"\n"; -*os<<std::left -<<std::setw(maxfnamelen)<<"Name" -<<std::right -<<std::setw(wnc+2)<<"NCalls" -<<std::setw(wt+2)<<"Incl.Min" -<<std::setw(wt+2)<<"Incl.Avg" -<<std::setw(wt+2)<<"Incl.Max" -<<std::setw(wp+2)<<"Max%" -<<"\n"<<hline<<"\n"; -for(constauto&allprocstat:allprocstats) -{ -if(!allprocstat.do_print){ -continue; -} -*os<<std::setprecision(4)<<std::left -<<std::setw(maxfnamelen)<<allprocstat.fname -<<std::right -<<std::setw(wnc+2)<<allprocstat.navg -<<std::setw(wt+2)<<allprocstat.dtinmin -<<std::setw(wt+2)<<allprocstat.dtinavg -<<std::setw(wt+2)<<allprocstat.dtinmax -<<std::setprecision(2)<<std::setw(wp+1)<<std::fixed -<<allprocstat.dtinmax*(100.0/dt_max)<<"%"; -os->unsetf(std::ios_base::fixed); -*os<<"\n"; -} -*os<<hline<<"\n\n"; -} -} - -void -TinyProfiler::PrintMemStats(std::map<std::string,MemStat>&memstats, -std::stringconst&memname,doubledt_max, -doublet_final,std::ostream*os) -{ -//makesurethesetofprofiledfunctionsisthesameonallprocesses -{ -Vector<std::string>localStrings,syncedStrings; -boolalreadySynced; - -for(autoconst&kv:memstats){ -localStrings.push_back(kv.first); -} - -amrex::SyncStrings(localStrings,syncedStrings,alreadySynced); - -if(!alreadySynced){//addthenewname -for(autoconst&s:syncedStrings){ -if(memstats.find(s)==memstats.end()){ -memstats[s];//insert -} -} -} -} - -if(memstats.empty()){return;} +//trytocombinelow-performanceimpactfunctionsinto"Other"tocleanuptheoutput +if(print_threshold>0.){ +//initializeother_procstattozero +other_procstat.nmin=0; +other_procstat.dtinmin=0.; +other_procstat.dtexmin=0.; +other_procstat.fname="Other"; +intnum_procstats_in_other=0; + +//sortbyexclusivetimeanditeratebackwardsovertheprofiledfunctions +std::sort(allprocstats.begin(),allprocstats.end(),ProcStats::compin); +for(Longi=static_cast<Long>(allprocstats.size())-1;i>=0;--i){ +//includefunctionin"Other"iftogethertheyarebelowthethreshold +if((other_procstat.dtinmax+allprocstats[i].dtinmax)*(100.0/dt_max) +<print_threshold){ +allprocstats[i].do_print=false; +++num_procstats_in_other; + +//addtimeforfunctionto"Other" +//forminandmaxthisisnotexactbutproducesanupperlimit +other_procstat.nmin+=allprocstats[i].nmin; +other_procstat.navg+=allprocstats[i].navg; +other_procstat.nmax+=allprocstats[i].nmax; + +other_procstat.dtinmin+=allprocstats[i].dtinmin; +other_procstat.dtinavg+=allprocstats[i].dtinavg; +other_procstat.dtinmax+=allprocstats[i].dtinmax; + +other_procstat.dtexmin+=allprocstats[i].dtexmin; +other_procstat.dtexavg+=allprocstats[i].dtexavg; +other_procstat.dtexmax+=allprocstats[i].dtexmax; +}else{ +break; +} +} + +if(num_procstats_in_other==1){ +//ifonlyonefunctionwouldbeincludedin"Other" +//theoutputwouldnotgetshorter +allprocstats.back().do_print=true; +}elseif(num_procstats_in_other>=2){ +print_other_procstat=true; +} +} + +//Exclusivetime +std::sort(allprocstats.begin(),allprocstats.end(),ProcStats::compex); +if(print_other_procstat){ +//makesure"Other"isprintedattheendofthelist +allprocstats.push_back(other_procstat); +} +*os<<"\n"<<hline<<"\n"; +*os<<std::left +<<std::setw(maxfnamelen)<<"Name" +<<std::right +<<std::setw(wnc+2)<<"NCalls" +<<std::setw(wt+2)<<"Excl.Min" +<<std::setw(wt+2)<<"Excl.Avg" +<<std::setw(wt+2)<<"Excl.Max" +<<std::setw(wp+2)<<"Max%" +<<"\n"<<hline<<"\n"; +for(constauto&allprocstat:allprocstats) +{ +if(!allprocstat.do_print){ +continue; +} +*os<<std::setprecision(4)<<std::left +<<std::setw(maxfnamelen)<<allprocstat.fname +<<std::right +<<std::setw(wnc+2)<<allprocstat.navg +<<std::setw(wt+2)<<allprocstat.dtexmin +<<std::setw(wt+2)<<allprocstat.dtexavg +<<std::setw(wt+2)<<allprocstat.dtexmax +<<std::setprecision(2)<<std::setw(wp+1)<<std::fixed +<<allprocstat.dtexmax*(100.0/dt_max)<<"%"; +os->unsetf(std::ios_base::fixed); +*os<<"\n"; +} +*os<<hline<<"\n"; +if(print_other_procstat){ +allprocstats.pop_back(); +} + +//Inclusivetime +std::sort(allprocstats.begin(),allprocstats.end(),ProcStats::compin); +if(print_other_procstat){ +//makesure"Other"isprintedattheendofthelist +allprocstats.push_back(other_procstat); +} +*os<<"\n"<<hline<<"\n"; +*os<<std::left +<<std::setw(maxfnamelen)<<"Name" +<<std::right +<<std::setw(wnc+2)<<"NCalls" +<<std::setw(wt+2)<<"Incl.Min" +<<std::setw(wt+2)<<"Incl.Avg" +<<std::setw(wt+2)<<"Incl.Max" +<<std::setw(wp+2)<<"Max%" +<<"\n"<<hline<<"\n"; +for(constauto&allprocstat:allprocstats) +{ +if(!allprocstat.do_print){ +continue; +} +*os<<std::setprecision(4)<<std::left +<<std::setw(maxfnamelen)<<allprocstat.fname +<<std::right +<<std::setw(wnc+2)<<allprocstat.navg +<<std::setw(wt+2)<<allprocstat.dtinmin +<<std::setw(wt+2)<<allprocstat.dtinavg +<<std::setw(wt+2)<<allprocstat.dtinmax +<<std::setprecision(2)<<std::setw(wp+1)<<std::fixed +<<allprocstat.dtinmax*(100.0/dt_max)<<"%"; +os->unsetf(std::ios_base::fixed); +*os<<"\n"; +} +*os<<hline<<"\n\n"; +} +} + +void +TinyProfiler::PrintMemStats(std::map<std::string,MemStat>&memstats, +std::stringconst&memname,doubledt_max, +doublet_final,std::ostream*os) +{ +//makesurethesetofprofiledfunctionsisthesameonallprocesses +{ +Vector<std::string>localStrings,syncedStrings; +boolalreadySynced; + +for(autoconst&kv:memstats){ +localStrings.push_back(kv.first); +} + +amrex::SyncStrings(localStrings,syncedStrings,alreadySynced); + +if(!alreadySynced){//addthenewname +for(autoconst&s:syncedStrings){ +if(memstats.find(s)==memstats.end()){ +memstats[s];//insert +} +} +} +} + +if(memstats.empty()){return;} + +constintnprocs=ParallelDescriptor::NProcs(); +constintioproc=ParallelDescriptor::IOProcessorNumber(); -constintnprocs=ParallelDescriptor::NProcs(); -constintioproc=ParallelDescriptor::IOProcessorNumber(); - -std::vector<MemProcStats>allprocstats; - -//nowcollectglobaldataontotheioproc -for(constauto&it:memstats) -{ -Longnalloc=it.second.nalloc; -Longnfree=it.second.nfree; -//simulatethefreeingofremainingmemorycurrentmemfortheavgmemmetric -Longavgmem=static_cast<Long>( -(it.second.avgmem+static_cast<double>(it.second.currentmem)*t_final)/dt_max); -Longmaxmem=it.second.maxmem; - -std::vector<Long>nalloc_vec(nprocs); -std::vector<Long>nfree_vec(nprocs); -std::vector<Long>avgmem_vec(nprocs); -std::vector<Long>maxmem_vec(nprocs); - -if(nprocs==1) -{ -nalloc_vec[0]=nalloc; -nfree_vec[0]=nfree; -avgmem_vec[0]=avgmem; -maxmem_vec[0]=maxmem; -}else -{ -ParallelDescriptor::Gather(&nalloc,1,nalloc_vec.data(),1,ioproc); -ParallelDescriptor::Gather(&nfree,1,nfree_vec.data(),1,ioproc); -ParallelDescriptor::Gather(&maxmem,1,maxmem_vec.data(),1,ioproc); -ParallelDescriptor::Gather(&avgmem,1,avgmem_vec.data(),1,ioproc); -} - -if(ParallelDescriptor::IOProcessor()){ -MemProcStatspst; -for(inti=0;i<nprocs;++i){ - -pst.nalloc+=nalloc_vec[i]; -pst.nfree+=nfree_vec[i]; -pst.avgmem_min=std::min(pst.avgmem_min,avgmem_vec[i]); -pst.avgmem_avg+=avgmem_vec[i]; -pst.avgmem_max=std::max(pst.avgmem_max,avgmem_vec[i]); -pst.maxmem_min=std::min(pst.maxmem_min,maxmem_vec[i]); -pst.maxmem_avg+=maxmem_vec[i]; -pst.maxmem_max=std::max(pst.maxmem_max,maxmem_vec[i]); -} -pst.avgmem_avg/=nprocs; -pst.maxmem_avg/=nprocs; -pst.fname=it.first; -allprocstats.push_back(pst); -} -} - -std::sort(allprocstats.begin(),allprocstats.end(),MemProcStats::compmem); - -std::vector<std::vector<std::string>>allstatsstr; - -if(nprocs==1){ -allstatsstr.push_back({"Name","Nalloc","Nfree","AvgMem","MaxMem"}); -}else{ -allstatsstr.push_back({"Name","Nalloc","Nfree", -"AvgMemmin","AvgMemavg","AvgMemmax", -"MaxMemmin","MaxMemavg","MaxMemmax"}); -} - -automem_to_string=[](Longnbytes){ -std::stringunit="B"; -if(nbytes>=10000){ -nbytes/=1024; -unit="KiB"; -} -if(nbytes>=10000){ -nbytes/=1024; -unit="MiB"; -} -if(nbytes>=10000){ -nbytes/=1024; -unit="GiB"; -} -if(nbytes>=10000){ -nbytes/=1024; -unit="TiB"; -} -returnstd::to_string(nbytes)+unit; -}; - -for(auto&stat:allprocstats){ -if(stat.nalloc!=0||stat.nfree!=0||stat.maxmem_max!=0){ -if(nprocs==1){ -allstatsstr.push_back({stat.fname, -std::to_string(stat.nalloc), -std::to_string(stat.nfree), -mem_to_string(stat.avgmem_max), -mem_to_string(stat.maxmem_max)}); -}else{ -allstatsstr.push_back({stat.fname, -std::to_string(stat.nalloc), -std::to_string(stat.nfree), -mem_to_string(stat.avgmem_min), -mem_to_string(stat.avgmem_avg), -mem_to_string(stat.avgmem_max), -mem_to_string(stat.maxmem_min), -mem_to_string(stat.maxmem_avg), -mem_to_string(stat.maxmem_max)}); -} -} -} - -std::vector<int>maxlen(allstatsstr[0].size(),0); -for(auto&strvec:allstatsstr){ -for(std::size_ti=0;i<maxlen.size();++i){ -maxlen[i]=std::max(maxlen[i],static_cast<int>(strvec[i].size())); -} -} - -for(std::size_ti=1;i<maxlen.size();++i){ -maxlen[i]+=2; -} - -if(allstatsstr.size()==1||!os){return;} +std::vector<MemProcStats>allprocstats; + +//nowcollectglobaldataontotheioproc +for(constauto&it:memstats) +{ +Longnalloc=it.second.nalloc; +Longnfree=it.second.nfree; +//simulatethefreeingofremainingmemorycurrentmemfortheavgmemmetric +Longavgmem=static_cast<Long>( +(it.second.avgmem+static_cast<double>(it.second.currentmem)*t_final)/dt_max); +Longmaxmem=it.second.maxmem; + +std::vector<Long>nalloc_vec(nprocs); +std::vector<Long>nfree_vec(nprocs); +std::vector<Long>avgmem_vec(nprocs); +std::vector<Long>maxmem_vec(nprocs); + +if(nprocs==1) +{ +nalloc_vec[0]=nalloc; +nfree_vec[0]=nfree; +avgmem_vec[0]=avgmem; +maxmem_vec[0]=maxmem; +}else +{ +ParallelDescriptor::Gather(&nalloc,1,nalloc_vec.data(),1,ioproc); +ParallelDescriptor::Gather(&nfree,1,nfree_vec.data(),1,ioproc); +ParallelDescriptor::Gather(&maxmem,1,maxmem_vec.data(),1,ioproc); +ParallelDescriptor::Gather(&avgmem,1,avgmem_vec.data(),1,ioproc); +} + +if(ParallelDescriptor::IOProcessor()){ +MemProcStatspst; +for(inti=0;i<nprocs;++i){ + +pst.nalloc+=nalloc_vec[i]; +pst.nfree+=nfree_vec[i]; +pst.avgmem_min=std::min(pst.avgmem_min,avgmem_vec[i]); +pst.avgmem_avg+=avgmem_vec[i]; +pst.avgmem_max=std::max(pst.avgmem_max,avgmem_vec[i]); +pst.maxmem_min=std::min(pst.maxmem_min,maxmem_vec[i]); +pst.maxmem_avg+=maxmem_vec[i]; +pst.maxmem_max=std::max(pst.maxmem_max,maxmem_vec[i]); +} +pst.avgmem_avg/=nprocs; +pst.maxmem_avg/=nprocs; +pst.fname=it.first; +allprocstats.push_back(pst); +} +} + +std::sort(allprocstats.begin(),allprocstats.end(),MemProcStats::compmem); + +std::vector<std::vector<std::string>>allstatsstr; + +if(nprocs==1){ +allstatsstr.push_back({"Name","Nalloc","Nfree","AvgMem","MaxMem"}); +}else{ +allstatsstr.push_back({"Name","Nalloc","Nfree", +"AvgMemmin","AvgMemavg","AvgMemmax", +"MaxMemmin","MaxMemavg","MaxMemmax"}); +} + +automem_to_string=[](Longnbytes){ +std::stringunit="B"; +if(nbytes>=10000){ +nbytes/=1024; +unit="KiB"; +} +if(nbytes>=10000){ +nbytes/=1024; +unit="MiB"; +} +if(nbytes>=10000){ +nbytes/=1024; +unit="GiB"; +} +if(nbytes>=10000){ +nbytes/=1024; +unit="TiB"; +} +returnstd::to_string(nbytes)+unit; +}; + +for(auto&stat:allprocstats){ +if(stat.nalloc!=0||stat.nfree!=0||stat.maxmem_max!=0){ +if(nprocs==1){ +allstatsstr.push_back({stat.fname, +std::to_string(stat.nalloc), +std::to_string(stat.nfree), +mem_to_string(stat.avgmem_max), +mem_to_string(stat.maxmem_max)}); +}else{ +allstatsstr.push_back({stat.fname, +std::to_string(stat.nalloc), +std::to_string(stat.nfree), +mem_to_string(stat.avgmem_min), +mem_to_string(stat.avgmem_avg), +mem_to_string(stat.avgmem_max), +mem_to_string(stat.maxmem_min), +mem_to_string(stat.maxmem_avg), +mem_to_string(stat.maxmem_max)}); +} +} +} + +std::vector<int>maxlen(allstatsstr[0].size(),0); +for(auto&strvec:allstatsstr){ +for(std::size_ti=0;i<maxlen.size();++i){ +maxlen[i]=std::max(maxlen[i],static_cast<int>(strvec[i].size())); +} +} + +for(std::size_ti=1;i<maxlen.size();++i){ +maxlen[i]+=2; +} + +if(allstatsstr.size()==1||!os){return;} + +IOFormatSaveriofmtsaver(*os); +*os<<std::setfill(''); -IOFormatSaveriofmtsaver(*os); -*os<<std::setfill(''); - -intlenhline=0; -for(autoi:maxlen){ -lenhline+=i; -} -conststd::stringhline(lenhline,'-'); - -*os<<memname<<"Usage:\n"; -*os<<hline<<"\n"; -for(std::size_ti=0;i<allstatsstr.size();++i){ -*os<<std::left<<std::setw(maxlen[0])<<allstatsstr[i][0]; -for(std::size_tj=1;j<maxlen.size();++j){ -*os<<std::right<<std::setw(maxlen[j])<<allstatsstr[i][j]; -} -*os<<'\n'; -if(i==0){ -*os<<hline<<"\n"; -} -} -*os<<hline<<"\n\n"; -} - -void -TinyProfiler::StartRegion(std::stringregname)noexcept -{ -if(!enabled){return;} - -if(std::find(regionstack.begin(),regionstack.end(),regname)==regionstack.end()){ -regionstack.emplace_back(std::move(regname)); -} -} - -void -TinyProfiler::StopRegion(conststd::string&regname)noexcept -{ -if(!enabled){return;} - -if(regname==regionstack.back()){ -regionstack.pop_back(); -} -} - -TinyProfileRegion::TinyProfileRegion(std::stringa_regname)noexcept -:regname(std::move(a_regname)), -tprof(std::string("REG::")+regname,false) -{ -TinyProfiler::StartRegion(regname); -tprof.start(); -} - -TinyProfileRegion::TinyProfileRegion(constchar*a_regname)noexcept -:regname(a_regname), -tprof(std::string("REG::")+std::string(a_regname),false) -{ -TinyProfiler::StartRegion(a_regname); -tprof.start(); -} - -TinyProfileRegion::~TinyProfileRegion() -{ -tprof.stop(); -TinyProfiler::StopRegion(regname); -} - -void -TinyProfiler::PrintCallStack(std::ostream&os) -{ -if(!enabled){return;} - -os<<"=====TinyProfilers======\n"; -for(autoconst&x:ttstack){ -os<<*(std::get<2>(x))<<"\n"; -} -} - -std::stringconst& -TinyProfiler::get_output_file() -{ -//Insteadofreadingitonlyonce,wecouldtrytoreadtheparameter -//everytime.ButIamnotsurehowusefulthatmightbe. -staticboolfirst=true; -if(first){ -first=false; +intlenhline=0; +for(autoi:maxlen){ +lenhline+=i; +} +conststd::stringhline(lenhline,'-'); + +*os<<memname<<"Usage:\n"; +*os<<hline<<"\n"; +for(std::size_ti=0;i<allstatsstr.size();++i){ +*os<<std::left<<std::setw(maxlen[0])<<allstatsstr[i][0]; +for(std::size_tj=1;j<maxlen.size();++j){ +*os<<std::right<<std::setw(maxlen[j])<<allstatsstr[i][j]; +} +*os<<'\n'; +if(i==0){ +*os<<hline<<"\n"; +} +} +*os<<hline<<"\n\n"; +} + +void +TinyProfiler::StartRegion(std::stringregname)noexcept +{ +if(!enabled){return;} + +if(std::find(regionstack.begin(),regionstack.end(),regname)==regionstack.end()){ +regionstack.emplace_back(std::move(regname)); +} +} + +void +TinyProfiler::StopRegion(conststd::string&regname)noexcept +{ +if(!enabled){return;} + +if(regname==regionstack.back()){ +regionstack.pop_back(); +} +} + +TinyProfileRegion::TinyProfileRegion(std::stringa_regname)noexcept +:regname(std::move(a_regname)), +tprof(std::string("REG::")+regname,false) +{ +TinyProfiler::StartRegion(regname); +tprof.start(); +} + +TinyProfileRegion::TinyProfileRegion(constchar*a_regname)noexcept +:regname(a_regname), +tprof(std::string("REG::")+std::string(a_regname),false) +{ +TinyProfiler::StartRegion(a_regname); +tprof.start(); +} + +TinyProfileRegion::~TinyProfileRegion() +{ +tprof.stop(); +TinyProfiler::StopRegion(regname); +} + +void +TinyProfiler::PrintCallStack(std::ostream&os) +{ +if(!enabled){return;} + +os<<"=====TinyProfilers======\n"; +for(autoconst&x:ttstack){ +os<<*(std::get<2>(x))<<"\n"; +} +} + +std::stringconst& +TinyProfiler::get_output_file() +{ +//Insteadofreadingitonlyonce,wecouldtrytoreadtheparameter +//everytime.ButIamnotsurehowusefulthatmightbe. +staticboolfirst=true; +if(first){ +first=false; + +amrex::ParmParsepp("tiny_profiler"); +pp.query("output_file",output_file); -amrex::ParmParsepp("tiny_profiler"); -pp.query("output_file",output_file); - -if(ParallelDescriptor::IOProcessor()){ -if(!output_file.empty()&&output_file!="/dev/null"){ -if(FileSystem::Exists(output_file)){ -FileSystem::Remove(output_file); -} -} -} -} +if(ParallelDescriptor::IOProcessor()){ +if(!output_file.empty()&&output_file!="/dev/null"){ +if(FileSystem::Exists(output_file)){ +FileSystem::Remove(output_file); +} +} +} +} + +returnoutput_file; +} -returnoutput_file; -} - -} +} diff --git a/amrex/docs_xml/doxygen/classamrex_1_1TinyProfileRegion.xml b/amrex/docs_xml/doxygen/classamrex_1_1TinyProfileRegion.xml index 4aba217583..b18bbc360e 100644 --- a/amrex/docs_xml/doxygen/classamrex_1_1TinyProfileRegion.xml +++ b/amrex/docs_xml/doxygen/classamrex_1_1TinyProfileRegion.xml @@ -47,7 +47,7 @@ - + @@ -64,7 +64,7 @@ - + @@ -141,7 +141,7 @@ - + diff --git a/amrex/docs_xml/doxygen/classamrex_1_1TinyProfiler.xml b/amrex/docs_xml/doxygen/classamrex_1_1TinyProfiler.xml index 44de1921dd..c2224e56ae 100644 --- a/amrex/docs_xml/doxygen/classamrex_1_1TinyProfiler.xml +++ b/amrex/docs_xml/doxygen/classamrex_1_1TinyProfiler.xml @@ -563,7 +563,7 @@ - + bool @@ -584,7 +584,7 @@ - + void @@ -601,7 +601,7 @@ - + void @@ -618,7 +618,7 @@ - + void @@ -635,7 +635,7 @@ - + void @@ -652,7 +652,7 @@ - + @@ -667,7 +667,7 @@ - + void @@ -692,7 +692,7 @@ - + void @@ -725,7 +725,7 @@ - +