diff --git a/src/madness/chem/CC2.cc b/src/madness/chem/CC2.cc
index 327b369b275..8bbfa79fe70 100644
--- a/src/madness/chem/CC2.cc
+++ b/src/madness/chem/CC2.cc
@@ -404,6 +404,7 @@ double CC2::solve_mp2_coupled(Pairs& doubles, Info& info) {
         std::cout << std::fixed << std::setprecision(1) << "\nFinished constant part at time " << wall_time() << std::endl;
         std::cout << std::fixed << std::setprecision(1) << "\nStarting saving pairs and energy calculation at time " << wall_time() << std::endl;
     }
+    load_balance(world, result_vec);
     // transform vector back to Pairs structure
     for (size_t i = 0; i < pair_vec.size(); i++) {
@@ -420,14 +421,25 @@ double CC2::solve_mp2_coupled(Pairs& doubles, Info& info) {
     }
     auto compute_energy = [&](const std::vector& pair_vec, std::string msg="") {
+        for (const auto& p : pair_vec) {
+            p.function().print_size("function "+p.name());
+            p.constant_part.print_size("constant_part "+p.name());
+            p.function().reconstruct();
+            p.constant_part.reconstruct();
+        }
         MacroTaskComputeCorrelationEnergy t;
         MacroTask task1(world, t);
         CC_vecfunction dummy_singles1(PARTICLE);
+
         auto pair_energies=task1(pair_vec, dummy_singles, info);
         // pair_energies is now scattered over the universe
         double total_energy=0.0;
-        for ( auto& pair_energy : pair_energies) total_energy += pair_energy.get();
+        for ( auto& pair_energy : pair_energies) {
+            double pe=pair_energy.get();
+            total_energy += pe;
+            if (world.rank()==0 and parameters.debug()) printf("pair energy for pair %12.8f\n", pe);
+        }
         // pair_energy.get() invokes a broadcast from rank 0 to all other ranks
         if (not msg.empty() and world.rank()==0) printf("%s %12.8f\n", msg.c_str(), total_energy);
@@ -464,22 +476,35 @@ double CC2::solve_mp2_coupled(Pairs& doubles, Info& info) {
         std::vector u;
         for (auto p : pair_vec) u.push_back(p.function());
-        auto residual=u-unew;
+        auto residual=truncate(u-unew,parameters.tight_thresh_6D());
         // some statistics
         auto [rmsrnorm, maxrnorm]=CCPotentials::residual_stats(residual);
+        if (parameters.debug()) {
+            CCSize sz;
+            sz.add(u);
+            sz.print(world,"size of u");
+            print_size(world,u,"u");
+
+            sz.add(u,unew,residual,pair_vec,coupling_vec);
+            for (const auto& r : solver.get_rlist()) sz.add(r);
+            for (const auto& uu : solver.get_ulist()) sz.add(uu);
+            sz.print(world,"sizes before KAIN");
+            task1.taskq_ptr->cloud.print_size(world);
+        }
         // update the pair functions
         std::string use_kain;
         if (parameters.kain()) {
             use_kain="with KAIN";
-            // std::vector kain_update = copy(world,solver.update(u, u_update));
-            std::vector kain_update = copy(world,solver.update(u, residual));
+            std::vector kain_update = solver.update(u, residual);
+            MADNESS_CHECK_THROW(solver.get_rlist()[0][0].is_reconstructed(),"solver functions are not reconstructed");
+            MADNESS_CHECK_THROW(solver.get_ulist()[0][0].is_reconstructed(),"solver functions are not reconstructed");
             truncate(kain_update);
             for (size_t i=0; i 1) {
@@ -1433,6 +1434,40 @@ class CCPairBuilder {
 };
+/// accumulates the per-rank size in GB of functions and pairs; print() reports the per-rank and the summed total
+struct CCSize {
+    double size_local=0;
+
+    CCSize() = default;
+
+    template
+    void add_helper(const std::vector>& v) {
+        if (v.size()>0) size_local+=get_size_local(v.front().world(),v);
+    }
+
+    void add_helper(const std::vector& vp) {
+        if (vp.empty()) return;
+        for (const auto& p : vp) {
+            if (p.constant_part.is_initialized()) size_local+=get_size_local(p.constant_part);
+            if (p.function_exists()) size_local+=get_size_local(p.function());
+        }
+    }
+
+    /// variadic template parameters to add the size of all functions and pairs
+    template
+    void add(const Args&... args) {
+        (add_helper(args), ...);
+    }
+
+    void print(World& world, const std::string& msg="") const {
+        double size_global=size_local;
+        world.gop.sum(size_global);
+        if (msg.size()>0 and world.rank()==0) madness::print(msg);
+        madness::print("size of all functions on rank",world.rank(),size_local);
+        if (world.rank()==0) madness::print("total size of all functions",size_global);
+
+    }
+};
 class MacroTaskMp2ConstantPart : public MacroTaskOperationBase {
diff --git a/src/madness/mra/funcimpl.h b/src/madness/mra/funcimpl.h
index 07d93ab66bf..3e3d7139a69 100644
--- a/src/madness/mra/funcimpl.h
+++ b/src/madness/mra/funcimpl.h
@@ -6815,6 +6815,9 @@ template
         /// Returns the size of the tree structure of the function ... collective global sum
         std::size_t tree_size() const;
+        /// Returns the number of coefficients in the function on this rank ... no global sum
+        std::size_t size_local() const;
+
         /// Returns the number of coefficients in the function ... collective global sum
         std::size_t size() const;
diff --git a/src/madness/mra/macrotaskq.h b/src/madness/mra/macrotaskq.h
index ec2db423763..f555e463c21 100644
--- a/src/madness/mra/macrotaskq.h
+++ b/src/madness/mra/macrotaskq.h
@@ -660,10 +660,12 @@ class MacroTask {
         return result;
     }
-private:
+//private:
     World &world;
     std::shared_ptr taskq_ptr;
+private:
+
     /// store *pointers* to the result WorldObject in the cloud and return the recordlist
     recordlistT prepare_output_records(Cloud &cloud, resultT& result) {
diff --git a/src/madness/mra/mra.h b/src/madness/mra/mra.h
index 84ca9731170..46313cab8ee 100644
--- a/src/madness/mra/mra.h
+++ b/src/madness/mra/mra.h
@@ -541,7 +541,12 @@ namespace madness {
             return impl->size();
         }
-        /// Retunrs
+        /// Return the number of coefficients in the function on this processor
+        std::size_t size_local() const {
+            PROFILE_MEMBER_FUNC(Function);
+            if (!impl) return 0;
+            return impl->size_local();
+        }
         /// Returns value of autorefine flag. No communication.
diff --git a/src/madness/mra/mraimpl.h b/src/madness/mra/mraimpl.h
index 02dacdc9e49..7595851163a 100644
--- a/src/madness/mra/mraimpl.h
+++ b/src/madness/mra/mraimpl.h
@@ -1908,34 +1908,21 @@ namespace madness {
         return sum;
     }
-    /// Returns the number of coefficients in the function ... collective global sum
+    /// Returns the number of coefficients in the function on this rank ... no global sum
     template
-    std::size_t FunctionImpl::size() const {
+    std::size_t FunctionImpl::size_local() const {
         std::size_t sum = 0;
-#if 1
-        typename dcT::const_iterator end = coeffs.end();
-        for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
-            const nodeT& node = it->second;
-            if (node.has_coeff())
-                sum+=node.size();
+        for (const auto& [key,node] : coeffs) {
+            if (node.has_coeff()) sum+=node.size();
         }
-        // print("proc",world.rank(),sum);
-#else
-        typename dcT::const_iterator end = coeffs.end();
-        for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
-            const nodeT& node = it->second;
-            if (node.has_coeff())
-                ++sum;
-        }
-        if (is_compressed())
-            for (std::size_t i=0; i
+    std::size_t FunctionImpl::size() const {
+        std::size_t sum = size_local();
+        world.gop.sum(sum);
         return sum;
     }
diff --git a/src/madness/mra/vmra.h b/src/madness/mra/vmra.h
index 037f0b2cf25..1f495f70419 100644
--- a/src/madness/mra/vmra.h
+++ b/src/madness/mra/vmra.h
@@ -1456,12 +1456,21 @@ namespace madness {
         if (a.size()==0) return std::vector >();
         World& world=a[0].world();
-        compress(world,a);
-        compress(world,b);
         std::vector > result(a.size());
-        for (unsigned int i=0; i
+    double get_size_local(World& world, const std::vector< Function >& v){
+        double size=0.0;
+        for(const auto& x:v){
+            if (x.is_initialized()) size+=x.size_local();
+        }
+        const double d=sizeof(T);
+        const double fac=1024*1024*1024;
+        return size/fac*d;
+    }
+
+    /// return the size in GB of the part of a function stored on this rank
+    template
+    double get_size_local(const Function& f){
+        return get_size_local(f.world(),std::vector >(1,f));
+    }
+
+    // gives back the size in GB
     template
     double get_size(World& world, const std::vector< Function >& v){
@@ -2042,6 +2070,21 @@ namespace madness {
         return d;
     }
+    /// load-balance using the coefficient count of every tree in vf as cost, then redistribute all NDIM-dimensional functions
+    /// NOTE(review): template argument lists were missing from this patch text and are reconstructed here -- TODO confirm
+    template <typename T, std::size_t NDIM>
+    void load_balance(World& world, std::vector<Function<T,NDIM> >& vf) {
+        struct LBCost {
+            LBCost() = default;
+            double operator()(const Key<NDIM>& key, const FunctionNode<T,NDIM>& node) const {
+                return node.coeff().size();
+            }
+        };
+
+        LoadBalanceDeux<NDIM> lb(world);
+        for (const auto& f : vf) lb.add_tree(f, LBCost());
+        FunctionDefaults<NDIM>::redistribute(world, lb.load_balance());
+    }
     /// load a vector of functions
     template