From 35a474fad60920dff1580d93fb3eb32640478157 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 4 Jan 2024 11:16:40 -0500 Subject: [PATCH 1/7] Create proper target when installed Umpire is provided Currently, the TiledArray_UMPIRE target assumes that Umpire is build as part of TA, which fails if umpire was provided via UMPIRE_INSTALL_DIR. Signed-off-by: Joseph Schuchart --- external/umpire.cmake | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/external/umpire.cmake b/external/umpire.cmake index aa98f27b1e..4e9a005341 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -14,6 +14,21 @@ if(_UMPIRE_INSTALL_DIR) # find_package(umpire REQUIRED) message(STATUS "Umpire found at ${_UMPIRE_INSTALL_DIR}") + add_library(TiledArray_UMPIRE INTERFACE) + + set_target_properties( + TiledArray_UMPIRE + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES + "${_UMPIRE_INSTALL_DIR}/include" + INTERFACE_LINK_LIBRARIES + "umpire" + INTERFACE_LINK_DIRECTORIES + "${_UMPIRE_INSTALL_DIR}/lib/" + ) + + install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) + elseif(TA_EXPERT) message("** Umpire was not found") @@ -190,23 +205,22 @@ else() set(_UMPIRE_INSTALL_DIR ${EXTERNAL_INSTALL_DIR}) -endif(_UMPIRE_INSTALL_DIR) - -# manually add Umpire library -add_library(TiledArray_UMPIRE INTERFACE) + add_library(TiledArray_UMPIRE INTERFACE) -set_target_properties( - TiledArray_UMPIRE - PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES - "$;$;$;$;$;$" - INTERFACE_LINK_LIBRARIES - "$;$" - ) + set_target_properties( + TiledArray_UMPIRE + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES + "$;$;$;$;$;$" + INTERFACE_LINK_LIBRARIES + "$;$" + ) install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) +endif(_UMPIRE_INSTALL_DIR) + #TODO test Umpire endif(NOT TARGET TiledArray_UMPIRE) From bc1b712d1315ef7ae352776ef3b4309701d38bff Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 7 Jan 2024 16:35:22 -0500 Subject: [PATCH 2/7] introduced TA_TRACE_GLOBAL_COMM_STATS CMake option that enables tracing stats of communication within global objects (DistEval's + DistributedStorage) --- CMakeLists.txt | 4 ++++ src/TiledArray/config.h.in | 3 +++ 2 files changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a47fbd989..7f98e3fbf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,6 +165,10 @@ if(TA_ENABLE_TILE_OPS_LOGGING AND NOT DEFINED TA_TILE_OPS_LOG_LEVEL) set(TA_TILE_OPS_LOG_LEVEL 1) endif(TA_ENABLE_TILE_OPS_LOGGING AND NOT DEFINED TA_TILE_OPS_LOG_LEVEL) +option(TA_TRACE_GLOBAL_COMM_STATS "Enable tracing of communication stats of global objects (DistEval's and DIstributedStorage) TiledArray" OFF) +add_feature_info(TASK_TRACE_DEBUG TA_TRACE_GLOBAL_COMM_STATS "Debug communication stats of global objects (DistEval's and DIstributedStorage) TiledArray") +set(TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE ${TA_TRACE_GLOBAL_COMM_STATS}) + option(TA_RANGEV3 "Enable Range-V3 library" OFF) add_feature_info(TA_RANGEV3 TA_RANGEV3 "Range-V3 ranges library") diff --git a/src/TiledArray/config.h.in b/src/TiledArray/config.h.in index 1c38298623..79f9f0932a 100644 --- a/src/TiledArray/config.h.in +++ b/src/TiledArray/config.h.in @@ -174,6 +174,9 @@ #cmakedefine TA_ENABLE_TILE_OPS_LOGGING 1 #define TA_TILE_OPS_LOG_LEVEL 0@TA_TILE_OPS_LOG_LEVEL@ +/* Enables collection of communication statistics for global objects (DistEval and DistributedStorage) */ +#cmakedefine TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE 1 + /* ----------- pragma helpers ---------------*/ #define TILEDARRAY_PRAGMA(x) _Pragma(#x) /* same as TILEDARRAY_PRAGMA(x), but expands x */ From 56e0e2efb82570cfc24b5745874fd6c30b4ef1a3 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 7 Jan 2024 16:42:25 -0500 Subject: [PATCH 3/7] if configured with TA_TRACE_GLOBAL_COMM_STATS will collect stats of DistEval comms --- src/TiledArray/dist_eval/array_eval.h | 160 ++++++++++++++++++-- src/TiledArray/dist_eval/binary_eval.h | 68 ++++++++- src/TiledArray/dist_eval/contraction_eval.h | 44 +++++- src/TiledArray/dist_eval/dist_eval.h | 94 ++++++++++-- src/TiledArray/dist_eval/unary_eval.h | 33 +++- 5 files changed, 366 insertions(+), 33 deletions(-) diff --git a/src/TiledArray/dist_eval/array_eval.h b/src/TiledArray/dist_eval/array_eval.h index 10ad0543e0..6dade3dc2b 100644 --- a/src/TiledArray/dist_eval/array_eval.h +++ b/src/TiledArray/dist_eval/array_eval.h @@ -198,6 +198,26 @@ class ArrayEvalImpl std::shared_ptr op_; ///< The tile operation BlockRange block_range_; ///< Sub-block range +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + // tracing artifacts + using pending_counter_t = std::atomic[]; // 1 counter per rank + mutable std::shared_ptr + ntiles_pending_; // number of pending tiles from each rank + mutable std::shared_ptr + ntasks_pending_; // number of pending tasks using data from each rank + + struct AtomicCounterDecreaser : public madness::CallbackInterface { + std::shared_ptr> counter; + + AtomicCounterDecreaser(std::shared_ptr> counter) + : counter(std::move(counter)) {} + void notify() override { + --(*counter); + delete this; + } + }; +#endif + public: /// Construct with full array range @@ -217,7 +237,28 @@ class ArrayEvalImpl : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), array_(array), op_(std::make_shared(op)), - block_range_() {} + block_range_() +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + ntiles_pending_(new std::atomic[world.size()]), + ntasks_pending_(new std::atomic[world.size()]) +#endif + { +#if 0 + std::stringstream ss; + ss << "ArrayEvalImpl: id=" << this->id(); + if (array_) ss << " array.id()=" << array_.id(); + ss << "\n"; + std::cout << ss.str(); +#endif + +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + for (auto rank = 0; rank != world.size(); ++rank) { + ntiles_pending_[rank] = 0; + ntasks_pending_[rank] = 0; + } +#endif + } /// Constructor with sub-block range @@ -245,10 +286,42 @@ class ArrayEvalImpl : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), array_(array), op_(std::make_shared(op)), - block_range_(array.trange().tiles_range(), lower_bound, upper_bound) {} + block_range_(array.trange().tiles_range(), lower_bound, upper_bound) +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + ntiles_pending_(new std::atomic[world.size()]), + ntasks_pending_(new std::atomic[world.size()]) +#endif + { +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + for (auto rank = 0; rank != world.size(); ++rank) { + ntiles_pending_[rank] = 0; + ntasks_pending_[rank] = 0; + } +#endif + } /// Virtual destructor - virtual ~ArrayEvalImpl() {} + virtual ~ArrayEvalImpl() { +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + if (std::find_if(ntiles_pending_.get(), + ntiles_pending_.get() + this->world().size(), + [](const auto& v) { return v != 0; }) != + ntiles_pending_.get() + this->world().size()) { + madness::print_error( + "ArrayEvalImpl: pending tiles at destruction! (id=", this->id(), ")"); + abort(); + } + if (std::find_if(ntasks_pending_.get(), + ntasks_pending_.get() + this->world().size(), + [](const auto& v) { return v != 0; }) != + ntasks_pending_.get() + this->world().size()) { + madness::print_error( + "ArrayEvalImpl: pending tasks at destruction! (id=", this->id(), ")"); + abort(); + } +#endif + } Future get_tile(ordinal_type i) const override { // Get the array index that corresponds to the target index @@ -258,15 +331,49 @@ class ArrayEvalImpl // index to the correct location. if (block_range_.rank()) array_index = block_range_.ordinal(array_index); - // Get the tile from array_, which may be located on a remote node. - Future tile = array_.find(array_index); + const bool arg_tile_is_remote = !array_.is_local(array_index); + const ProcessID arg_tile_owner = array_.owner(array_index); - const bool consumable_tile = !array_.is_local(array_index); - - return eval_tile(tile, consumable_tile); + Future result; + bool task_created = false; + if (arg_tile_is_remote) { + TA_ASSERT(arg_tile_owner != this->world().rank()); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ntiles_pending_[arg_tile_owner]++; +#endif + auto arg_tile = array_.find(array_index); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + arg_tile.register_callback( + new AtomicCounterDecreaser(std::shared_ptr>( + ntiles_pending_, ntiles_pending_.get() + arg_tile_owner))); +#endif + std::tie(result, task_created) = + eval_tile(arg_tile, /* consumable_tile = */ true +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + arg_tile_owner +#endif + ); + } else { + TA_ASSERT(arg_tile_owner == this->world().rank()); + std::tie(result, task_created) = eval_tile(array_.find_local(array_index), + /* consumable_tile = */ false +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + arg_tile_owner +#endif + ); + } +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + TA_ASSERT(ntiles_pending_[this->world().rank()] == 0); + // even if data is local we may have created a task to evaluate it + // TA_ASSERT(ntasks_pending_[this->world().rank()] == 0); +#endif + return result; } void discard_tile(ordinal_type i) const override { + TA_ASSERT(this->is_local(i)); const_cast(this)->notify(); } @@ -277,23 +384,36 @@ class ArrayEvalImpl } /// Evaluate a single LazyArrayTile - madness::Future eval_tile( + /// @return A pair of the future to the tile and a boolean indicating whether + /// a task was created to produce the tile + [[nodiscard]] std::pair, bool> eval_tile( const madness::Future& tile, - const bool consumable_tile) const { + const bool consumable_tile +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + const ProcessID tile_owner +#endif + ) const { // Insert the tile into this evaluator for subsequent processing if (tile.probe()) { // Skip the task since the tile is ready Future result; result.set(make_tile(tile, consumable_tile)); const_cast(this)->notify(); - return result; + return {result, false}; } else { // Spawn a task to set the tile when the input tile is not ready. Future result = TensorImpl_::world().taskq.add( shared_from_this(), &ArrayEvalImpl_::make_tile, tile, consumable_tile, madness::TaskAttributes::hipri()); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ntasks_pending_[tile_owner]++; + result.register_callback( + new AtomicCounterDecreaser(std::shared_ptr>( + ntasks_pending_, ntasks_pending_.get() + tile_owner))); +#endif result.register_callback(const_cast(this)); - return result; + return {result, true}; } } /// Evaluate the tiles of this tensor @@ -303,6 +423,22 @@ class ArrayEvalImpl /// \return The number of tiles that will be set by this process int internal_eval() override { return TensorImpl_::local_nnz(); } +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + std::string status() const override { + std::stringstream ss; + ss << "ArrayEvalImpl: array.id()=" << array_.id(); + ss << " ntiles_pending=["; + for (auto rank = 0; rank != this->world().size(); ++rank) { + ss << " " << ntiles_pending_[rank]; + } + ss << "] ntasks_pending=["; + for (auto rank = 0; rank != this->world().size(); ++rank) { + ss << " " << ntasks_pending_[rank]; + } + ss << "]\n"; + return ss.str(); + } +#endif }; // class ArrayEvalImpl } // namespace detail diff --git a/src/TiledArray/dist_eval/binary_eval.h b/src/TiledArray/dist_eval/binary_eval.h index e343c087b3..62bbdb64ce 100644 --- a/src/TiledArray/dist_eval/binary_eval.h +++ b/src/TiledArray/dist_eval/binary_eval.h @@ -68,6 +68,16 @@ class BinaryEvalImpl : public DistEvalImpl, right_type right_; ///< Right argument op_type op_; ///< binary element operator +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + // artifacts of tracing + mutable ordinal_type left_ntiles_used_; // # of tiles used from left_ + mutable ordinal_type right_ntiles_used_; // # of tiles used from right_ + mutable ordinal_type + left_ntiles_discarded_; // # of tiles discarded from left_ + mutable ordinal_type + right_ntiles_discarded_; // # of tiles discarded from right_ +#endif + public: /// Construct a binary evaluator @@ -88,7 +98,15 @@ class BinaryEvalImpl : public DistEvalImpl, : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), left_(left), right_(right), - op_(op) { + op_(op) +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + left_ntiles_used_(0), + right_ntiles_used_(0), + left_ntiles_discarded_(0), + right_ntiles_discarded_(0) +#endif + { TA_ASSERT(left.trange() == right.trange()); } @@ -105,9 +123,9 @@ class BinaryEvalImpl : public DistEvalImpl, TA_ASSERT(!TensorImpl_::is_zero(i)); const auto source_index = DistEvalImpl_::perm_index_to_source(i); - const ProcessID source = - left_.owner(source_index); // Left and right - // should have the same owner + const ProcessID source = left_.owner(source_index); + // Left and right should have the same owner + TA_ASSERT(source == right_.owner(source_index)); const madness::DistributedID key(DistEvalImpl_::id(), i); return TensorImpl_::world().gop.template recv(source, key); @@ -195,6 +213,12 @@ class BinaryEvalImpl : public DistEvalImpl, &BinaryEvalImpl_::template eval_tile, target_index, left_.get(source_index), right_.get(source_index)); + TA_ASSERT(left_.is_local(source_index)); + TA_ASSERT(right_.is_local(source_index)); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + left_ntiles_used_++; + right_ntiles_used_++; +#endif ++task_count; } @@ -213,32 +237,64 @@ class BinaryEvalImpl : public DistEvalImpl, &BinaryEvalImpl_::template eval_tile, target_index, ZeroTensor(), right_.get(index)); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + right_ntiles_used_++; +#endif } else if (right_.is_zero(index)) { TensorImpl_::world().taskq.add( self, &BinaryEvalImpl_::template eval_tile, target_index, left_.get(index), ZeroTensor()); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + left_ntiles_used_++; +#endif } else { + TA_ASSERT(!left_.is_zero(index) && !right_.is_zero(index)); TensorImpl_::world().taskq.add( self, &BinaryEvalImpl_::template eval_tile, target_index, left_.get(index), right_.get(index)); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + left_ntiles_used_++; + right_ntiles_used_++; +#endif } ++task_count; } else { // Cleanup unused tiles - if (!left_.is_zero(index)) left_.discard(index); - if (!right_.is_zero(index)) right_.discard(index); + if (!left_.is_zero(index)) { +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + left_ntiles_discarded_++; +#endif + left_.discard(index); + } + if (!right_.is_zero(index)) { +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + right_ntiles_discarded_++; +#endif + right_.discard(index); + } } } } // Wait for child tensors to be evaluated, and process tasks while waiting. +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + TA_ASSERT(left_.local_nnz() == left_ntiles_used_ + left_ntiles_discarded_); + TA_ASSERT(right_.local_nnz() == + right_ntiles_used_ + right_ntiles_discarded_); +#endif left_.wait(); right_.wait(); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + // for some evaluators like SUMMA real task counts are not available even + // after wait() TA_ASSERT(left_.task_count() >= left_ntiles_used_ + + // left_ntiles_discarded_); TA_ASSERT(right_.task_count() >= + // right_ntiles_used_ + right_ntiles_discarded_); +#endif return task_count; } diff --git a/src/TiledArray/dist_eval/contraction_eval.h b/src/TiledArray/dist_eval/contraction_eval.h index 8ff0d80091..2da66628fc 100644 --- a/src/TiledArray/dist_eval/contraction_eval.h +++ b/src/TiledArray/dist_eval/contraction_eval.h @@ -118,6 +118,7 @@ class Summa typedef std::pair col_datum; ///< Datum element type for a left-hand argument column + // various tracing/debugging artifacts static constexpr const bool trace_tasks = #ifdef TILEDARRAY_ENABLE_TASK_DEBUG_TRACE true @@ -125,6 +126,16 @@ class Summa false #endif ; +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + mutable std::atomic + left_ntiles_used_; // # of tiles used from left_ + mutable std::atomic + right_ntiles_used_; // # of tiles used from right_ + mutable std::atomic + left_ntiles_discarded_; // # of tiles discarded from left_ + mutable std::atomic + right_ntiles_discarded_; // # of tiles discarded from right_ +#endif protected: // Import base class functions @@ -705,11 +716,17 @@ class Summa if (do_broadcast) { // Broadcast the tile +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ++left_ntiles_used_; +#endif const madness::DistributedID key(DistEvalImpl_::id(), index); auto tile = get_tile(left_, index); TensorImpl_::world().gop.bcast(key, tile, group_root, row_group); } else { // Discard the tile +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ++left_ntiles_discarded_; +#endif left_.discard(index); } } @@ -748,12 +765,18 @@ class Summa if (do_broadcast) { // Broadcast the tile +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ++right_ntiles_used_; +#endif const madness::DistributedID key(DistEvalImpl_::id(), index + left_.size()); auto tile = get_tile(right_, index); TensorImpl_::world().gop.bcast(key, tile, group_root, col_group); } else { // Discard the tile +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ++right_ntiles_discarded_; +#endif right_.discard(index); } } @@ -1550,7 +1573,16 @@ class Summa left_stride_(k), left_stride_local_(proc_grid.proc_rows() * k), right_stride_(1ul), - right_stride_local_(proc_grid.proc_cols()) {} + right_stride_local_(proc_grid.proc_cols()) +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + left_ntiles_used_(0), + right_ntiles_used_(0), + left_ntiles_discarded_(0), + right_ntiles_discarded_(0) +#endif + { + } virtual ~Summa() {} @@ -1728,6 +1760,16 @@ class Summa // Wait for child tensors to be evaluated, and process tasks while waiting. left_.wait(); right_.wait(); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + // values of left_ntiles_used_ etc. are not available until all broadcasts + // have been completed ... +// TA_ASSERT(left_.local_nnz() == left_ntiles_used_ + +// left_ntiles_discarded_); TA_ASSERT(right_.local_nnz() == +// right_ntiles_used_ + right_ntiles_discarded_); +// TA_ASSERT(left_.task_count() >= left_ntiles_used_ + +// left_ntiles_discarded_); TA_ASSERT(right_.task_count() >= +// right_ntiles_used_ + right_ntiles_discarded_); +#endif #ifdef TILEDARRAY_ENABLE_SUMMA_TRACE_EVAL printf("eval: finished wait children rank=%i\n", diff --git a/src/TiledArray/dist_eval/dist_eval.h b/src/TiledArray/dist_eval/dist_eval.h index 7585b7e4bf..9e0157cb8b 100644 --- a/src/TiledArray/dist_eval/dist_eval.h +++ b/src/TiledArray/dist_eval/dist_eval.h @@ -123,6 +123,28 @@ class DistEvalImpl : public TensorImpl, source_to_target_ = PermIndex(source_range, perm); target_to_source_ = PermIndex(trange.tiles_range(), inv_perm); } + +#if 0 + { + // print out expected number of tiles on each rank + std::vector ntiles_per_rank(world.size(), 0); + for (auto& i : trange.tiles_range()) { + if (!TensorImpl_::is_zero(i)) { + ntiles_per_rank[TensorImpl_::owner(i)]++; + } + } + std::stringstream ss; + ss << "DistEvalImpl: id=" << id_; + if (perm) + ss << " perm=" << perm; + ss << " ntiles=["; + for (auto& i : ntiles_per_rank) { + ss << i << " "; + } + ss << "]"; + std::cout << ss.str() << std::endl; + } +#endif } virtual ~DistEvalImpl() {} @@ -142,7 +164,8 @@ class DistEvalImpl : public TensorImpl, /// This function handles the cleanup for tiles that are not needed in /// subsequent computation. - /// \param i The index of the tile + /// \param i The index of the local tile to discard + /// \pre `this->is_local(i)` virtual void discard_tile(ordinal_type i) const = 0; /// Set tensor value @@ -234,13 +257,36 @@ class DistEvalImpl : public TensorImpl, TA_ASSERT(task_count_ >= 0); } + /// \return The number of tasks spawned on this rank (after invoking eval() + /// this should be equal to local_nnz() for simple evaluators like + /// unary/binary, or greater than that for more complex evaluators like SUMMA + ordinal_type task_count() const { + if (task_count_ == -1) + return 0; + else + return task_count_; + } + +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + /// reports evaluator status + + /// intended for debugging purposes + /// @return string containing log of the current status of evaluator (empty + /// string, unless overridden in the specialization) + [[nodiscard]] virtual std::string status() const { return {}; } +#endif }; // class DistEvalImpl -/// Tensor expression object +/// Tensor expression evaluator wrapper -/// This object holds a tensor expression. It is used to store various type -/// of tensor expressions that depend on the pimpl used to construct the -/// expression. +/// This object holds a tensor expression evaluator (DistEvalImpl). +/// +/// \note Tensor expression evaluators (DistEval and DistEvalImpl) +/// are similar to DistArray in that they has tensorial structure +/// (TensorImpl), with shape and policy, but their semantics that +/// differs from DistArray (e.g., data is not stored +/// persistently). +/// /// \tparam Tile The output tile type /// \tparam Policy The tensor policy class template @@ -333,7 +379,7 @@ class DistEval { return pimpl_->pmap(); } - /// Query the density of the tensor + /// Query if the tensor is dense /// \return \c true if the tensor is dense, otherwise false bool is_dense() const { return pimpl_->is_dense(); } @@ -348,7 +394,7 @@ class DistEval { /// \return The tiled range of the tensor const trange_type& trange() const { return pimpl_->trange(); } - /// Tile move + /// Tile accessor /// Tile is removed after it is set. /// \param i The tile index @@ -359,8 +405,12 @@ class DistEval { /// This function handles the cleanup for tiles that are not needed in /// subsequent computation. - /// \param i The index of the tile - virtual void discard(ordinal_type i) const { pimpl_->discard_tile(i); } + /// \param i The index of a local tile to discard + /// \pre `this->is_local(i)` + virtual void discard(ordinal_type i) const { + TA_ASSERT(this->is_local(i)); + pimpl_->discard_tile(i); + } /// World object accessor @@ -372,9 +422,35 @@ class DistEval { /// \return The unique id for this object madness::uniqueidT id() const { return pimpl_->id(); } + /// \return Number of nonzero tiles on this rank + /// \sa TensorImpl::local_nnz() + ordinal_type local_nnz() const { return pimpl_->local_nnz(); } + + /// \return The number of tasks spawned on this rank (after invoking eval() + /// this should be same as the value returned by local_nnz(), if everything is + /// well) + ordinal_type task_count() const { return pimpl_->task_count(); } + /// Wait for all local tiles to be evaluated void wait() const { pimpl_->wait(); } +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + /// reports evaluator status + + /// intended for debugging purposes + /// @return string containing log of the current status of evaluator (empty + /// string, unless overridden in the specialization) + std::string status() const { + std::ostringstream oss; + oss << "DistEval status: id=" << id() + << " impl_type_name=" << typeid(*(pimpl_.get())).name() + << " "; + oss << pimpl_->status(); + oss << "\n"; + return oss.str(); + } +#endif + }; // class DistEval } // namespace detail diff --git a/src/TiledArray/dist_eval/unary_eval.h b/src/TiledArray/dist_eval/unary_eval.h index d687fcb4af..66ab742ada 100644 --- a/src/TiledArray/dist_eval/unary_eval.h +++ b/src/TiledArray/dist_eval/unary_eval.h @@ -74,7 +74,13 @@ class UnaryEvalImpl const Perm& perm, const op_type& op) : DistEvalImpl_(world, trange, shape, pmap, outer(perm)), arg_(arg), - op_(op) {} + op_(op) +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + arg_ntiles_used_(0) +#endif + { + } /// Virtual destructor virtual ~UnaryEvalImpl() {} @@ -152,10 +158,12 @@ class UnaryEvalImpl // Evaluate argument arg_.eval(); - // Counter for the number of tasks submitted by this object + // Counter for the number of tasks that will use local tiles of arg_ ordinal_type task_count = 0ul; - // Make sure all local tiles are present. + // now create tasks that will produce result tiles and push them to the + // destination N.B. data is pushed, rather than pulled, to be able to manage + // the lifetime of the argument const typename pmap_interface::const_iterator end = arg_.pmap()->end(); typename pmap_interface::const_iterator it = arg_.pmap()->begin(); for (; it != end; ++it) { @@ -165,8 +173,10 @@ class UnaryEvalImpl if (!arg_.is_zero(index)) { // Get target tile index const auto target_index = DistEvalImpl_::perm_index_to_target(index); + TA_ASSERT(!this->is_zero(target_index)); // Schedule tile evaluation task + TA_ASSERT(arg_.is_local(index)); #ifdef TILEDARRAY_HAS_DEVICE TensorImpl_::world().taskq.add(self, &UnaryEvalImpl_::template eval_tile<>, @@ -175,12 +185,18 @@ class UnaryEvalImpl TensorImpl_::world().taskq.add(self, &UnaryEvalImpl_::eval_tile, target_index, arg_.get(index)); #endif - +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + arg_ntiles_used_++; +#endif ++task_count; } } // Wait for local tiles of argument to be evaluated +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + TA_ASSERT(arg_.local_nnz() == arg_ntiles_used_); + TA_ASSERT(arg_.task_count() >= arg_ntiles_used_); +#endif // arg_.wait(); return task_count; @@ -188,7 +204,14 @@ class UnaryEvalImpl arg_type arg_; ///< Argument op_type op_; ///< The unary tile operation -}; // class UnaryEvalImpl + +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + // artifacts of tracing/debugging + mutable ordinal_type arg_ntiles_used_; // # of tiles used from arg_ ; N.B. no + // tiles are discarded! +#endif + +}; // class UnaryEvalImpl } // namespace detail } // namespace TiledArray From 78e8ad3d7df467b9a283ff7c7bd2dfa8608e7d77 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 7 Jan 2024 16:43:47 -0500 Subject: [PATCH 4/7] DistributedStorage::get() can use (2 types of) caching if requested by user if configured with TA_TRACE_GLOBAL_COMM_STATS will collect stats of DistributedStorage comms --- src/TiledArray/array_impl.h | 13 +- src/TiledArray/distributed_storage.h | 224 ++++++++++++++++++++++++--- src/TiledArray/expressions/expr.h | 4 + 3 files changed, 222 insertions(+), 19 deletions(-) diff --git a/src/TiledArray/array_impl.h b/src/TiledArray/array_impl.h index beb8ba3e09..e5ad9d5db9 100644 --- a/src/TiledArray/array_impl.h +++ b/src/TiledArray/array_impl.h @@ -636,7 +636,18 @@ class ArrayImpl : public TensorImpl { /// DistributedStorage /// @return const reference to the atomic counter of live DelayedSet requests - const madness::AtomicInt& num_live_ds() const { return data_.num_live_ds(); } + const std::atomic& num_live_ds() const { + return data_.num_live_ds(); + } + + /// Reports the number of live DelayedForward requests for this object's + /// DistributedStorage + + /// @return const reference to the atomic counter of live DelayedForward + /// requests + const std::atomic& num_live_df() const { + return data_.num_live_df(); + } }; // class ArrayImpl diff --git a/src/TiledArray/distributed_storage.h b/src/TiledArray/distributed_storage.h index 47c52ead2a..60eb715c34 100644 --- a/src/TiledArray/distributed_storage.h +++ b/src/TiledArray/distributed_storage.h @@ -23,6 +23,17 @@ #include namespace TiledArray { + +/// Describes how to get remote data +enum class RemoteDataGetPolicy { + /// no caching = each get will trigger data fetch + nocache, + /// aggregate gets until data arrives, subsequent gets will trigger new gets + aggregate, + /// get once, read forever + cache +}; + namespace detail { /// Distributed storage container. @@ -41,7 +52,7 @@ namespace detail { /// thread. DO NOT construct world objects within tasks where the order of /// execution is nondeterministic. template -class DistributedStorage : public madness::WorldObject > { +class DistributedStorage : public madness::WorldObject> { public: typedef DistributedStorage DistributedStorage_; ///< This object type typedef madness::WorldObject @@ -64,8 +75,22 @@ class DistributedStorage : public madness::WorldObject > { ///< stored by this container std::shared_ptr pmap_; ///< The process map that defines the element distribution - mutable container_type data_; ///< The local data container - madness::AtomicInt num_live_ds_; ///< Number of live DelayedSet objects + mutable container_type data_; ///< The local data container + + // tracing/defensive driving artifacts + mutable std::atomic + num_live_ds_; ///< Number of live DelayedSet objects + mutable std::atomic + num_live_df_; ///< Number of live DelayedForward objects +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + mutable std::vector> + ngets_served_per_rank_; ///< Counts # of gets served to remote ranks + mutable std::vector> + ngets_sent_per_rank_; ///< Counts # of gets sent to remote ranks + mutable std::vector> + ngets_received_per_rank_; ///< Counts # of gets received from remote + ///< ranks +#endif // not allowed DistributedStorage(const DistributedStorage_&); @@ -120,6 +145,124 @@ class DistributedStorage : public madness::WorldObject > { }; // struct DelayedSet friend struct DelayedSet; + /// Tile cache works just like madness::detail::DistCache (and in fact is + /// based on it) in that it implements a local cache for asynchronous data + /// pulls. Unlike madness::detail::DistCache: + /// - this is unidirectional, i.e. there is no need to manually push data into + /// the cache (a task sending data + /// will be posted). + /// - depending on get policy data will either stay in the cache forever or + /// will be discarded upon arrival; + /// subsequent gets will need to fetch the data again (may make this + /// user-controllable in the future) + mutable container_type remote_data_cache_; + + /// Get the cache value accosted with \c key + + /// This will get the value associated with \c key to \c value. If + /// the cache element does not exist, a task requesting the data will be sent + /// to the owner, a future referring to the result will be inserted in the + /// cache so that the subsequent gets will receive the same data. After data + /// arrival the future will be removed from the cache, thus subsequent gets + /// will need to fetch the data again. \param[in] key The target key \return A + /// future that holds/will hold the cache value + future get_cached(const key_type& key, bool keep_in_cache = false) const { + // Retrieve the cached future + typename container_type::const_accessor acc; + if (remote_data_cache_.insert( + acc, key)) { // no future in cache yet, create a task + static_assert(std::is_signed_v); + const ProcessID rank = this->get_world().rank(); + ProcessID rank_w_persistence = keep_in_cache ? rank : -(rank + 1); + WorldObject_::task(owner(key), &DistributedStorage_::get_cached_handler, + key, rank_w_persistence, + madness::TaskAttributes::hipri()); +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ngets_sent_per_rank_.at(owner(key))++; +#endif + } + return acc->second; + } + + /// used to forward data that were unassigned at the time of request arrival + struct DelayedForward : public madness::CallbackInterface { + public: + DelayedForward(const DistributedStorage_& ds, key_type key, + ProcessID destination_rank, bool keep_in_cache) + : ds(ds), + key(key), + destination_rank(destination_rank), + keep_in_cache(keep_in_cache) {} + + void notify() override { + auto& data_fut = ds.get_local(key); + TA_ASSERT( + data_fut.probe()); // must be ready, otherwise why is this invoked? + if (keep_in_cache) { + ds.task(destination_rank, + &DistributedStorage_::template set_cached_handler, key, + data_fut, madness::TaskAttributes::hipri()); + } else { + ds.task(destination_rank, + &DistributedStorage_::template set_cached_handler, key, + data_fut, madness::TaskAttributes::hipri()); + } + delete this; + } + + private: + const DistributedStorage_& ds; + key_type key; + ProcessID destination_rank; + bool keep_in_cache; + }; + + void get_cached_handler(const size_type key, + ProcessID destination_rank_w_persistence) const { + const bool keep_in_cache = destination_rank_w_persistence >= 0; + const ProcessID destination_rank = + destination_rank_w_persistence < 0 + ? (-destination_rank_w_persistence - 1) + : destination_rank_w_persistence; +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ngets_served_per_rank_.at(destination_rank)++; +#endif + auto& data_fut = get_local(key); + if (data_fut.probe()) { + if (keep_in_cache) { + WorldObject_::task( + destination_rank, + &DistributedStorage_::template set_cached_handler, key, + data_fut, madness::TaskAttributes::hipri()); + } else { + WorldObject_::task( + destination_rank, + &DistributedStorage_::template set_cached_handler, key, + data_fut, madness::TaskAttributes::hipri()); + } + } else { // data not ready yet, defer send to a callback (maybe task??) + const_cast(data_fut).register_callback( + new DelayedForward(*this, key, destination_rank, keep_in_cache)); + } + } + + template + void set_cached_handler(const size_type key, const value_type& datum) const { + // assign the future first, then remove from the cache + typename container_type::accessor acc; + [[maybe_unused]] const bool inserted = remote_data_cache_.insert(acc, key); + // future must be in cache + TA_ASSERT(!inserted); + // assign it + acc->second.set(datum); + // remove it from the cache + if constexpr (!KeepInCache) remote_data_cache_.erase(acc); + +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + ngets_received_per_rank_.at(this->owner(key))++; +#endif + } + public: /// Makes an initialized, empty container with default data distribution (no /// communication) @@ -136,23 +279,47 @@ class DistributedStorage : public madness::WorldObject > { : WorldObject_(world), max_size_(max_size), pmap_(pmap), - data_((max_size / world.size()) + 11) { + data_((max_size / world.size()) + 11) +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + , + ngets_served_per_rank_(world.size()), + ngets_sent_per_rank_(world.size()), + ngets_received_per_rank_(world.size()) +#endif + { // Check that the process map is appropriate for this storage object TA_ASSERT(pmap_); TA_ASSERT(pmap_->size() == max_size); TA_ASSERT(pmap_->rank() == pmap_interface::size_type(world.rank())); TA_ASSERT(pmap_->procs() == pmap_interface::size_type(world.size())); num_live_ds_ = 0; + num_live_df_ = 0; +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + for (auto rank = 0; rank != world.size(); ++rank) { + ngets_served_per_rank_[rank] = 0; + ngets_sent_per_rank_[rank] = 0; + ngets_received_per_rank_[rank] = 0; + } +#endif WorldObject_::process_pending(); } virtual ~DistributedStorage() { if (num_live_ds_ != 0) { - madness::print_error( - "DistributedStorage (object id=", this->id(), - ") destroyed while " - "outstanding tasks exist. Add a fence() to extend the lifetime of " - "this object."); + madness::print_error("DistributedStorage (object id=", this->id(), + ") destroyed while " + "pending tasks that set its data exist. Add a " + "fence() to extend the lifetime of " + "this object."); + abort(); + } + if (num_live_df_ != 0) { + madness::print_error("DistributedStorage (object id=", this->id(), + ") destroyed while " + "pending callbacks that forward its data to other " + "ranks exist. This may indicate a bug in your " + "program or you may need to extend the lifetime of " + "this object."); abort(); } } @@ -207,18 +374,21 @@ class DistributedStorage : public madness::WorldObject > { /// \return A future to element \c i /// \throw TiledArray::Exception If \c i is greater than or equal to \c /// max_size() . - future get(size_type i) const { + future get(size_type i, + RemoteDataGetPolicy policy = RemoteDataGetPolicy::nocache) const { TA_ASSERT(i < max_size_); if (is_local(i)) { return get_local(i); } else { - // Send a request to the owner of i for the element. - future result; - WorldObject_::task(owner(i), &DistributedStorage_::get_handler, i, - result.remote_ref(get_world()), - madness::TaskAttributes::hipri()); - - return result; + if (policy == RemoteDataGetPolicy::nocache) { + // Send a request to the owner of i for the element. + future result; + WorldObject_::task(owner(i), &DistributedStorage_::get_handler, i, + result.remote_ref(get_world()), + madness::TaskAttributes::hipri()); + return result; + } else + return get_cached(i, policy == RemoteDataGetPolicy::cache); } } @@ -343,7 +513,25 @@ class DistributedStorage : public madness::WorldObject > { /// Reports the number of live DelayedSet requests /// @return const reference to the atomic counter of live DelayedSet requests - const madness::AtomicInt& num_live_ds() const { return num_live_ds_; } + const std::atomic& num_live_ds() const { return num_live_ds_; } + + /// Reports the number of live DelayedForward requests + + /// @return const reference to the atomic counter of live DelayedForward + /// requests + const std::atomic& num_live_df() const { return num_live_df_; } + +#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE + const std::vector>& ngets_served_per_rank() const { + return ngets_served_per_rank_; + } + const std::vector>& ngets_sent_per_rank() const { + return ngets_sent_per_rank_; + } + const std::vector>& ngets_received_per_rank() const { + return ngets_received_per_rank_; + } +#endif }; // class DistributedStorage } // namespace detail diff --git a/src/TiledArray/expressions/expr.h b/src/TiledArray/expressions/expr.h index 72ad9a42cd..f77d13dbad 100644 --- a/src/TiledArray/expressions/expr.h +++ b/src/TiledArray/expressions/expr.h @@ -420,6 +420,10 @@ class Expr { dist_eval.wait(); // Swap the new array with the result array object. result.swap(tsr.array()); + +#if 0 + std::cout << "array.id()=" << tsr.array().id() << " evaluated using dist_eval.id=" << dist_eval.id() << std::endl; +#endif } /// Evaluate this object and assign it to \c tsr From 989fd8e6549aaa2bb4e6017f991110c31567ba58 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Sun, 7 Jan 2024 16:46:25 -0500 Subject: [PATCH 5/7] bump MADNESS tag to pull in https://github.com/m-a-d-n-e-s-s/madness/pull/516 which fixes hangs in applications with large number of tasks --- INSTALL.md | 2 +- external/versions.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index c3b7b0659f..c48f0c19b6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -42,7 +42,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - Boost.Range: header-only, *only used for unit testing* - [BTAS](http://github.com/ValeevGroup/BTAS), tag bf0c376d5cdd6f668174b2a4c67b19634d1c0da7 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. -- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag cf3c98053453329f35b775c8b9f561301f6a997e . +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag b1f1c39c497b86ab3ef4e560a686de63eb555cc4 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. diff --git a/external/versions.cmake b/external/versions.cmake index 9499354eba..5255df9780 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG cf3c98053453329f35b775c8b9f561301f6a997e) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG 0cb3920715c9a659bbb8158f9a31db1bd97d4614) +set(TA_TRACKED_MADNESS_TAG b1f1c39c497b86ab3ef4e560a686de63eb555cc4) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG cf3c98053453329f35b775c8b9f561301f6a997e) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) From 2e4572af6dae9c2ed92a3ace8807925f9acf99a3 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 8 Jan 2024 01:51:08 -0500 Subject: [PATCH 6/7] patch Umpire to address https://github.com/LLNL/Umpire/issues/616 --- external/umpire.cmake | 2 ++ external/umpire.finalize_io.patch | 47 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 external/umpire.finalize_io.patch diff --git a/external/umpire.cmake b/external/umpire.cmake index 581839223a..c7a02d65bf 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -170,6 +170,8 @@ else() DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR} GIT_REPOSITORY ${UMPIRE_URL} GIT_TAG ${UMPIRE_TAG} + #--Patch step----------------- + PATCH_COMMAND patch -p1 -i ${CMAKE_CURRENT_SOURCE_DIR}/external/umpire.finalize_io.patch #--Configure step------------- SOURCE_DIR ${EXTERNAL_SOURCE_DIR} LIST_SEPARATOR :: diff --git a/external/umpire.finalize_io.patch b/external/umpire.finalize_io.patch new file mode 100644 index 0000000000..fa78727d7f --- /dev/null +++ b/external/umpire.finalize_io.patch @@ -0,0 +1,47 @@ +diff --git a/src/umpire/util/io.cpp b/src/umpire/util/io.cpp +index 806fb9e3..551c5e82 100644 +--- a/src/umpire/util/io.cpp ++++ b/src/umpire/util/io.cpp +@@ -52,10 +52,23 @@ std::ostream& error() + + namespace util { + ++namespace detail { ++OutputBuffer& s_log_buffer_accessor() ++{ ++ static OutputBuffer buffer; ++ return buffer; ++} ++OutputBuffer& s_error_buffer_accessor() ++{ ++ static OutputBuffer buffer; ++ return buffer; ++} ++} ++ + void initialize_io(const bool enable_log) + { +- static util::OutputBuffer s_log_buffer; +- static util::OutputBuffer s_error_buffer; ++ OutputBuffer& s_log_buffer = detail::s_log_buffer_accessor(); ++ OutputBuffer& s_error_buffer = detail::s_error_buffer_accessor(); + + s_log_buffer.setConsoleStream(nullptr); + s_error_buffer.setConsoleStream(&std::cerr); +@@ -121,6 +134,16 @@ void initialize_io(const bool enable_log) + MPI::logMpiInfo(); + } + ++void finalize_io() ++{ ++ detail::s_log_buffer_accessor().sync(); ++ detail::s_log_buffer_accessor().setConsoleStream(nullptr); ++ detail::s_log_buffer_accessor().setFileStream(nullptr); ++ detail::s_error_buffer_accessor().sync(); ++ detail::s_error_buffer_accessor().setConsoleStream(nullptr); ++ detail::s_error_buffer_accessor().setFileStream(nullptr); ++} ++ + void flush_files() + { + log().flush(); From c96d357f1f6d138bae7085362ff82637b73a24ed Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Mon, 8 Jan 2024 17:35:15 -0500 Subject: [PATCH 7/7] fixup https://github.com/ValeevGroup/tiledarray/commit/56e0e2efb82570cfc24b5745874fd6c30b4ef1a3 --- src/TiledArray/dist_eval/array_eval.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TiledArray/dist_eval/array_eval.h b/src/TiledArray/dist_eval/array_eval.h index 6dade3dc2b..a4cbdc47b1 100644 --- a/src/TiledArray/dist_eval/array_eval.h +++ b/src/TiledArray/dist_eval/array_eval.h @@ -337,7 +337,7 @@ class ArrayEvalImpl Future result; bool task_created = false; if (arg_tile_is_remote) { - TA_ASSERT(arg_tile_owner != this->world().rank()); + TA_ASSERT(arg_tile_owner != array_.world().rank()); #ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE ntiles_pending_[arg_tile_owner]++; #endif @@ -355,7 +355,7 @@ class ArrayEvalImpl #endif ); } else { - TA_ASSERT(arg_tile_owner == this->world().rank()); + TA_ASSERT(arg_tile_owner == array_.world().rank()); std::tie(result, task_created) = eval_tile(array_.find_local(array_index), /* consumable_tile = */ false #ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE