Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into gaudel/feature/tot_…
Browse files Browse the repository at this point in the history
…inner_tensor_ops
  • Loading branch information
bimalgaudel committed Jan 12, 2024
2 parents c7d43a6 + dee87e8 commit 13ab813
Show file tree
Hide file tree
Showing 14 changed files with 673 additions and 67 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,10 @@ if(TA_ENABLE_TILE_OPS_LOGGING AND NOT DEFINED TA_TILE_OPS_LOG_LEVEL)
set(TA_TILE_OPS_LOG_LEVEL 1)
endif(TA_ENABLE_TILE_OPS_LOGGING AND NOT DEFINED TA_TILE_OPS_LOG_LEVEL)

# Optional tracing of communication statistics of global objects.
# The feature is registered under the option's own name so that the feature
# summary reports it unambiguously; the value is then forwarded to
# TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE.
option(TA_TRACE_GLOBAL_COMM_STATS "Enable tracing of communication stats of global objects (DistEval's and DistributedStorage) in TiledArray" OFF)
add_feature_info(TA_TRACE_GLOBAL_COMM_STATS TA_TRACE_GLOBAL_COMM_STATS "Debug communication stats of global objects (DistEval's and DistributedStorage) in TiledArray")
set(TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE ${TA_TRACE_GLOBAL_COMM_STATS})

option(TA_RANGEV3 "Enable Range-V3 library" OFF)
add_feature_info(TA_RANGEV3 TA_RANGEV3 "Range-V3 ranges library")

Expand Down
2 changes: 1 addition & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
- Boost.Range: header-only, *only used for unit testing*
- [BTAS](http://github.com/ValeevGroup/BTAS), tag bf0c376d5cdd6f668174b2a4c67b19634d1c0da7 . If a usable BTAS installation is not found, TiledArray will download and compile
BTAS from source. *This is the recommended way to compile BTAS for all users*.
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag cf3c98053453329f35b775c8b9f561301f6a997e .
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag b1f1c39c497b86ab3ef4e560a686de63eb555cc4 .
Only the MADworld runtime and the BLAS/LAPACK C API components of MADNESS are used by TiledArray.
If a usable MADNESS installation is not found, TiledArray will download and compile
MADNESS from source. *This is the recommended way to compile MADNESS for all users*.
Expand Down
40 changes: 28 additions & 12 deletions external/umpire.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ if(_UMPIRE_INSTALL_DIR)
# find_package(umpire REQUIRED)
message(STATUS "Umpire found at ${_UMPIRE_INSTALL_DIR}")

# Wrap the Umpire installation found at _UMPIRE_INSTALL_DIR in an INTERFACE
# target, so the rest of the build can depend on TiledArray_UMPIRE.
add_library(TiledArray_UMPIRE INTERFACE)

# Headers come from <install dir>/include; the library is linked by its bare
# name ("umpire") and resolved via the <install dir>/lib/ link directory.
set_target_properties(
TiledArray_UMPIRE
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
"${_UMPIRE_INSTALL_DIR}/include"
INTERFACE_LINK_LIBRARIES
"umpire"
INTERFACE_LINK_DIRECTORIES
"${_UMPIRE_INSTALL_DIR}/lib/"
)

# Add the wrapper target to the tiledarray export set.
install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray)

elseif(TA_EXPERT)

message("** Umpire was not found")
Expand Down Expand Up @@ -155,6 +170,8 @@ else()
DOWNLOAD_DIR ${EXTERNAL_SOURCE_DIR}
GIT_REPOSITORY ${UMPIRE_URL}
GIT_TAG ${UMPIRE_TAG}
#--Patch step-----------------
PATCH_COMMAND patch -p1 -i ${CMAKE_CURRENT_SOURCE_DIR}/external/umpire.finalize_io.patch
#--Configure step-------------
SOURCE_DIR ${EXTERNAL_SOURCE_DIR}
LIST_SEPARATOR ::
Expand Down Expand Up @@ -191,23 +208,22 @@ else()

set(_UMPIRE_INSTALL_DIR ${EXTERNAL_INSTALL_DIR})

endif(_UMPIRE_INSTALL_DIR)

# manually add Umpire library

add_library(TiledArray_UMPIRE INTERFACE)
add_library(TiledArray_UMPIRE INTERFACE)

set_target_properties(
TiledArray_UMPIRE
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
"$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl/umpire/camp/include>;$<BUILD_INTERFACE:${EXTERNAL_BUILD_DIR}/src/tpl/umpire/camp/include>;$<BUILD_INTERFACE:${EXTERNAL_BUILD_DIR}/include>;$<INSTALL_INTERFACE:${_UMPIRE_INSTALL_DIR}/include>"
INTERFACE_LINK_LIBRARIES
"$<BUILD_INTERFACE:${UMPIRE_BUILD_BYPRODUCTS}>;$<INSTALL_INTERFACE:${_UMPIRE_INSTALL_DIR}/lib/libumpire${UMPIRE_DEFAULT_LIBRARY_SUFFIX}>"
)
set_target_properties(
TiledArray_UMPIRE
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
"$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl>;$<BUILD_INTERFACE:${EXTERNAL_SOURCE_DIR}/src/tpl/umpire/camp/include>;$<BUILD_INTERFACE:${EXTERNAL_BUILD_DIR}/src/tpl/umpire/camp/include>;$<BUILD_INTERFACE:${EXTERNAL_BUILD_DIR}/include>;$<INSTALL_INTERFACE:${_UMPIRE_INSTALL_DIR}/include>"
INTERFACE_LINK_LIBRARIES
"$<BUILD_INTERFACE:${UMPIRE_BUILD_BYPRODUCTS}>;$<INSTALL_INTERFACE:${_UMPIRE_INSTALL_DIR}/lib/libumpire${UMPIRE_DEFAULT_LIBRARY_SUFFIX}>"
)

install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray)

endif(_UMPIRE_INSTALL_DIR)

#TODO test Umpire

endif(NOT TARGET TiledArray_UMPIRE)
47 changes: 47 additions & 0 deletions external/umpire.finalize_io.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
diff --git a/src/umpire/util/io.cpp b/src/umpire/util/io.cpp
index 806fb9e3..551c5e82 100644
--- a/src/umpire/util/io.cpp
+++ b/src/umpire/util/io.cpp
@@ -52,10 +52,23 @@ std::ostream& error()

namespace util {

+namespace detail {
+OutputBuffer& s_log_buffer_accessor()
+{
+ static OutputBuffer buffer;
+ return buffer;
+}
+OutputBuffer& s_error_buffer_accessor()
+{
+ static OutputBuffer buffer;
+ return buffer;
+}
+}
+
void initialize_io(const bool enable_log)
{
- static util::OutputBuffer s_log_buffer;
- static util::OutputBuffer s_error_buffer;
+ OutputBuffer& s_log_buffer = detail::s_log_buffer_accessor();
+ OutputBuffer& s_error_buffer = detail::s_error_buffer_accessor();

s_log_buffer.setConsoleStream(nullptr);
s_error_buffer.setConsoleStream(&std::cerr);
@@ -121,6 +134,16 @@ void initialize_io(const bool enable_log)
MPI::logMpiInfo();
}

+void finalize_io()
+{
+ detail::s_log_buffer_accessor().sync();
+ detail::s_log_buffer_accessor().setConsoleStream(nullptr);
+ detail::s_log_buffer_accessor().setFileStream(nullptr);
+ detail::s_error_buffer_accessor().sync();
+ detail::s_error_buffer_accessor().setConsoleStream(nullptr);
+ detail::s_error_buffer_accessor().setFileStream(nullptr);
+}
+
void flush_files()
{
log().flush();
4 changes: 2 additions & 2 deletions external/versions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7)
set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626)
set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496)

set(TA_TRACKED_MADNESS_TAG cf3c98053453329f35b775c8b9f561301f6a997e)
set(TA_TRACKED_MADNESS_PREVIOUS_TAG 0cb3920715c9a659bbb8158f9a31db1bd97d4614)
set(TA_TRACKED_MADNESS_TAG b1f1c39c497b86ab3ef4e560a686de63eb555cc4)
set(TA_TRACKED_MADNESS_PREVIOUS_TAG cf3c98053453329f35b775c8b9f561301f6a997e)
set(TA_TRACKED_MADNESS_VERSION 0.10.1)
set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)

Expand Down
13 changes: 12 additions & 1 deletion src/TiledArray/array_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,18 @@ class ArrayImpl : public TensorImpl<Policy> {
/// DistributedStorage

/// @return const reference to the atomic counter of live DelayedSet requests
const madness::AtomicInt& num_live_ds() const { return data_.num_live_ds(); }
const std::atomic<std::size_t>& num_live_ds() const {
  // forward the counter owned by the underlying storage object
  const auto& live_delayed_sets = data_.num_live_ds();
  return live_delayed_sets;
}

/// Gives access to the count of DelayedForward requests that are still live
/// for the DistributedStorage held by this object

/// @return const reference to the atomic counter of live DelayedForward
/// requests, as reported by the storage object
const std::atomic<std::size_t>& num_live_df() const {
  // forward the counter owned by the underlying storage object
  const auto& live_delayed_forwards = data_.num_live_df();
  return live_delayed_forwards;
}

}; // class ArrayImpl

Expand Down
3 changes: 3 additions & 0 deletions src/TiledArray/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@
#cmakedefine TA_ENABLE_TILE_OPS_LOGGING 1
#define TA_TILE_OPS_LOG_LEVEL 0@TA_TILE_OPS_LOG_LEVEL@

/* Enables collection of communication statistics for global objects (DistEval and DistributedStorage) */
#cmakedefine TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE 1

/* ----------- pragma helpers ---------------*/
#define TILEDARRAY_PRAGMA(x) _Pragma(#x)
/* same as TILEDARRAY_PRAGMA(x), but expands x */
Expand Down
160 changes: 148 additions & 12 deletions src/TiledArray/dist_eval/array_eval.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,26 @@ class ArrayEvalImpl
std::shared_ptr<op_type> op_; ///< The tile operation
BlockRange block_range_; ///< Sub-block range

#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
// tracing artifacts
using pending_counter_t = std::atomic<std::size_t>[]; // 1 counter per rank
mutable std::shared_ptr<pending_counter_t>
ntiles_pending_; // number of pending tiles from each rank
mutable std::shared_ptr<pending_counter_t>
ntasks_pending_; // number of pending tasks using data from each rank

struct AtomicCounterDecreaser : public madness::CallbackInterface {
std::shared_ptr<std::atomic<std::size_t>> counter;

AtomicCounterDecreaser(std::shared_ptr<std::atomic<std::size_t>> counter)
: counter(std::move(counter)) {}
void notify() override {
--(*counter);
delete this;
}
};
#endif

public:
/// Construct with full array range

Expand All @@ -217,7 +237,28 @@ class ArrayEvalImpl
: DistEvalImpl_(world, trange, shape, pmap, outer(perm)),
array_(array),
op_(std::make_shared<op_type>(op)),
block_range_() {}
block_range_()
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
,
ntiles_pending_(new std::atomic<std::size_t>[world.size()]),
ntasks_pending_(new std::atomic<std::size_t>[world.size()])
#endif
{
#if 0
std::stringstream ss;
ss << "ArrayEvalImpl: id=" << this->id();
if (array_) ss << " array.id()=" << array_.id();
ss << "\n";
std::cout << ss.str();
#endif

#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
for (auto rank = 0; rank != world.size(); ++rank) {
ntiles_pending_[rank] = 0;
ntasks_pending_[rank] = 0;
}
#endif
}

/// Constructor with sub-block range

Expand Down Expand Up @@ -245,10 +286,42 @@ class ArrayEvalImpl
: DistEvalImpl_(world, trange, shape, pmap, outer(perm)),
array_(array),
op_(std::make_shared<op_type>(op)),
block_range_(array.trange().tiles_range(), lower_bound, upper_bound) {}
block_range_(array.trange().tiles_range(), lower_bound, upper_bound)
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
,
ntiles_pending_(new std::atomic<std::size_t>[world.size()]),
ntasks_pending_(new std::atomic<std::size_t>[world.size()])
#endif
{
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
for (auto rank = 0; rank != world.size(); ++rank) {
ntiles_pending_[rank] = 0;
ntasks_pending_[rank] = 0;
}
#endif
}

/// Virtual destructor
virtual ~ArrayEvalImpl() {}
/// Destructor; when communication tracing is enabled it aborts if any
/// per-rank tile or task counter is still nonzero
virtual ~ArrayEvalImpl() {
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
  // true if at least one of the per-rank counters is nonzero
  const auto has_pending = [this](const auto& counters) {
    const auto* first = counters.get();
    const auto* last = counters.get() + this->world().size();
    return std::any_of(first, last,
                       [](const auto& count) { return count != 0; });
  };

  // tiles are checked first, then tasks
  if (has_pending(ntiles_pending_)) {
    madness::print_error(
        "ArrayEvalImpl: pending tiles at destruction! (id=", this->id(), ")");
    abort();
  }
  if (has_pending(ntasks_pending_)) {
    madness::print_error(
        "ArrayEvalImpl: pending tasks at destruction! (id=", this->id(), ")");
    abort();
  }
#endif
}

Future<value_type> get_tile(ordinal_type i) const override {
// Get the array index that corresponds to the target index
Expand All @@ -258,15 +331,49 @@ class ArrayEvalImpl
// index to the correct location.
if (block_range_.rank()) array_index = block_range_.ordinal(array_index);

// Get the tile from array_, which may be located on a remote node.
Future<typename array_type::value_type> tile = array_.find(array_index);
const bool arg_tile_is_remote = !array_.is_local(array_index);
const ProcessID arg_tile_owner = array_.owner(array_index);

const bool consumable_tile = !array_.is_local(array_index);

return eval_tile(tile, consumable_tile);
Future<value_type> result;
bool task_created = false;
if (arg_tile_is_remote) {
TA_ASSERT(arg_tile_owner != array_.world().rank());
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
ntiles_pending_[arg_tile_owner]++;
#endif
auto arg_tile = array_.find(array_index);
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
arg_tile.register_callback(
new AtomicCounterDecreaser(std::shared_ptr<std::atomic<std::size_t>>(
ntiles_pending_, ntiles_pending_.get() + arg_tile_owner)));
#endif
std::tie(result, task_created) =
eval_tile(arg_tile, /* consumable_tile = */ true
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
,
arg_tile_owner
#endif
);
} else {
TA_ASSERT(arg_tile_owner == array_.world().rank());
std::tie(result, task_created) = eval_tile(array_.find_local(array_index),
/* consumable_tile = */ false
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
,
arg_tile_owner
#endif
);
}
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
TA_ASSERT(ntiles_pending_[this->world().rank()] == 0);
// even if data is local we may have created a task to evaluate it
// TA_ASSERT(ntasks_pending_[this->world().rank()] == 0);
#endif
return result;
}

void discard_tile(ordinal_type i) const override {
TA_ASSERT(this->is_local(i));
const_cast<ArrayEvalImpl_*>(this)->notify();
}

Expand All @@ -277,23 +384,36 @@ class ArrayEvalImpl
}

/// Evaluate a single LazyArrayTile
madness::Future<value_type> eval_tile(
/// @return A pair of the future to the tile and a boolean indicating whether
/// a task was created to produce the tile
[[nodiscard]] std::pair<madness::Future<value_type>, bool> eval_tile(
const madness::Future<typename array_type::value_type>& tile,
const bool consumable_tile) const {
const bool consumable_tile
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
,
const ProcessID tile_owner
#endif
) const {
// Insert the tile into this evaluator for subsequent processing
if (tile.probe()) {
// Skip the task since the tile is ready
Future<value_type> result;
result.set(make_tile(tile, consumable_tile));
const_cast<ArrayEvalImpl_*>(this)->notify();
return result;
return {result, false};
} else {
// Spawn a task to set the tile when the input tile is not ready.
Future<value_type> result = TensorImpl_::world().taskq.add(
shared_from_this(), &ArrayEvalImpl_::make_tile, tile, consumable_tile,
madness::TaskAttributes::hipri());
#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
ntasks_pending_[tile_owner]++;
result.register_callback(
new AtomicCounterDecreaser(std::shared_ptr<std::atomic<std::size_t>>(
ntasks_pending_, ntasks_pending_.get() + tile_owner)));
#endif
result.register_callback(const_cast<ArrayEvalImpl_*>(this));
return result;
return {result, true};
}
}
/// Evaluate the tiles of this tensor
Expand All @@ -303,6 +423,22 @@ class ArrayEvalImpl
/// \return The number of tiles that will be set by this process
int internal_eval() override { return TensorImpl_::local_nnz(); }

#ifdef TILEDARRAY_ENABLE_GLOBAL_COMM_STATS_TRACE
std::string status() const override {
std::stringstream ss;
ss << "ArrayEvalImpl: array.id()=" << array_.id();
ss << " ntiles_pending=[";
for (auto rank = 0; rank != this->world().size(); ++rank) {
ss << " " << ntiles_pending_[rank];
}
ss << "] ntasks_pending=[";
for (auto rank = 0; rank != this->world().size(); ++rank) {
ss << " " << ntasks_pending_[rank];
}
ss << "]\n";
return ss.str();
}
#endif
}; // class ArrayEvalImpl

} // namespace detail
Expand Down
Loading

0 comments on commit 13ab813

Please sign in to comment.