Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PFA annotations #8

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,24 @@ $ mkdir build; cd build
$ cmake \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_C_COMPILER=clang \
-DWITH_HDF5=On -DHDF5_Dir=$AMS_HDF5_PATH \
-DCMAKE_PREFIX_PATH=$INSTALL_DIR \
-DWITH_DB=On -DWITH_RMQ=On \
-Damqpcpp_DIR=$AMS_AMQPCPP_PATH \
-DBUILD_SHARED_LIBS=On \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_EXAMPLES=On \
-DMFEM_DIR=$AMS_MFEM_PATH \
-DUMPIRE_DIR=$AMS_UMPIRE_PATH \
-DWITH_MPI=On \
-DWITH_CUDA=On \
-DWITH_CALIPER=On \
-DWITH_TORCH=On -DTorch_DIR=$AMS_TORCH_PATH \
-DWITH_TESTS=Off \
-DWITH_FAISS=On -DFAISS_DIR=$AMS_FAISS_PATH \
-DAMS_CUDA_ARCH=${AMS_CUDA_ARCH} \
-DWITH_AMS_DEBUG=On \
-DWITH_PERFFLOWASPECT=On \
-Dperfflowaspect_DIR=$AMS_PFA_PATH/share \
../
Expand Down
2 changes: 2 additions & 0 deletions examples/app/utils_mfem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ using dt3 = mfem::DeviceTensor<3, T>;


template <typename Tin, typename Tout>
__attribute__((annotate("@critical_path(pointcut='around')")))
Copy link
Member

@koparasy koparasy Oct 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the PERFFASPECT macro here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The macro was causing an error here. I think its scope is limited to the src directory, though I could be wrong.

static inline void pack_ij(const int k,
const int sz_i,
const int sz_sparse_j,
Expand All @@ -63,6 +64,7 @@ static inline void pack_ij(const int k,
}

template <typename Tin, typename Tout>
__attribute__((annotate("@critical_path(pointcut='around')")))
static inline void unpack_ij(const int k,
const int sz_i,
const int sz_sparse_j,
Expand Down
1 change: 1 addition & 0 deletions examples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ double unitrand() { return (double)rand() / RAND_MAX; }

// TODO: we could do this on the device but need something more than `rand'
template <typename T>
PERFFASPECT()
void random_init(mfem::Array<T> &arr)
{
T *h_arr = arr.HostWrite();
Expand Down
1 change: 1 addition & 0 deletions src/AMSlib/wf/basedb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ class RedisDB : public BaseDB<TypeValue>
return connection_info;
}

PERFFASPECT()
void store(size_t num_elements,
std::vector<TypeValue*>& inputs,
std::vector<TypeValue*>& outputs)
Expand Down
6 changes: 6 additions & 0 deletions src/AMSlib/wf/redist_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class AMSLoadBalancer
* It initializes the 'dataElements', 'displs' on the root node and the localLoad, balancedLoad
* across all ranks.
*/
PERFFASPECT()
void init(int numIn, int numOut, AMSResourceType resource)
{
// We need to store information
Expand Down Expand Up @@ -176,6 +177,7 @@ class AMSLoadBalancer
/** @brief Computes the number of elements every rank will receive after balancing.
* @returns the number of elements computed by this rank.
**/
PERFFASPECT()
int computeBalanceLoad()
{
int rc = MPI_Bcast(&globalLoad, 1, MPI_INT, root, Comm);
Expand Down Expand Up @@ -208,6 +210,7 @@ class AMSLoadBalancer
*
* \return void.
*/
PERFFASPECT()
void distribute(FPTypeValue *src,
FPTypeValue *dest,
FPTypeValue *buffer,
Expand Down Expand Up @@ -253,6 +256,7 @@ class AMSLoadBalancer
*
* \return void.
*/
PERFFASPECT()
void distributeV(std::vector<FPTypeValue *> &src,
std::vector<FPTypeValue *> &dest,
int *gNElems,
Expand Down Expand Up @@ -352,6 +356,7 @@ class AMSLoadBalancer
* @param[out] outputs The vector to store all the output values gathered from their compute (remote) ranks.
* @param[in] resource The location of the data (CPU|GPU)
*/
PERFFASPECT()
void gatherOutputs(std::vector<FPTypeValue *> &outputs,
AMSResourceType resource)
{
Expand Down Expand Up @@ -381,6 +386,7 @@ class AMSLoadBalancer
* @param[out] inputs The vector to load balance across all compute (remote) ranks.
* @param[in] resource The location of the data (CPU|GPU)
*/
PERFFASPECT()
void scatterInputs(std::vector<FPTypeValue *> &inputs,
AMSResourceType resource)
{
Expand Down
2 changes: 2 additions & 0 deletions src/AMSlib/wf/workflow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class AMSWorkflow
* @param[in] outputs vector to 1-D vectors storing num_elements
* items to be stored in the database
*/
PERFFASPECT()
void Store(size_t num_elements,
std::vector<FPTypeValue *> &inputs,
std::vector<FPTypeValue *> &outputs)
Expand Down Expand Up @@ -241,6 +242,7 @@ class AMSWorkflow
* This transformation can exploit the parallel nature of all the required
* steps.
*/
PERFFASPECT()
void evaluate(void *probDescr,
const int totalElements,
const FPTypeValue **inputs,
Expand Down