Skip to content

Commit

Permalink
prov/util: util_coll uses peer_mc_context
Browse files Browse the repository at this point in the history
util_coll: when fi_join() is called with the FI_PEER flag, the context argument is
treated as a struct fi_peer_mc_context; its mc_fid is extracted and used as the actual
context of the fi_join() operation. The join completion event is likewise reported
with mc_fid as the event's context.

Signed-off-by: Tomasz Gromadzki <[email protected]>
  • Loading branch information
grom72 committed Dec 14, 2022
1 parent 35608c9 commit 401e416
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
1 change: 1 addition & 0 deletions include/ofi_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,7 @@ struct util_coll_mc {
uint16_t group_id;
uint16_t seq;
ofi_atomic32_t ref;
struct fid_mc *peer_mc;
};

struct util_av_set {
Expand Down
15 changes: 14 additions & 1 deletion prov/coll/src/coll_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,7 @@ void coll_join_comp(struct util_coll_operation *coll_op)
struct fi_eq_entry entry;
struct coll_ep *ep;
struct coll_eq *eq;
uint64_t flags;

ep = container_of(coll_op->ep, struct coll_ep, util_ep.ep_fid);
eq = container_of(ep->util_ep.eq, struct coll_eq, util_eq.eq_fid);
Expand All @@ -709,8 +710,11 @@ void coll_join_comp(struct util_coll_operation *coll_op)
entry.fid = &coll_op->mc->mc_fid.fid;
entry.context = coll_op->context;

flags = FI_COLLECTIVE;
if (coll_op->mc->peer_mc)
flags |= FI_PEER;
if (fi_eq_write(eq->peer_eq, FI_JOIN_COMPLETE, &entry,
sizeof(struct fi_eq_entry), FI_COLLECTIVE) < 0)
sizeof(struct fi_eq_entry), flags) < 0)
FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN,
"join collective - eq write failed\n");

Expand Down Expand Up @@ -911,6 +915,7 @@ static struct util_coll_mc *coll_create_mc(struct util_av_set *av_set,
int coll_join_collective(struct fid_ep *ep, const void *addr,
uint64_t flags, struct fid_mc **mc, void *context)
{
struct fi_peer_mc_context *peer_context;
struct util_coll_mc *new_coll_mc;
struct util_av_set *av_set;
struct util_coll_mc *coll_mc;
Expand All @@ -924,6 +929,11 @@ int coll_join_collective(struct fid_ep *ep, const void *addr,
if (!(flags & FI_COLLECTIVE))
return -FI_ENOSYS;

if (flags & FI_PEER) {
peer_context = context;
context = peer_context->mc_fid;
}

c_addr = (struct fi_collective_addr *)addr;
coll_addr = c_addr->coll_addr;
set = c_addr->set;
Expand All @@ -941,6 +951,9 @@ int coll_join_collective(struct fid_ep *ep, const void *addr,
if (!new_coll_mc)
return -FI_ENOMEM;

if (flags & FI_PEER)
new_coll_mc->peer_mc = context;

/* get the rank */
coll_find_local_rank(ep, new_coll_mc);
coll_find_local_rank(ep, coll_mc);
Expand Down

0 comments on commit 401e416

Please sign in to comment.