Skip to content

Commit

Permalink
some more PR feedback
Browse files Browse the repository at this point in the history
Signed-off-by: Howard Pritchard <[email protected]>
  • Loading branch information
hppritcha committed Oct 7, 2024
1 parent 05e91a0 commit 3e0ed25
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 43 deletions.
70 changes: 29 additions & 41 deletions ompi/communicator/comm_cid.c
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,7 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm
return rc;
}

int ompi_comm_get_remote_cid (ompi_communicator_t *comm, int dest, uint32_t *remote_cid)
int ompi_comm_get_remote_cid_from_pmix (ompi_communicator_t *comm, int dest, uint32_t *remote_cid)
{
ompi_proc_t *ompi_proc;
pmix_proc_t pmix_proc;
Expand All @@ -1059,53 +1059,41 @@ int ompi_comm_get_remote_cid (ompi_communicator_t *comm, int dest, uint32_t *rem

assert(NULL != remote_cid);

if (OPAL_LIKELY(OMPI_COMM_IS_GLOBAL_INDEX(comm))) {
ompi_proc = ompi_comm_peer_lookup(comm, dest);
OPAL_PMIX_CONVERT_NAME(&pmix_proc, &ompi_proc->super.proc_name);

*remote_cid = comm->c_index;
PMIx_Info_construct(&tinfo[0]);
PMIX_INFO_LOAD(&tinfo[0], PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);

} else if (0 != comm->c_index_vec[dest]) {
excid = ompi_comm_get_extended_cid(comm);

*remote_cid = comm->c_index_vec[dest];

} else {

ompi_proc = ompi_comm_peer_lookup(comm, dest);
OPAL_PMIX_CONVERT_NAME(&pmix_proc, &ompi_proc->super.proc_name);

PMIx_Info_construct(&tinfo[0]);
PMIX_INFO_LOAD(&tinfo[0], PMIX_TIMEOUT, &ompi_pmix_connect_timeout, PMIX_UINT32);

excid = ompi_comm_get_extended_cid(comm);

PMIX_INFO_CONSTRUCT(&tinfo[1]);
PMIX_INFO_LOAD(&tinfo[1], PMIX_GROUP_CONTEXT_ID, &excid.cid_base, PMIX_SIZE);
PMIX_INFO_SET_QUALIFIER(&tinfo[1]);
if (PMIX_SUCCESS != (rc = PMIx_Get(&pmix_proc, PMIX_GROUP_LOCAL_CID, tinfo, 2, &val))) {
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID cid_base %ld %s", excid.cid_base, PMIx_Error_string(rc)));
rc = OMPI_ERR_NOT_FOUND;
goto done;
}

if (NULL == val) {
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID val returned NULL"));
rc = OMPI_ERR_NOT_FOUND;
goto done;
}

if (val->type != PMIX_SIZE) {
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID type mismatch"));
rc = OMPI_ERR_TYPE_MISMATCH;
goto done;
}
PMIX_INFO_CONSTRUCT(&tinfo[1]);
PMIX_INFO_LOAD(&tinfo[1], PMIX_GROUP_CONTEXT_ID, &excid.cid_base, PMIX_SIZE);
PMIX_INFO_SET_QUALIFIER(&tinfo[1]);
if (PMIX_SUCCESS != (rc = PMIx_Get(&pmix_proc, PMIX_GROUP_LOCAL_CID, tinfo, 2, &val))) {
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID cid_base %ld %s", excid.cid_base, PMIx_Error_string(rc)));
rc = OMPI_ERR_NOT_FOUND;
goto done;
}

PMIX_VALUE_GET_NUMBER(rc, val, remote_cid64, size_t);
rc = OMPI_SUCCESS;
*remote_cid = (uint32_t)remote_cid64;
comm->c_index_vec[dest] = (uint32_t)remote_cid64;
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get PMIX_GROUP_LOCAL_CID %d for cid_base %ld", *remote_cid, excid.cid_base));
if (NULL == val) {
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID val returned NULL"));
rc = OMPI_ERR_NOT_FOUND;
goto done;
}

if (val->type != PMIX_SIZE) {
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get failed for PMIX_GROUP_LOCAL_CID type mismatch"));
rc = OMPI_ERR_TYPE_MISMATCH;
goto done;
}

PMIX_VALUE_GET_NUMBER(rc, val, remote_cid64, size_t);
rc = OMPI_SUCCESS;
*remote_cid = (uint32_t)remote_cid64;
comm->c_index_vec[dest] = (uint32_t)remote_cid64;
OPAL_OUTPUT_VERBOSE((10, ompi_comm_output, "PMIx_Get PMIX_GROUP_LOCAL_CID %d for cid_base %ld", *remote_cid, excid.cid_base));

done:
if (NULL != val) {
PMIX_VALUE_RELEASE(val);
Expand Down
31 changes: 29 additions & 2 deletions ompi/communicator/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,35 @@ static inline uint32_t ompi_comm_get_local_cid (const ompi_communicator_t* comm)
return comm->c_index;
}

int ompi_comm_get_remote_cid_from_pmix (ompi_communicator_t *comm, int dest, uint32_t *remote_cid);

/**
 * Obtain the context id (cid) to use when addressing the peer @p dest on
 * communicator @p comm.
 *
 * For communicators created by methods that do not take an input
 * communicator (e.g. MPI_Comm_create_from_group) the cid on the remote
 * side may differ from the local one, so it cannot always be taken from
 * comm->c_index.
 *
 * @param[in]  comm        communicator being queried
 * @param[in]  dest        peer rank; used as the index into comm->c_index_vec
 * @param[out] remote_cid  receives the cid for the peer; must not be NULL
 *
 * @return OMPI_SUCCESS when the cid is available locally, otherwise
 *         whatever ompi_comm_get_remote_cid_from_pmix() returns.
 */
static inline int ompi_comm_get_remote_cid (ompi_communicator_t *comm, int dest, uint32_t *remote_cid)
{
    assert(NULL != remote_cid);

    /* Path flagged as the likely one: the communicator uses a global
     * index, so the local c_index is reported as the remote cid. */
    if (OPAL_LIKELY(OMPI_COMM_IS_GLOBAL_INDEX(comm))) {
        *remote_cid = comm->c_index;
        return OMPI_SUCCESS;
    }

    /* A non-zero per-peer entry is used directly; zero means "not known yet". */
    if (0 != comm->c_index_vec[dest]) {
        *remote_cid = comm->c_index_vec[dest];
        return OMPI_SUCCESS;
    }

    /* Nothing recorded locally for this peer: defer to the PMIx-based lookup. */
    return ompi_comm_get_remote_cid_from_pmix(comm, dest, remote_cid);
}

/**
* Get the extended context ID for the communicator, suitable for passing
* to ompi_comm_lookup_cid for getting the communicator back
Expand Down Expand Up @@ -626,8 +655,6 @@ static inline bool ompi_comm_instances_same(const ompi_communicator_t *comm1,
return comm1->instance == comm2->instance;
}

int ompi_comm_get_remote_cid (ompi_communicator_t *comm, int dest, uint32_t *remote_cid);

#if OPAL_ENABLE_FT_MPI
/*
* Support for MPI_ANY_SOURCE point-to-point operations
Expand Down

0 comments on commit 3e0ed25

Please sign in to comment.