Skip to content

Commit

Permalink
sharedfp_sm_file_component_query: add file open to ensure correct operations
Browse files Browse the repository at this point in the history

Try to open the sharedfp/sm file during the query operation to ensure
that the component can actually run. This is based on reports on the mailing list that
the sharedfp/sm component causes problems in certain circumstances.

Fixes issue #9656

Signed-off-by: Edgar Gabriel <[email protected]>
  • Loading branch information
edgargabriel committed Dec 21, 2021
1 parent f500f8d commit 5e6b71a
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions ompi/mca/sharedfp/sm/sharedfp_sm.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,37 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(o
return NULL;
}
}


/* Check that we can actually open the required file */
char *filename_basename = basename((char*)fh->f_filename);

/* format is "%s/%s_cid-%d-%d.sm", see below */
int sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4;
char *sm_filename = (char*) malloc( sizeof(char) * sm_filename_length);
if (NULL == sm_filename) {
opal_output(0, "mca_sharedfp_sm_component_file_query: Error, unable to malloc sm_filename\n");
return NULL;
}

int comm_cid = -1;
int pid = ompi_comm_rank (comm);

snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir,
filename_basename, comm_cid, pid);

int sm_fd = open(sm_filename, O_RDWR | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if ( sm_fd == -1){
/*error opening file*/
opal_output(0,"mca_sharedfp_sm_component_file_query: Error, unable to open file for mmap: %s\n",sm_filename);
free(sm_filename);
return NULL;
}
close (sm_fd);
unlink(sm_filename);
free (sm_filename);

/* This module can run */
*priority = mca_sharedfp_sm_priority;
return &sm;
Expand Down

0 comments on commit 5e6b71a

Please sign in to comment.