Skip to content

Commit

Permalink
Merge pull request #9787 from edgargabriel/pr/sharedfp-sm-init-v4.1
Browse files Browse the repository at this point in the history
v4.1.x: sharedfp_sm_file_component_query: add file open to ensure correct ope…
  • Loading branch information
bwbarrett authored Jan 10, 2022
2 parents 1ed0226 + 889092d commit 6aa9281
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 15 deletions.
29 changes: 28 additions & 1 deletion ompi/mca/sharedfp/sm/sharedfp_sm.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2013 University of Houston. All rights reserved.
* Copyright (c) 2008-2021 University of Houston. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -31,6 +32,8 @@
#include "ompi/mca/sharedfp/base/base.h"
#include "ompi/mca/sharedfp/sm/sharedfp_sm.h"

#include "opal/util/basename.h"

/*
* *******************************************************************
* ************************ actions structure ************************
Expand Down Expand Up @@ -94,6 +97,30 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(o
return NULL;
}
}


/* Check that we can actually open the required file */
char *filename_basename = opal_basename((char*)fh->f_filename);
char *sm_filename;
int comm_cid = -1;
int pid = ompi_comm_rank (comm);

asprintf(&sm_filename, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir,
filename_basename, comm_cid, pid);
free(filename_basename);

int sm_fd = open(sm_filename, O_RDWR | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if ( sm_fd == -1){
/*error opening file*/
opal_output(0,"mca_sharedfp_sm_component_file_query: Error, unable to open file for mmap: %s\n",sm_filename);
free(sm_filename);
return NULL;
}
close (sm_fd);
unlink(sm_filename);
free (sm_filename);

/* This module can run */
*priority = mca_sharedfp_sm_priority;
return &sm;
Expand Down
28 changes: 14 additions & 14 deletions ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013-2018 University of Houston. All rights reserved.
* Copyright (c) 2013-2021 University of Houston. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2021 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -41,6 +41,8 @@
#include "ompi/mca/sharedfp/sharedfp.h"
#include "ompi/mca/sharedfp/base/base.h"

#include "opal/util/basename.h"

#include <semaphore.h>
#include <sys/mman.h>
#include <libgen.h>
Expand All @@ -57,7 +59,6 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
struct mca_sharedfp_sm_data * sm_data = NULL;
char * filename_basename;
char * sm_filename;
int sm_filename_length;
struct mca_sharedfp_sm_offset * sm_offset_ptr;
struct mca_sharedfp_sm_offset sm_offset;
int sm_fd;
Expand Down Expand Up @@ -101,16 +102,8 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
** and then mapping it to memory
** For sharedfp we also want to put the file-backed shared memory into the tmp directory
*/
filename_basename = basename((char*)filename);
filename_basename = opal_basename((char*)filename);
/* format is "%s/%s_cid-%d-%d.sm", see below */
sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4;
sm_filename = (char*) malloc( sizeof(char) * sm_filename_length);
if (NULL == sm_filename) {
opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc sm_filename\n");
free(sm_data);
free(sh);
return OMPI_ERR_OUT_OF_RESOURCE;
}

comm_cid = ompi_comm_get_cid(comm);
if ( 0 == fh->f_rank ) {
Expand All @@ -120,20 +113,21 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
err = comm->c_coll->coll_bcast (&int_pid, 1, MPI_INT, 0, comm, comm->c_coll->coll_bcast_module );
if ( OMPI_SUCCESS != err ) {
opal_output(0,"mca_sharedfp_sm_file_open: Error in bcast operation \n");
free(sm_filename);
free(filename_basename);
free(sm_data);
free(sh);
return err;
}

snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir,
asprintf(&sm_filename, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir,
filename_basename, comm_cid, int_pid);
/* open shared memory file, initialize to 0, map into memory */
sm_fd = open(sm_filename, O_RDWR | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if ( sm_fd == -1){
/*error opening file*/
opal_output(0,"mca_sharedfp_sm_file_open: Error, unable to open file for mmap: %s\n",sm_filename);
free(filename_basename);
free(sm_filename);
free(sm_data);
free(sh);
Expand All @@ -150,6 +144,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
err = comm->c_coll->coll_barrier (comm, comm->c_coll->coll_barrier_module );
if ( OMPI_SUCCESS != err ) {
opal_output(0,"mca_sharedfp_sm_file_open: Error in barrier operation \n");
free(filename_basename);
free(sm_filename);
free(sm_data);
free(sh);
Expand All @@ -167,6 +162,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
err = OMPI_ERROR;
opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to mmap file: %s\n",sm_filename);
opal_output(0, "%s\n", strerror(errno));
free(filename_basename);
free(sm_filename);
free(sm_data);
free(sh);
Expand All @@ -185,6 +181,10 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
sm_data->sem_name = (char*) malloc( sizeof(char) * 253);
snprintf(sm_data->sem_name,252,"OMPIO_%s",filename_basename);
#endif
// We're now done with filename_basename. Free it here so that we
// don't have to keep freeing it in the error/return cases.
free(filename_basename);
filename_basename = NULL;

if( (sm_data->mutex = sem_open(sm_data->sem_name, O_CREAT, 0644, 1)) != SEM_FAILED ) {
#elif defined(HAVE_SEM_INIT)
Expand Down

0 comments on commit 6aa9281

Please sign in to comment.