Skip to content

Commit

Permalink
Merge pull request open-mpi#5193 from hjelmn/osc_sm_location
Browse files Browse the repository at this point in the history
Use /dev/shm for shared memory files in osc components
  • Loading branch information
hjelmn authored Jun 5, 2018
2 parents 948e38d + e9de425 commit 64a5baa
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 11 deletions.
3 changes: 3 additions & 0 deletions ompi/mca/osc/rdma/osc_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ struct ompi_osc_rdma_component_t {

/** aggregation free list */
opal_free_list_t aggregate;

/** directory in which backing files are placed */
char *backing_directory;
};
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;

Expand Down
18 changes: 15 additions & 3 deletions ompi/mca/osc/rdma/osc_rdma_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,18 @@ static int ompi_osc_rdma_component_register (void)
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names);
free(description_str);

if (0 == access ("/dev/shm", W_OK)) {
mca_osc_rdma_component.backing_directory = "/dev/shm";
} else {
mca_osc_rdma_component.backing_directory = ompi_process_info.proc_session_dir;
}

(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "backing_directory",
"Directory to place backing files for memory windows. "
"This directory should be on a local filesystem such as /tmp or "
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_rdma_component.backing_directory);

/* register performance variables */

Expand Down Expand Up @@ -602,9 +614,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
}

/* allocate the shared memory segment */
ret = asprintf (&data_file, "%s"OPAL_PATH_SEP"window_%d.%s",
ompi_process_info.job_session_dir, ompi_comm_get_cid (module->comm),
ompi_process_info.nodename);
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d",
mca_osc_rdma_component.backing_directory, ompi_process_info.nodename,
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
if (0 > ret) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
break;
Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/osc/sm/osc_sm.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t;

/** State of the osc/sm component. */
struct ompi_osc_sm_component_t {
/** base osc component structure; must remain the first member */
ompi_osc_base_component_t super;

/** directory in which the backing file of a shared memory window is
 * created; its default is chosen in component_register() and it is
 * exposed as the "backing_directory" MCA variable */
char *backing_directory;
};
typedef struct ompi_osc_sm_component_t ompi_osc_sm_component_t;
OMPI_DECLSPEC extern ompi_osc_sm_component_t mca_osc_sm_component;
Expand Down
50 changes: 42 additions & 8 deletions ompi/mca/osc/sm/osc_sm_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ static int component_finalize(void);
static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct opal_info_t *info,
int flavor);
static int component_register (void);
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
struct ompi_communicator_t *comm, struct opal_info_t *info,
int flavor, int *model);
Expand All @@ -51,6 +52,7 @@ ompi_osc_sm_component_t mca_osc_sm_component = {
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
.mca_open_component = component_open,
.mca_register_component_params = component_register,
},
.osc_data = { /* mca_base_component_data */
/* The component is not checkpoint ready */
Expand Down Expand Up @@ -105,6 +107,23 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = {
}
};

/**
 * Register the MCA variables of the osc/sm component.
 *
 * Picks the default backing directory (/dev/shm when the calling process can
 * write to it, the per-process session directory otherwise) and then exposes
 * it as the "backing_directory" MCA variable.
 *
 * @return OPAL_SUCCESS in all cases; the result of the variable registration
 *         is intentionally ignored.
 */
static int component_register (void)
{
    /* probe once whether /dev/shm is usable for backing files */
    const int shm_writable = (0 == access ("/dev/shm", W_OK));

    /* the default must be in place before the variable is registered */
    mca_osc_sm_component.backing_directory = shm_writable
        ? "/dev/shm"
        : ompi_process_info.proc_session_dir;

    (void) mca_base_component_var_register (&mca_osc_sm_component.super.osc_version,
                                            "backing_directory",
                                            "Directory to place backing files for shared memory windows. "
                                            "This directory should be on a local filesystem such as /tmp or "
                                            "/dev/shm (default: (linux) /dev/shm, (others) session directory)",
                                            MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                            OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_READONLY,
                                            &mca_osc_sm_component.backing_directory);

    return OPAL_SUCCESS;
}

static int
component_open(void)
Expand Down Expand Up @@ -169,6 +188,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
{
ompi_osc_sm_module_t *module = NULL;
int comm_size = ompi_comm_size (comm);
bool unlink_needed = false;
int ret = OMPI_ERROR;

if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
Expand Down Expand Up @@ -262,10 +282,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64);
if (0 == ompi_comm_rank (module->comm)) {
char *data_file;
if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s",
ompi_process_info.proc_session_dir,
ompi_comm_get_cid(module->comm),
ompi_process_info.nodename) < 0) {
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d",
mca_osc_sm_component.backing_directory, ompi_process_info.nodename,
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
if (ret < 0) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

Expand All @@ -274,6 +294,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
if (OPAL_SUCCESS != ret) {
goto error;
}

unlink_needed = true;
}

ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
Expand All @@ -287,6 +309,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
goto error;
}

ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
module->comm, module->comm->c_coll->coll_bcast_module);
if (OMPI_SUCCESS != ret) {
goto error;
}

if (0 == ompi_comm_rank (module->comm)) {
opal_shmem_unlink (&module->seg_ds);
unlink_needed = false;
}

module->sizes = malloc(sizeof(size_t) * comm_size);
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
module->bases = malloc(sizeof(void*) * comm_size);
Expand Down Expand Up @@ -399,6 +432,11 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
return OMPI_SUCCESS;

error:

if (0 == ompi_comm_rank (module->comm) && unlink_needed) {
opal_shmem_unlink (&module->seg_ds);
}

ompi_osc_sm_free (win);

return ret;
Expand Down Expand Up @@ -477,10 +515,6 @@ ompi_osc_sm_free(struct ompi_win_t *win)
module->comm->c_coll->coll_barrier(module->comm,
module->comm->c_coll->coll_barrier_module);

if (0 == ompi_comm_rank (module->comm)) {
opal_shmem_unlink (&module->seg_ds);
}

opal_shmem_segment_detach (&module->seg_ds);
} else {
free(module->node_states);
Expand Down

0 comments on commit 64a5baa

Please sign in to comment.