From d0d59b1d7daa6d636f697b2aaa0eee55c867072c Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 24 May 2018 08:24:01 -0600 Subject: [PATCH 1/2] osc/rdma: add support for controlling location of backing store This commit adds a new MCA variable to set the location of the backing store: osc_rdma_backing_directory. The default on Linux has been changed to use /dev/shm to improve performance in cases where /tmp is not a tmpfs. Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma.h | 3 +++ ompi/mca/osc/rdma/osc_rdma_component.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index a33e0f332f8..d8192f1afda 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -106,6 +106,9 @@ struct ompi_osc_rdma_component_t { /** aggregation free list */ opal_free_list_t aggregate; + + /** directory where to place backing files */ + char *backing_directory; }; typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t; diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index b14539589be..522f953a2fb 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -268,6 +268,18 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names); free(description_str); + if (0 == access ("/dev/shm", W_OK)) { + mca_osc_rdma_component.backing_directory = "/dev/shm"; + } else { + mca_osc_rdma_component.backing_directory = ompi_process_info.proc_session_dir; + } + + (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "backing_directory", + "Directory to place backing files for memory windows. 
" + "This directory should be on a local filesystem such as /tmp or " + "/dev/shm (default: (linux) /dev/shm, (others) session directory)", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_rdma_component.backing_directory); /* register performance variables */ @@ -602,9 +614,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s } /* allocate the shared memory segment */ - ret = asprintf (&data_file, "%s"OPAL_PATH_SEP"window_%d.%s", - ompi_process_info.job_session_dir, ompi_comm_get_cid (module->comm), - ompi_process_info.nodename); + ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d", + mca_osc_rdma_component.backing_directory, ompi_process_info.nodename, + OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); if (0 > ret) { ret = OMPI_ERR_OUT_OF_RESOURCE; break; From e9de42544e8e16ded2327bcd858ffcd83907f407 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 24 May 2018 08:26:31 -0600 Subject: [PATCH 2/2] osc/sm: add support for controlling location of backing store This commit adds a new MCA variable to set the location of the backing store: osc_sm_backing_directory. The default on Linux has been changed to use /dev/shm to improve performance in cases where /tmp is not a tmpfs. 
Signed-off-by: Nathan Hjelm --- ompi/mca/osc/sm/osc_sm.h | 2 ++ ompi/mca/osc/sm/osc_sm_component.c | 50 +++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm.h b/ompi/mca/osc/sm/osc_sm.h index b27aa83365c..f0917b54cb0 100644 --- a/ompi/mca/osc/sm/osc_sm.h +++ b/ompi/mca/osc/sm/osc_sm.h @@ -61,6 +61,8 @@ typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t; struct ompi_osc_sm_component_t { ompi_osc_base_component_t super; + + char *backing_directory; }; typedef struct ompi_osc_sm_component_t ompi_osc_sm_component_t; OMPI_DECLSPEC extern ompi_osc_sm_component_t mca_osc_sm_component; diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index f7211cd93cc..4531ffc5be7 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -36,6 +36,7 @@ static int component_finalize(void); static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor); +static int component_register (void); static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); @@ -51,6 +52,7 @@ ompi_osc_sm_component_t mca_osc_sm_component = { MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION), .mca_open_component = component_open, + .mca_register_component_params = component_register, }, .osc_data = { /* mca_base_component_data */ /* The component is not checkpoint ready */ @@ -105,6 +107,23 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = { } }; +static int component_register (void) +{ + if (0 == access ("/dev/shm", W_OK)) { + mca_osc_sm_component.backing_directory = "/dev/shm"; + } else { + mca_osc_sm_component.backing_directory = ompi_process_info.proc_session_dir; + } + + (void) 
mca_base_component_var_register (&mca_osc_sm_component.super.osc_version, "backing_directory", + "Directory to place backing files for shared memory windows. " + "This directory should be on a local filesystem such as /tmp or " + "/dev/shm (default: (linux) /dev/shm, (others) session directory)", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_sm_component.backing_directory); + + return OPAL_SUCCESS; +} static int component_open(void) @@ -169,6 +188,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit { ompi_osc_sm_module_t *module = NULL; int comm_size = ompi_comm_size (comm); + bool unlink_needed = false; int ret = OMPI_ERROR; if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) { @@ -262,10 +282,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64); if (0 == ompi_comm_rank (module->comm)) { char *data_file; - if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s", - ompi_process_info.proc_session_dir, - ompi_comm_get_cid(module->comm), - ompi_process_info.nodename) < 0) { + ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d", + mca_osc_sm_component.backing_directory, ompi_process_info.nodename, + OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); + if (ret < 0) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -274,6 +294,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit if (OPAL_SUCCESS != ret) { goto error; } + + unlink_needed = true; } ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, @@ -287,6 +309,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit goto error; } + /* wait for all processes to attach: a bcast does not synchronize, so rank 0 must not unlink the backing file until a barrier completes */ + ret = module->comm->c_coll->coll_barrier (module->comm, module->comm->c_coll->coll_barrier_module); + if (OMPI_SUCCESS != ret) { + goto error; 
+ } + + if (0 == ompi_comm_rank (module->comm)) { + opal_shmem_unlink (&module->seg_ds); + unlink_needed = false; + } + module->sizes = malloc(sizeof(size_t) * comm_size); if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; module->bases = malloc(sizeof(void*) * comm_size); @@ -399,6 +432,11 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit return OMPI_SUCCESS; error: + + if (0 == ompi_comm_rank (module->comm) && unlink_needed) { + opal_shmem_unlink (&module->seg_ds); + } + ompi_osc_sm_free (win); return ret; @@ -477,10 +515,6 @@ ompi_osc_sm_free(struct ompi_win_t *win) module->comm->c_coll->coll_barrier(module->comm, module->comm->c_coll->coll_barrier_module); - if (0 == ompi_comm_rank (module->comm)) { - opal_shmem_unlink (&module->seg_ds); - } - opal_shmem_segment_detach (&module->seg_ds); } else { free(module->node_states);