Skip to content

Commit

Permalink
Tracking framework for xpmem rcache registrations in acoll.
Browse files Browse the repository at this point in the history
A hash table, stored as part of the acoll module's data struct, is used to
track the rcache registrations made through the register_and_cache API, which
is called from the acoll collective components. During module destruction this
hash table is iterated over and each tracked rcache registration is
deregistered, to ensure that the rcache module destroy proceeds correctly.

Signed-off-by: Mithun Mohan <MithunMohan.KadavilMadanaMohanan@amd.com>
  • Loading branch information
MithunMohanKadavil committed Dec 17, 2024
1 parent 805c008 commit 8d8338e
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ompi/mca/coll/acoll/coll_acoll.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#ifdef HAVE_XPMEM_H
#include "opal/mca/rcache/base/base.h"
#include "opal/class/opal_hash_table.h"
#include <xpmem.h>
#endif

Expand Down Expand Up @@ -125,6 +126,7 @@ typedef struct coll_acoll_data {
void **xpmem_raddr;
mca_rcache_base_module_t **rcache;
void *scratch;
opal_hash_table_t **xpmem_reg_tracker_ht;
#endif
opal_shmem_ds_t *allshmseg_id;
void **allshmmmap_sbuf;
Expand Down
18 changes: 18 additions & 0 deletions ompi/mca/coll/acoll/coll_acoll_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,24 @@ static void mca_coll_acoll_module_destruct(mca_coll_acoll_module_t *module)
if (ompi_comm_rank(subc->orig_comm) == j) {
continue;
}
// Dereg all rcache regs.
uint64_t key = 0;
uint64_t value = 0;
uint64_t zero_value = 0;
OPAL_HASH_TABLE_FOREACH(key,uint64,value,(data->xpmem_reg_tracker_ht[j])) {
mca_rcache_base_registration_t* reg =
(mca_rcache_base_registration_t*) key;

for (uint64_t d_i = 0; d_i < value; ++d_i) {
(data->rcache[j])->rcache_deregister(data->rcache[j], reg);
}
opal_hash_table_set_value_uint64(data->xpmem_reg_tracker_ht[j],
key, (void*)(zero_value));
}
xpmem_release(data->all_apid[j]);
mca_rcache_base_module_destroy(data->rcache[j]);
opal_hash_table_remove_all(data->xpmem_reg_tracker_ht[j]);
OBJ_RELEASE(data->xpmem_reg_tracker_ht[j]);
}
xpmem_remove(data->allseg_id[ompi_comm_rank(subc->orig_comm)]);

Expand All @@ -262,6 +278,8 @@ static void mca_coll_acoll_module_destruct(mca_coll_acoll_module_t *module)
data->xpmem_raddr = NULL;
free(data->scratch);
data->scratch = NULL;
free(data->xpmem_reg_tracker_ht);
data->xpmem_reg_tracker_ht = NULL;
free(data->rcache);
data->rcache = NULL;
#endif
Expand Down
35 changes: 35 additions & 0 deletions ompi/mca/coll/acoll/coll_acoll_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,14 @@ static inline int coll_acoll_init(mca_coll_base_module_t *module, ompi_communica
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto error_hndl;
}
data->xpmem_reg_tracker_ht = NULL;
data->xpmem_reg_tracker_ht = (opal_hash_table_t **) malloc(sizeof(opal_hash_table_t*) * size);
if (NULL == data->xpmem_reg_tracker_ht) {
line = __LINE__;
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto error_hndl;
}

seg_id = xpmem_make(0, XPMEM_MAXADDR_SIZE, XPMEM_PERMIT_MODE, (void *) 0666);
if (seg_id == -1) {
line = __LINE__;
Expand Down Expand Up @@ -733,6 +741,8 @@ static inline int coll_acoll_init(mca_coll_base_module_t *module, ompi_communica
line = __LINE__;
goto error_hndl;
}
data->xpmem_reg_tracker_ht[i] = OBJ_NEW(opal_hash_table_t);
opal_hash_table_init(data->xpmem_reg_tracker_ht[i], 2048);
}
}
#endif
Expand Down Expand Up @@ -831,6 +841,8 @@ static inline int coll_acoll_init(mca_coll_base_module_t *module, ompi_communica
data->xpmem_saddr = NULL;
free(data->xpmem_raddr);
data->xpmem_raddr = NULL;
free(data->xpmem_reg_tracker_ht);
data->xpmem_reg_tracker_ht = NULL;
free(data->rcache);
data->rcache = NULL;
free(data->scratch);
Expand All @@ -851,6 +863,25 @@ static inline int coll_acoll_init(mca_coll_base_module_t *module, ompi_communica
}

#ifdef HAVE_XPMEM_H
/*
 * Record one more use of an rcache registration in a tracking hash table.
 *
 * The registration's address is used as the 64-bit key. The value slot of
 * the table (a void *) does not point at anything: it holds an integer count
 * of how many times this registration has been recorded via this function.
 *
 * reg: registration to record; only its address is used here.
 * ht:  hash table that stores the per-registration counts.
 *
 * If the lookup returns anything other than OPAL_SUCCESS or
 * OPAL_ERR_NOT_FOUND, the table is left untouched.
 */
static inline void update_rcache_reg_hashtable_entry
    (struct acoll_xpmem_rcache_reg_t *reg,
     opal_hash_table_t *ht)
{
    // Pointer -> integer goes through uintptr_t before widening to the key type.
    uint64_t key = (uint64_t) (uintptr_t) reg;
    // The lookup stores a void * through its out-parameter, so hand it a real
    // void * object rather than the address of an integer cast to void **.
    void *stored = NULL;
    int ht_ret = opal_hash_table_get_value_uint64(ht, key, &stored);

    if (OPAL_ERR_NOT_FOUND == ht_ret) {
        // First time this registration is seen: start the count at 1.
        opal_hash_table_set_value_uint64(ht, key, (void *) (uintptr_t) 1);
    } else if (OPAL_SUCCESS == ht_ret) {
        // Already tracked: bump the count kept in the value slot.
        uintptr_t count = (uintptr_t) stored + 1;
        opal_hash_table_set_value_uint64(ht, key, (void *) count);
    }
}

static inline void register_and_cache(int size, size_t total_dsize, int rank,
coll_acoll_data_t *data)
{
Expand All @@ -870,6 +901,8 @@ static inline void register_and_cache(int size, size_t total_dsize, int rank,
sbuf_reg = NULL;
return;
}
update_rcache_reg_hashtable_entry(sbuf_reg, data->xpmem_reg_tracker_ht[i]);

data->xpmem_saddr[i] = (void *) ((uintptr_t) sbuf_reg->xpmem_vaddr
+ ((uintptr_t) data->allshm_sbuf[i]
- (uintptr_t) sbuf_reg->base.base));
Expand All @@ -884,6 +917,8 @@ static inline void register_and_cache(int size, size_t total_dsize, int rank,
rbuf_reg = NULL;
return;
}
update_rcache_reg_hashtable_entry(rbuf_reg, data->xpmem_reg_tracker_ht[i]);

data->xpmem_raddr[i] = (void *) ((uintptr_t) rbuf_reg->xpmem_vaddr
+ ((uintptr_t) data->allshm_rbuf[i]
- (uintptr_t) rbuf_reg->base.base));
Expand Down

0 comments on commit 8d8338e

Please sign in to comment.