-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OFI MR flags selection based on the provider #1077
base: main
Are you sure you want to change the base?
Changes from all commits
e0be06f
f3a0540
beff73e
52b95ef
5d05032
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -194,13 +194,15 @@ AM_CONDITIONAL([HAVE_LONG_FORTRAN_HEADER], [test "$enable_long_fortran_header" = | |
|
||
AC_ARG_ENABLE([ofi-mr], | ||
[AC_HELP_STRING([--enable-ofi-mr=MODE], | ||
[OFI memory registration mode: basic, scalable, or rma-event (default: scalable)])]) | ||
[OFI memory registration mode: none, basic, scalable, or rma-event (default: none)])]) | ||
|
||
AS_IF([test -z "$enable_ofi_mr"], [enable_ofi_mr="scalable"]) | ||
AS_IF([test -z "$enable_ofi_mr"], [enable_ofi_mr="none"]) | ||
|
||
AS_CASE([$enable_ofi_mr], | ||
[none], | ||
[AC_DEFINE([ENABLE_MR_NONE], [1], [If defined, the OFI transport will use MR mode based on provider])], | ||
[basic], | ||
[], | ||
[AC_DEFINE([ENABLE_MR_BASIC], [1], [If defined, the OFI transport will use FI_MR_BASIC])], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is |
||
[scalable], | ||
[AC_DEFINE([ENABLE_MR_SCALABLE], [1], [If defined, the OFI transport will use FI_MR_SCALABLE])], | ||
[rma?event], | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,14 +62,6 @@ struct fid_cq* shmem_transport_ofi_target_cq; | |
#if ENABLE_TARGET_CNTR | ||
struct fid_cntr* shmem_transport_ofi_target_cntrfd; | ||
#endif | ||
#ifdef ENABLE_MR_SCALABLE | ||
#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING | ||
struct fid_mr* shmem_transport_ofi_target_mrfd; | ||
#else /* !ENABLE_REMOTE_VIRTUAL_ADDRESSING */ | ||
struct fid_mr* shmem_transport_ofi_target_heap_mrfd; | ||
struct fid_mr* shmem_transport_ofi_target_data_mrfd; | ||
#endif | ||
#else /* !ENABLE_MR_SCALABLE */ | ||
struct fid_mr* shmem_transport_ofi_target_heap_mrfd; | ||
struct fid_mr* shmem_transport_ofi_target_data_mrfd; | ||
uint64_t* shmem_transport_ofi_target_heap_keys; | ||
|
@@ -80,7 +72,6 @@ int shmem_transport_ofi_use_absolute_address; | |
uint8_t** shmem_transport_ofi_target_heap_addrs; | ||
uint8_t** shmem_transport_ofi_target_data_addrs; | ||
#endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ | ||
#endif /* ENABLE_MR_SCALABLE */ | ||
uint64_t shmem_transport_ofi_max_poll; | ||
long shmem_transport_ofi_put_poll_limit; | ||
long shmem_transport_ofi_get_poll_limit; | ||
|
@@ -620,6 +611,52 @@ int bind_enable_ep_resources(shmem_transport_ctx_t *ctx) | |
} | ||
|
||
|
||
static inline | ||
int allocate_separate_heap_data_mr(uint64_t *flags) { | ||
int ret = 0; | ||
|
||
/* Register separate data and heap segments using keys 0 and 1, | ||
* respectively. In MR_BASIC_MODE, the keys are ignored and selected by | ||
* the provider. */ | ||
ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_heap_base, | ||
shmem_internal_heap_length, | ||
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 1ULL, *flags, | ||
&shmem_transport_ofi_target_heap_mrfd, NULL); | ||
OFI_CHECK_RETURN_STR(ret, "target memory (heap) registration failed"); | ||
|
||
ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_data_base, | ||
shmem_internal_data_length, | ||
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, *flags, | ||
&shmem_transport_ofi_target_data_mrfd, NULL); | ||
OFI_CHECK_RETURN_STR(ret, "target memory (data) registration failed"); | ||
|
||
/* Bind counter with target memory region for incoming messages */ | ||
#if ENABLE_TARGET_CNTR | ||
ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd, | ||
&shmem_transport_ofi_target_cntrfd->fid, | ||
FI_REMOTE_WRITE); | ||
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to heap MR failed"); | ||
|
||
ret = fi_mr_bind(shmem_transport_ofi_target_data_mrfd, | ||
&shmem_transport_ofi_target_cntrfd->fid, | ||
FI_REMOTE_WRITE); | ||
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to data MR failed"); | ||
|
||
#ifdef ENABLE_MR_RMA_EVENT | ||
if (shmem_transport_ofi_mr_rma_event) { | ||
ret = fi_mr_enable(shmem_transport_ofi_target_data_mrfd); | ||
OFI_CHECK_RETURN_STR(ret, "target data MR enable failed"); | ||
|
||
ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd); | ||
OFI_CHECK_RETURN_STR(ret, "target heap MR enable failed"); | ||
} | ||
#endif /* ENABLE_MR_RMA_EVENT */ | ||
#endif /* ENABLE_TARGET_CNTR */ | ||
|
||
return ret; | ||
} | ||
|
||
|
||
static inline | ||
int allocate_recv_cntr_mr(void) | ||
{ | ||
|
@@ -653,65 +690,32 @@ int allocate_recv_cntr_mr(void) | |
} | ||
#endif | ||
|
||
#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) | ||
ret = fi_mr_reg(shmem_transport_ofi_domainfd, 0, UINT64_MAX, | ||
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags, | ||
&shmem_transport_ofi_target_mrfd, NULL); | ||
OFI_CHECK_RETURN_STR(ret, "target memory (all) registration failed"); | ||
#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) | ||
if (shmem_transport_ofi_mr_mode == 0) { | ||
ret = fi_mr_reg(shmem_transport_ofi_domainfd, 0, UINT64_MAX, | ||
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags, | ||
&shmem_transport_ofi_target_heap_mrfd, NULL); | ||
OFI_CHECK_RETURN_STR(ret, "target memory (all) registration failed"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm confused why virtual addressing now registers the heap now. Does it not need the data segment? |
||
|
||
/* Bind counter with target memory region for incoming messages */ | ||
#if ENABLE_TARGET_CNTR | ||
ret = fi_mr_bind(shmem_transport_ofi_target_mrfd, | ||
&shmem_transport_ofi_target_cntrfd->fid, | ||
FI_REMOTE_WRITE); | ||
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to MR failed"); | ||
ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd, | ||
&shmem_transport_ofi_target_cntrfd->fid, | ||
FI_REMOTE_WRITE); | ||
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to MR failed"); | ||
|
||
#ifdef ENABLE_MR_RMA_EVENT | ||
if (shmem_transport_ofi_mr_rma_event) { | ||
ret = fi_mr_enable(shmem_transport_ofi_target_mrfd); | ||
OFI_CHECK_RETURN_STR(ret, "target MR enable failed"); | ||
} | ||
if (shmem_transport_ofi_mr_rma_event) { | ||
ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd); | ||
OFI_CHECK_RETURN_STR(ret, "target MR enable failed"); | ||
} | ||
#endif /* ENABLE_MR_RMA_EVENT */ | ||
#endif /* ENABLE_TARGET_CNTR */ | ||
|
||
#else | ||
/* Register separate data and heap segments using keys 0 and 1, | ||
* respectively. In MR_BASIC_MODE, the keys are ignored and selected by | ||
* the provider. */ | ||
ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_heap_base, | ||
shmem_internal_heap_length, | ||
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 1ULL, flags, | ||
&shmem_transport_ofi_target_heap_mrfd, NULL); | ||
OFI_CHECK_RETURN_STR(ret, "target memory (heap) registration failed"); | ||
|
||
ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_data_base, | ||
shmem_internal_data_length, | ||
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags, | ||
&shmem_transport_ofi_target_data_mrfd, NULL); | ||
OFI_CHECK_RETURN_STR(ret, "target memory (data) registration failed"); | ||
|
||
/* Bind counter with target memory region for incoming messages */ | ||
#if ENABLE_TARGET_CNTR | ||
ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd, | ||
&shmem_transport_ofi_target_cntrfd->fid, | ||
FI_REMOTE_WRITE); | ||
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to heap MR failed"); | ||
|
||
ret = fi_mr_bind(shmem_transport_ofi_target_data_mrfd, | ||
&shmem_transport_ofi_target_cntrfd->fid, | ||
FI_REMOTE_WRITE); | ||
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to data MR failed"); | ||
|
||
#ifdef ENABLE_MR_RMA_EVENT | ||
if (shmem_transport_ofi_mr_rma_event) { | ||
ret = fi_mr_enable(shmem_transport_ofi_target_data_mrfd); | ||
OFI_CHECK_RETURN_STR(ret, "target data MR enable failed"); | ||
|
||
ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd); | ||
OFI_CHECK_RETURN_STR(ret, "target heap MR enable failed"); | ||
} else { | ||
ret = allocate_separate_heap_data_mr(&flags); | ||
} | ||
#endif /* ENABLE_MR_RMA_EVENT */ | ||
#endif /* ENABLE_TARGET_CNTR */ | ||
#else | ||
ret = allocate_separate_heap_data_mr(&flags); | ||
#endif | ||
|
||
return ret; | ||
|
@@ -720,7 +724,7 @@ int allocate_recv_cntr_mr(void) | |
static | ||
int publish_mr_info(void) | ||
{ | ||
#ifndef ENABLE_MR_SCALABLE | ||
if (shmem_transport_ofi_mr_mode != 0) | ||
{ | ||
int err; | ||
uint64_t heap_key, data_key; | ||
|
@@ -744,16 +748,15 @@ int publish_mr_info(void) | |
RAISE_WARN_STR("Put of data segment key to runtime KVS failed"); | ||
return 1; | ||
} | ||
} | ||
|
||
|
||
#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING | ||
if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) | ||
shmem_transport_ofi_use_absolute_address = 1; | ||
else | ||
shmem_transport_ofi_use_absolute_address = 0; | ||
if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) | ||
shmem_transport_ofi_use_absolute_address = 1; | ||
else | ||
shmem_transport_ofi_use_absolute_address = 0; | ||
#else /* !ENABLE_REMOTE_VIRTUAL_ADDRESSING */ | ||
{ | ||
int err; | ||
|
||
void *heap_base, *data_base; | ||
|
||
if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) { | ||
|
@@ -775,17 +778,17 @@ int publish_mr_info(void) | |
RAISE_WARN_STR("Put of data segment address to runtime KVS failed"); | ||
return 1; | ||
} | ||
} | ||
|
||
#endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ | ||
#endif /* !ENABLE_MR_SCALABLE */ | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static | ||
int populate_mr_tables(void) | ||
{ | ||
#ifndef ENABLE_MR_SCALABLE | ||
if (shmem_transport_ofi_mr_mode != 0) | ||
{ | ||
int i, err; | ||
|
||
|
@@ -818,11 +821,9 @@ int populate_mr_tables(void) | |
return 1; | ||
} | ||
} | ||
} | ||
|
||
|
||
#ifndef ENABLE_REMOTE_VIRTUAL_ADDRESSING | ||
{ | ||
int i, err; | ||
|
||
shmem_transport_ofi_target_heap_addrs = malloc(sizeof(uint8_t*) * shmem_internal_num_pes); | ||
if (NULL == shmem_transport_ofi_target_heap_addrs) { | ||
|
@@ -853,9 +854,9 @@ int populate_mr_tables(void) | |
return 1; | ||
} | ||
} | ||
} | ||
|
||
#endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ | ||
#endif /* !ENABLE_MR_SCALABLE */ | ||
} | ||
|
||
return 0; | ||
} | ||
|
@@ -1157,20 +1158,10 @@ int query_for_fabric(struct fabric_info *info) | |
hints.addr_format = FI_FORMAT_UNSPEC; | ||
domain_attr.data_progress = FI_PROGRESS_AUTO; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a note that #1059 possibly motivates a guard warning about and/or preventing the usage of |
||
domain_attr.resource_mgmt = FI_RM_ENABLED; | ||
#ifdef ENABLE_MR_SCALABLE | ||
/* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */ | ||
domain_attr.mr_mode = 0; | ||
# if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT) | ||
domain_attr.mr_mode = FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ | ||
# endif | ||
#else | ||
/* Portable, absolute addressing, formerly FI_MR_BASIC */ | ||
domain_attr.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; | ||
#endif | ||
#if !defined(ENABLE_MR_SCALABLE) || !defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) | ||
domain_attr.mr_key_size = 1; /* Heap and data use different MR keys, need | ||
at least 1 byte */ | ||
#endif | ||
|
||
shmem_transport_ofi_mr_mode = set_mr_flag(shmem_transport_ofi_info.prov_name, | ||
&domain_attr); | ||
|
||
#ifdef ENABLE_THREADS | ||
if (shmem_internal_thread_level == SHMEM_THREAD_MULTIPLE) { | ||
#ifdef USE_THREAD_COMPLETION | ||
|
@@ -1244,17 +1235,12 @@ int query_for_fabric(struct fabric_info *info) | |
shmem_transport_ofi_stx_max = 0; | ||
} | ||
|
||
#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) | ||
/* Only use a single MR, no keys required */ | ||
info->p_info->domain_attr->mr_key_size = 0; | ||
#else | ||
/* Heap and data use different MR keys, need at least 1 byte of key space | ||
* if using provider selected keys */ | ||
if (info->p_info->domain_attr->mr_mode & FI_MR_PROV_KEY) | ||
info->p_info->domain_attr->mr_key_size = 1; | ||
else | ||
info->p_info->domain_attr->mr_key_size = 0; | ||
#endif | ||
|
||
shmem_internal_assertp(info->p_info->tx_attr->inject_size >= shmem_transport_ofi_max_buffered_send); | ||
shmem_transport_ofi_max_buffered_send = info->p_info->tx_attr->inject_size; | ||
|
@@ -1417,10 +1403,14 @@ int shmem_transport_init(void) | |
|
||
if (shmem_internal_params.OFI_PROVIDER_provided) | ||
shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_PROVIDER; | ||
else if (shmem_internal_params.OFI_USE_PROVIDER_provided) | ||
shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_USE_PROVIDER; | ||
else | ||
else { | ||
shmem_transport_ofi_info.prov_name = NULL; | ||
char *fi_provider_provided = getenv("FI_PROVIDER"); | ||
if (fi_provider_provided != NULL) | ||
shmem_transport_ofi_info.prov_name = fi_provider_provided; | ||
else | ||
shmem_transport_ofi_info.prov_name = NULL; | ||
} | ||
|
||
if (shmem_internal_params.OFI_FABRIC_provided) | ||
shmem_transport_ofi_info.fabric_name = shmem_internal_params.OFI_FABRIC; | ||
|
@@ -1788,13 +1778,15 @@ int shmem_transport_fini(void) | |
ret = fi_close(&shmem_transport_ofi_target_cq->fid); | ||
OFI_CHECK_ERROR_MSG(ret, "Target CQ close failed (%s)\n", fi_strerror(errno)); | ||
|
||
#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) | ||
ret = fi_close(&shmem_transport_ofi_target_mrfd->fid); | ||
OFI_CHECK_ERROR_MSG(ret, "Target MR close failed (%s)\n", fi_strerror(errno)); | ||
#else | ||
ret = fi_close(&shmem_transport_ofi_target_heap_mrfd->fid); | ||
OFI_CHECK_ERROR_MSG(ret, "Target heap MR close failed (%s)\n", fi_strerror(errno)); | ||
|
||
#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) | ||
if (shmem_transport_ofi_mr_mode != 0) { | ||
ret = fi_close(&shmem_transport_ofi_target_data_mrfd->fid); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see a corresponding |
||
OFI_CHECK_ERROR_MSG(ret, "Target data MR close failed (%s)\n", fi_strerror(errno)); | ||
} | ||
#else | ||
ret = fi_close(&shmem_transport_ofi_target_data_mrfd->fid); | ||
OFI_CHECK_ERROR_MSG(ret, "Target data MR close failed (%s)\n", fi_strerror(errno)); | ||
#endif | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should "none" be called "auto" since it selects MR flags based on the provider?