Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OFI MR flags selection based on the provider #1077

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ jobs:
sos_config: --enable-ofi-mr=basic --enable-av-map --disable-cxx --enable-memcpy
--enable-pmi-simple
libfabric_version: v1.13.x
- config_name: MR-Scalable
sos_config: --enable-ofi-mr=scalable --enable-pmi-simple
libfabric_version: v1.13.x
- config_name: PMI MPI
sos_config: --disable-fortran --enable-pmi-mpi CC=mpicc
libfabric_version: v1.13.x
Expand Down
8 changes: 5 additions & 3 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,15 @@ AM_CONDITIONAL([HAVE_LONG_FORTRAN_HEADER], [test "$enable_long_fortran_header" =

AC_ARG_ENABLE([ofi-mr],
[AC_HELP_STRING([--enable-ofi-mr=MODE],
[OFI memory registration mode: basic, scalable, or rma-event (default: scalable)])])
[OFI memory registration mode: none, basic, scalable, or rma-event (default: none)])])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should "none" be called "auto" since it selects MR flags based on the provider?


AS_IF([test -z "$enable_ofi_mr"], [enable_ofi_mr="scalable"])
AS_IF([test -z "$enable_ofi_mr"], [enable_ofi_mr="none"])

AS_CASE([$enable_ofi_mr],
[none],
[AC_DEFINE([ENABLE_MR_NONE], [1], [If defined, the OFI transport will use MR mode based on provider])],
[basic],
[],
[AC_DEFINE([ENABLE_MR_BASIC], [1], [If defined, the OFI transport will use FI_MR_BASIC])],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is ENABLE_MR_BASIC used anywhere? Is it needed?

[scalable],
[AC_DEFINE([ENABLE_MR_SCALABLE], [1], [If defined, the OFI transport will use FI_MR_SCALABLE])],
[rma?event],
Expand Down
2 changes: 0 additions & 2 deletions src/shmem_env_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ SHMEM_INTERNAL_ENV_DEF(OFI_ATOMIC_CHECKS_WARN, bool, false, SHMEM_INTERNAL_ENV_C
"Display warnings about unsupported atomic operations")
SHMEM_INTERNAL_ENV_DEF(OFI_PROVIDER, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT,
"Provider that should be used by the OFI transport")
SHMEM_INTERNAL_ENV_DEF(OFI_USE_PROVIDER, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT,
"Deprecated, replaced by SHMEM_OFI_PROVIDER")
SHMEM_INTERNAL_ENV_DEF(OFI_FABRIC, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT,
"Fabric that should be used by the OFI transport")
SHMEM_INTERNAL_ENV_DEF(OFI_DOMAIN, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT,
Expand Down
196 changes: 94 additions & 102 deletions src/transport_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,6 @@ struct fid_cq* shmem_transport_ofi_target_cq;
#if ENABLE_TARGET_CNTR
struct fid_cntr* shmem_transport_ofi_target_cntrfd;
#endif
#ifdef ENABLE_MR_SCALABLE
#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
struct fid_mr* shmem_transport_ofi_target_mrfd;
#else /* !ENABLE_REMOTE_VIRTUAL_ADDRESSING */
struct fid_mr* shmem_transport_ofi_target_heap_mrfd;
struct fid_mr* shmem_transport_ofi_target_data_mrfd;
#endif
#else /* !ENABLE_MR_SCALABLE */
struct fid_mr* shmem_transport_ofi_target_heap_mrfd;
struct fid_mr* shmem_transport_ofi_target_data_mrfd;
uint64_t* shmem_transport_ofi_target_heap_keys;
Expand All @@ -80,7 +72,6 @@ int shmem_transport_ofi_use_absolute_address;
uint8_t** shmem_transport_ofi_target_heap_addrs;
uint8_t** shmem_transport_ofi_target_data_addrs;
#endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */
#endif /* ENABLE_MR_SCALABLE */
uint64_t shmem_transport_ofi_max_poll;
long shmem_transport_ofi_put_poll_limit;
long shmem_transport_ofi_get_poll_limit;
Expand Down Expand Up @@ -620,6 +611,52 @@ int bind_enable_ep_resources(shmem_transport_ctx_t *ctx)
}


static inline
int allocate_separate_heap_data_mr(uint64_t *flags) {
int ret = 0;

/* Register separate data and heap segments using keys 0 and 1,
* respectively. In MR_BASIC_MODE, the keys are ignored and selected by
* the provider. */
ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_heap_base,
shmem_internal_heap_length,
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 1ULL, *flags,
&shmem_transport_ofi_target_heap_mrfd, NULL);
OFI_CHECK_RETURN_STR(ret, "target memory (heap) registration failed");

ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_data_base,
shmem_internal_data_length,
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, *flags,
&shmem_transport_ofi_target_data_mrfd, NULL);
OFI_CHECK_RETURN_STR(ret, "target memory (data) registration failed");

/* Bind counter with target memory region for incoming messages */
#if ENABLE_TARGET_CNTR
ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd,
&shmem_transport_ofi_target_cntrfd->fid,
FI_REMOTE_WRITE);
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to heap MR failed");

ret = fi_mr_bind(shmem_transport_ofi_target_data_mrfd,
&shmem_transport_ofi_target_cntrfd->fid,
FI_REMOTE_WRITE);
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to data MR failed");

#ifdef ENABLE_MR_RMA_EVENT
if (shmem_transport_ofi_mr_rma_event) {
ret = fi_mr_enable(shmem_transport_ofi_target_data_mrfd);
OFI_CHECK_RETURN_STR(ret, "target data MR enable failed");

ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd);
OFI_CHECK_RETURN_STR(ret, "target heap MR enable failed");
}
#endif /* ENABLE_MR_RMA_EVENT */
#endif /* ENABLE_TARGET_CNTR */

return ret;
}


static inline
int allocate_recv_cntr_mr(void)
{
Expand Down Expand Up @@ -653,65 +690,32 @@ int allocate_recv_cntr_mr(void)
}
#endif

#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING)
ret = fi_mr_reg(shmem_transport_ofi_domainfd, 0, UINT64_MAX,
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags,
&shmem_transport_ofi_target_mrfd, NULL);
OFI_CHECK_RETURN_STR(ret, "target memory (all) registration failed");
#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING)
if (shmem_transport_ofi_mr_mode == 0) {
ret = fi_mr_reg(shmem_transport_ofi_domainfd, 0, UINT64_MAX,
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags,
&shmem_transport_ofi_target_heap_mrfd, NULL);
OFI_CHECK_RETURN_STR(ret, "target memory (all) registration failed");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused why virtual addressing now registers the heap now. Does it not need the data segment?


/* Bind counter with target memory region for incoming messages */
#if ENABLE_TARGET_CNTR
ret = fi_mr_bind(shmem_transport_ofi_target_mrfd,
&shmem_transport_ofi_target_cntrfd->fid,
FI_REMOTE_WRITE);
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to MR failed");
ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd,
&shmem_transport_ofi_target_cntrfd->fid,
FI_REMOTE_WRITE);
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to MR failed");

#ifdef ENABLE_MR_RMA_EVENT
if (shmem_transport_ofi_mr_rma_event) {
ret = fi_mr_enable(shmem_transport_ofi_target_mrfd);
OFI_CHECK_RETURN_STR(ret, "target MR enable failed");
}
if (shmem_transport_ofi_mr_rma_event) {
ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd);
OFI_CHECK_RETURN_STR(ret, "target MR enable failed");
}
#endif /* ENABLE_MR_RMA_EVENT */
#endif /* ENABLE_TARGET_CNTR */

#else
/* Register separate data and heap segments using keys 0 and 1,
* respectively. In MR_BASIC_MODE, the keys are ignored and selected by
* the provider. */
ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_heap_base,
shmem_internal_heap_length,
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 1ULL, flags,
&shmem_transport_ofi_target_heap_mrfd, NULL);
OFI_CHECK_RETURN_STR(ret, "target memory (heap) registration failed");

ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_data_base,
shmem_internal_data_length,
FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags,
&shmem_transport_ofi_target_data_mrfd, NULL);
OFI_CHECK_RETURN_STR(ret, "target memory (data) registration failed");

/* Bind counter with target memory region for incoming messages */
#if ENABLE_TARGET_CNTR
ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd,
&shmem_transport_ofi_target_cntrfd->fid,
FI_REMOTE_WRITE);
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to heap MR failed");

ret = fi_mr_bind(shmem_transport_ofi_target_data_mrfd,
&shmem_transport_ofi_target_cntrfd->fid,
FI_REMOTE_WRITE);
OFI_CHECK_RETURN_STR(ret, "target CNTR binding to data MR failed");

#ifdef ENABLE_MR_RMA_EVENT
if (shmem_transport_ofi_mr_rma_event) {
ret = fi_mr_enable(shmem_transport_ofi_target_data_mrfd);
OFI_CHECK_RETURN_STR(ret, "target data MR enable failed");

ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd);
OFI_CHECK_RETURN_STR(ret, "target heap MR enable failed");
} else {
ret = allocate_separate_heap_data_mr(&flags);
}
#endif /* ENABLE_MR_RMA_EVENT */
#endif /* ENABLE_TARGET_CNTR */
#else
ret = allocate_separate_heap_data_mr(&flags);
#endif

return ret;
Expand All @@ -720,7 +724,7 @@ int allocate_recv_cntr_mr(void)
static
int publish_mr_info(void)
{
#ifndef ENABLE_MR_SCALABLE
if (shmem_transport_ofi_mr_mode != 0)
{
int err;
uint64_t heap_key, data_key;
Expand All @@ -744,16 +748,15 @@ int publish_mr_info(void)
RAISE_WARN_STR("Put of data segment key to runtime KVS failed");
return 1;
}
}


#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING
if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR)
shmem_transport_ofi_use_absolute_address = 1;
else
shmem_transport_ofi_use_absolute_address = 0;
if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR)
shmem_transport_ofi_use_absolute_address = 1;
else
shmem_transport_ofi_use_absolute_address = 0;
#else /* !ENABLE_REMOTE_VIRTUAL_ADDRESSING */
{
int err;

void *heap_base, *data_base;

if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) {
Expand All @@ -775,17 +778,17 @@ int publish_mr_info(void)
RAISE_WARN_STR("Put of data segment address to runtime KVS failed");
return 1;
}
}

#endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */
#endif /* !ENABLE_MR_SCALABLE */
}

return 0;
}

static
int populate_mr_tables(void)
{
#ifndef ENABLE_MR_SCALABLE
if (shmem_transport_ofi_mr_mode != 0)
{
int i, err;

Expand Down Expand Up @@ -818,11 +821,9 @@ int populate_mr_tables(void)
return 1;
}
}
}


#ifndef ENABLE_REMOTE_VIRTUAL_ADDRESSING
{
int i, err;

shmem_transport_ofi_target_heap_addrs = malloc(sizeof(uint8_t*) * shmem_internal_num_pes);
if (NULL == shmem_transport_ofi_target_heap_addrs) {
Expand Down Expand Up @@ -853,9 +854,9 @@ int populate_mr_tables(void)
return 1;
}
}
}

#endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */
#endif /* !ENABLE_MR_SCALABLE */
}

return 0;
}
Expand Down Expand Up @@ -1157,20 +1158,10 @@ int query_for_fabric(struct fabric_info *info)
hints.addr_format = FI_FORMAT_UNSPEC;
domain_attr.data_progress = FI_PROGRESS_AUTO;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note that #1059 possibly motivates a guard warning about and/or preventing the usage of FI_MR_ENDPOINT (and thus FI_PROGRESS_MANUAL) except for providers where it makes sense. But come to think of it, maybe this PR already accomplishes that?

domain_attr.resource_mgmt = FI_RM_ENABLED;
#ifdef ENABLE_MR_SCALABLE
/* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */
domain_attr.mr_mode = 0;
# if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT)
domain_attr.mr_mode = FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */
# endif
#else
/* Portable, absolute addressing, formerly FI_MR_BASIC */
domain_attr.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY;
#endif
#if !defined(ENABLE_MR_SCALABLE) || !defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING)
domain_attr.mr_key_size = 1; /* Heap and data use different MR keys, need
at least 1 byte */
#endif

shmem_transport_ofi_mr_mode = set_mr_flag(shmem_transport_ofi_info.prov_name,
&domain_attr);

#ifdef ENABLE_THREADS
if (shmem_internal_thread_level == SHMEM_THREAD_MULTIPLE) {
#ifdef USE_THREAD_COMPLETION
Expand Down Expand Up @@ -1244,17 +1235,12 @@ int query_for_fabric(struct fabric_info *info)
shmem_transport_ofi_stx_max = 0;
}

#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING)
/* Only use a single MR, no keys required */
info->p_info->domain_attr->mr_key_size = 0;
#else
/* Heap and data use different MR keys, need at least 1 byte of key space
* if using provider selected keys */
if (info->p_info->domain_attr->mr_mode & FI_MR_PROV_KEY)
info->p_info->domain_attr->mr_key_size = 1;
else
info->p_info->domain_attr->mr_key_size = 0;
#endif

shmem_internal_assertp(info->p_info->tx_attr->inject_size >= shmem_transport_ofi_max_buffered_send);
shmem_transport_ofi_max_buffered_send = info->p_info->tx_attr->inject_size;
Expand Down Expand Up @@ -1417,10 +1403,14 @@ int shmem_transport_init(void)

if (shmem_internal_params.OFI_PROVIDER_provided)
shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_PROVIDER;
else if (shmem_internal_params.OFI_USE_PROVIDER_provided)
shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_USE_PROVIDER;
else
else {
shmem_transport_ofi_info.prov_name = NULL;
char *fi_provider_provided = getenv("FI_PROVIDER");
if (fi_provider_provided != NULL)
shmem_transport_ofi_info.prov_name = fi_provider_provided;
else
shmem_transport_ofi_info.prov_name = NULL;
}

if (shmem_internal_params.OFI_FABRIC_provided)
shmem_transport_ofi_info.fabric_name = shmem_internal_params.OFI_FABRIC;
Expand Down Expand Up @@ -1788,13 +1778,15 @@ int shmem_transport_fini(void)
ret = fi_close(&shmem_transport_ofi_target_cq->fid);
OFI_CHECK_ERROR_MSG(ret, "Target CQ close failed (%s)\n", fi_strerror(errno));

#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING)
ret = fi_close(&shmem_transport_ofi_target_mrfd->fid);
OFI_CHECK_ERROR_MSG(ret, "Target MR close failed (%s)\n", fi_strerror(errno));
#else
ret = fi_close(&shmem_transport_ofi_target_heap_mrfd->fid);
OFI_CHECK_ERROR_MSG(ret, "Target heap MR close failed (%s)\n", fi_strerror(errno));

#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING)
if (shmem_transport_ofi_mr_mode != 0) {
ret = fi_close(&shmem_transport_ofi_target_data_mrfd->fid);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a corresponding mr_reg for shmem_transport_ofi_target_data_mrfd if only ENABLE_REMOTE_VIRTUAL_ADDRESSING is enabled...

OFI_CHECK_ERROR_MSG(ret, "Target data MR close failed (%s)\n", fi_strerror(errno));
}
#else
ret = fi_close(&shmem_transport_ofi_target_data_mrfd->fid);
OFI_CHECK_ERROR_MSG(ret, "Target data MR close failed (%s)\n", fi_strerror(errno));
#endif
Expand Down
Loading