From e0be06f49c0fbcb8ecda565defa08b317bc6fb43 Mon Sep 17 00:00:00 2001 From: "md.rahman" Date: Wed, 16 Nov 2022 14:33:10 -0800 Subject: [PATCH 1/5] Removed deprecated flag; added trial MR hint based on provider --- configure.ac | 8 +++--- src/shmem_env_defs.h | 2 -- src/transport_ofi.c | 59 +++++++++++++++++++++++++++++++------------- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 99112ca9a..d93d26f32 100755 --- a/configure.ac +++ b/configure.ac @@ -194,13 +194,15 @@ AM_CONDITIONAL([HAVE_LONG_FORTRAN_HEADER], [test "$enable_long_fortran_header" = AC_ARG_ENABLE([ofi-mr], [AC_HELP_STRING([--enable-ofi-mr=MODE], - [OFI memory registration mode: basic, scalable, or rma-event (default: scalable)])]) + [OFI memory registration mode: none, basic, scalable, or rma-event (default: none)])]) -AS_IF([test -z "$enable_ofi_mr"], [enable_ofi_mr="scalable"]) +AS_IF([test -z "$enable_ofi_mr"], [enable_ofi_mr="none"]) AS_CASE([$enable_ofi_mr], + [none], + [AC_DEFINE([ENABLE_MR_NONE], [1], [If defined, the OFI transport will use MR mode based on provider])], [basic], - [], + [AC_DEFINE([ENABLE_MR_BASIC], [1], [If defined, the OFI transport will use FI_MR_BASIC])], [scalable], [AC_DEFINE([ENABLE_MR_SCALABLE], [1], [If defined, the OFI transport will use FI_MR_SCALABLE])], [rma?event], diff --git a/src/shmem_env_defs.h b/src/shmem_env_defs.h index 2ec66b89f..5cfffa065 100644 --- a/src/shmem_env_defs.h +++ b/src/shmem_env_defs.h @@ -87,8 +87,6 @@ SHMEM_INTERNAL_ENV_DEF(OFI_ATOMIC_CHECKS_WARN, bool, false, SHMEM_INTERNAL_ENV_C "Display warnings about unsupported atomic operations") SHMEM_INTERNAL_ENV_DEF(OFI_PROVIDER, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT, "Provider that should be used by the OFI transport") -SHMEM_INTERNAL_ENV_DEF(OFI_USE_PROVIDER, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT, - "Deprecated, replaced by SHMEM_OFI_PROVIDER") SHMEM_INTERNAL_ENV_DEF(OFI_FABRIC, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT, "Fabric that should be used by the OFI transport") SHMEM_INTERNAL_ENV_DEF(OFI_DOMAIN, string, "auto", SHMEM_INTERNAL_ENV_CAT_TRANSPORT, diff --git a/src/transport_ofi.c b/src/transport_ofi.c index 46a67bb19..24d587bb1 100644 --- a/src/transport_ofi.c +++ b/src/transport_ofi.c @@ -1129,6 +1129,42 @@ int allocate_fabric_resources(struct fabric_info *info) return ret; } +static inline +void select_mr_flags(struct fi_domain_attr *domain) { +#ifdef ENABLE_MR_NONE + char *ofi_provider = shmem_transport_ofi_info.prov_name; + if (0 == strcmp(ofi_provider, "cxi")) { + domain.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT; + domain.mr_key_size = 1; + } else if (0 == strcmp(ofi_provider, "gni") || 0 == strcmp(ofi_provider, "verbs") + || 0 == strcmp(ofi_provider, "rxm") || strstr(ofi_provider, "verbs") != NULL + || strstr(ofi_provider, "rxm") != NULL) { + domain.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + domain.mr_key_size = 1; + } else if (0 == strcmp(ofi_provider, "psm3") || 0 == strcmp(ofi_provider, "psm2") + || 0 == strcmp(ofi_provider, "tcp") || strstr(ofi_provider, "tcp") != NULL) { + domain.mr_mode = 0; + } else { /* unknown provider */ + domain.mr_mode = 0; + } +#else +# ifdef ENABLE_MR_SCALABLE + /* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */ + domain.mr_mode = 0; +# if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT) + domain.mr_mode = FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ +# endif +# else + /* Portable, absolute addressing, formerly FI_MR_BASIC */ + domain.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; +# endif +# if !defined(ENABLE_MR_SCALABLE) || !defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) + domain.mr_key_size = 1; /* Heap and data use different MR keys, need + at least 1 byte */ +# endif +#endif +} + static inline int query_for_fabric(struct fabric_info *info) { @@ -1157,20 +1193,9 @@ int query_for_fabric(struct fabric_info *info) hints.addr_format = FI_FORMAT_UNSPEC; domain_attr.data_progress = FI_PROGRESS_AUTO; domain_attr.resource_mgmt = FI_RM_ENABLED; -#ifdef ENABLE_MR_SCALABLE - /* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */ - domain_attr.mr_mode = 0; -# if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT) - domain_attr.mr_mode = FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ -# endif -#else - /* Portable, absolute addressing, formerly FI_MR_BASIC */ - domain_attr.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; -#endif -#if !defined(ENABLE_MR_SCALABLE) || !defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) - domain_attr.mr_key_size = 1; /* Heap and data use different MR keys, need - at least 1 byte */ -#endif + + select_mr_flags(&domain_attr); + #ifdef ENABLE_THREADS if (shmem_internal_thread_level == SHMEM_THREAD_MULTIPLE) { #ifdef USE_THREAD_COMPLETION @@ -1244,7 +1269,8 @@ int query_for_fabric(struct fabric_info *info) shmem_transport_ofi_stx_max = 0; } -#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) + if (info->p_info->domain_attr->mr_mode & FI_MR_SCALABLE != 0) { +#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) /* Only use a single MR, no keys required */ info->p_info->domain_attr->mr_key_size = 0; #else @@ -1255,6 +1281,7 @@ int query_for_fabric(struct fabric_info *info) else info->p_info->domain_attr->mr_key_size = 0; #endif + } shmem_internal_assertp(info->p_info->tx_attr->inject_size >= shmem_transport_ofi_max_buffered_send); shmem_transport_ofi_max_buffered_send = info->p_info->tx_attr->inject_size; @@ -1417,8 +1444,6 @@ int shmem_transport_init(void) if (shmem_internal_params.OFI_PROVIDER_provided) shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_PROVIDER; - else if (shmem_internal_params.OFI_USE_PROVIDER_provided) - shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_USE_PROVIDER; else shmem_transport_ofi_info.prov_name = NULL; From f3a05405c66806ce9940680449d9be50e9a1217b Mon Sep 17 00:00:00 2001 From: "md.rahman" Date: Thu, 1 Dec 2022 15:02:03 -0800 Subject: [PATCH 2/5] Initial changes to adopt provider based MR flag selection --- src/transport_ofi.c | 199 +++++++++++++++++++++----------------------- src/transport_ofi.h | 96 +++++++++++---------- 2 files changed, 144 insertions(+), 151 deletions(-) diff --git a/src/transport_ofi.c b/src/transport_ofi.c index 24d587bb1..fdf761ad6 100644 --- a/src/transport_ofi.c +++ b/src/transport_ofi.c @@ -62,14 +62,6 @@ struct fid_cq* shmem_transport_ofi_target_cq; #if ENABLE_TARGET_CNTR struct fid_cntr* shmem_transport_ofi_target_cntrfd; #endif -#ifdef ENABLE_MR_SCALABLE -#ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING -struct fid_mr* shmem_transport_ofi_target_mrfd; -#else /* !ENABLE_REMOTE_VIRTUAL_ADDRESSING */ -struct fid_mr* shmem_transport_ofi_target_heap_mrfd; -struct fid_mr* shmem_transport_ofi_target_data_mrfd; -#endif -#else /* !ENABLE_MR_SCALABLE */ struct fid_mr* shmem_transport_ofi_target_heap_mrfd; struct fid_mr* shmem_transport_ofi_target_data_mrfd; uint64_t* shmem_transport_ofi_target_heap_keys; @@ -80,7 +72,6 @@ int shmem_transport_ofi_use_absolute_address; uint8_t** shmem_transport_ofi_target_heap_addrs; uint8_t** shmem_transport_ofi_target_data_addrs; #endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ -#endif /* ENABLE_MR_SCALABLE */ uint64_t shmem_transport_ofi_max_poll; long shmem_transport_ofi_put_poll_limit; long shmem_transport_ofi_get_poll_limit; @@ -621,60 +612,10 @@ int bind_enable_ep_resources(shmem_transport_ctx_t *ctx) static inline -int allocate_recv_cntr_mr(void) -{ +int allocate_separate_heap_data_mr(void) { int ret = 0; uint64_t flags = 0; - /* ------------------------------------ */ - /* POST enable resources for to EP */ - /* ------------------------------------ */ - - /* since this is AFTER enable and RMA you must create memory regions for - * incoming reads/writes and outgoing non-blocking Puts, specifying entire - * VA range */ - -#if ENABLE_TARGET_CNTR - { - struct fi_cntr_attr cntr_attr = {0}; - - /* Create counter for incoming writes */ - cntr_attr.events = FI_CNTR_EVENTS_COMP; - cntr_attr.wait_obj = FI_WAIT_UNSPEC; - - ret = fi_cntr_open(shmem_transport_ofi_domainfd, &cntr_attr, - &shmem_transport_ofi_target_cntrfd, NULL); - OFI_CHECK_RETURN_STR(ret, "target CNTR open failed"); - -#ifdef ENABLE_MR_RMA_EVENT - if (shmem_transport_ofi_mr_rma_event) - flags |= FI_RMA_EVENT; -#endif /* ENABLE_MR_RMA_EVENT */ - } -#endif - -#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) - ret = fi_mr_reg(shmem_transport_ofi_domainfd, 0, UINT64_MAX, - FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags, - &shmem_transport_ofi_target_mrfd, NULL); - OFI_CHECK_RETURN_STR(ret, "target memory (all) registration failed"); - - /* Bind counter with target memory region for incoming messages */ -#if ENABLE_TARGET_CNTR - ret = fi_mr_bind(shmem_transport_ofi_target_mrfd, - &shmem_transport_ofi_target_cntrfd->fid, - FI_REMOTE_WRITE); - OFI_CHECK_RETURN_STR(ret, "target CNTR binding to MR failed"); - -#ifdef ENABLE_MR_RMA_EVENT - if (shmem_transport_ofi_mr_rma_event) { - ret = fi_mr_enable(shmem_transport_ofi_target_mrfd); - OFI_CHECK_RETURN_STR(ret, "target MR enable failed"); - } -#endif /* ENABLE_MR_RMA_EVENT */ -#endif /* ENABLE_TARGET_CNTR */ - -#else /* Register separate data and heap segments using keys 0 and 1, * respectively. In MR_BASIC_MODE, the keys are ignored and selected by * the provider. */ @@ -712,6 +653,69 @@ int allocate_recv_cntr_mr(void) } #endif /* ENABLE_MR_RMA_EVENT */ #endif /* ENABLE_TARGET_CNTR */ + + return ret; +} + + +static inline +int allocate_recv_cntr_mr(void) +{ + int ret = 0; + + /* ------------------------------------ */ + /* POST enable resources for to EP */ + /* ------------------------------------ */ + + /* since this is AFTER enable and RMA you must create memory regions for + * incoming reads/writes and outgoing non-blocking Puts, specifying entire + * VA range */ + +#if ENABLE_TARGET_CNTR + { + struct fi_cntr_attr cntr_attr = {0}; + + /* Create counter for incoming writes */ + cntr_attr.events = FI_CNTR_EVENTS_COMP; + cntr_attr.wait_obj = FI_WAIT_UNSPEC; + + ret = fi_cntr_open(shmem_transport_ofi_domainfd, &cntr_attr, + &shmem_transport_ofi_target_cntrfd, NULL); + OFI_CHECK_RETURN_STR(ret, "target CNTR open failed"); + +#ifdef ENABLE_MR_RMA_EVENT + if (shmem_transport_ofi_mr_rma_event) + flags |= FI_RMA_EVENT; +#endif /* ENABLE_MR_RMA_EVENT */ + } +#endif + +#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) + if (shmem_transport_ofi_mr_mode == 0) { + ret = fi_mr_reg(shmem_transport_ofi_domainfd, 0, UINT64_MAX, + FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags, + &shmem_transport_ofi_target_heap_mrfd, NULL); + OFI_CHECK_RETURN_STR(ret, "target memory (all) registration failed"); + + /* Bind counter with target memory region for incoming messages */ +#if ENABLE_TARGET_CNTR + ret = fi_mr_bind(shmem_transport_ofi_target_heap_mrfd, + &shmem_transport_ofi_target_cntrfd->fid, + FI_REMOTE_WRITE); + OFI_CHECK_RETURN_STR(ret, "target CNTR binding to MR failed"); + +#ifdef ENABLE_MR_RMA_EVENT + if (shmem_transport_ofi_mr_rma_event) { + ret = fi_mr_enable(shmem_transport_ofi_target_heap_mrfd); + OFI_CHECK_RETURN_STR(ret, "target MR enable failed"); + } +#endif /* ENABLE_MR_RMA_EVENT */ +#endif /* ENABLE_TARGET_CNTR */ + } else { + ret = allocate_separate_heap_data_mr(); + } +#else + ret = allocate_separate_heap_data_mr(); #endif return ret; @@ -720,7 +724,7 @@ int allocate_recv_cntr_mr(void) static int publish_mr_info(void) { -#ifndef ENABLE_MR_SCALABLE + if (shmem_transport_ofi_mr_mode != 0) { int err; uint64_t heap_key, data_key; @@ -744,16 +748,15 @@ int publish_mr_info(void) RAISE_WARN_STR("Put of data segment key to runtime KVS failed"); return 1; } - } + #ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING - if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) - shmem_transport_ofi_use_absolute_address = 1; - else - shmem_transport_ofi_use_absolute_address = 0; + if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) + shmem_transport_ofi_use_absolute_address = 1; + else + shmem_transport_ofi_use_absolute_address = 0; #else /* !ENABLE_REMOTE_VIRTUAL_ADDRESSING */ - { - int err; + void *heap_base, *data_base; if (shmem_transport_ofi_info.p_info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) { @@ -775,9 +778,9 @@ int publish_mr_info(void) RAISE_WARN_STR("Put of data segment address to runtime KVS failed"); return 1; } - } + #endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ -#endif /* !ENABLE_MR_SCALABLE */ + } return 0; } @@ -785,7 +788,7 @@ int publish_mr_info(void) static int populate_mr_tables(void) { -#ifndef ENABLE_MR_SCALABLE + if (shmem_transport_ofi_mr_mode != 0) { int i, err; @@ -818,11 +821,9 @@ int populate_mr_tables(void) return 1; } } - } + #ifndef ENABLE_REMOTE_VIRTUAL_ADDRESSING - { - int i, err; shmem_transport_ofi_target_heap_addrs = malloc(sizeof(uint8_t*) * shmem_internal_num_pes); if (NULL == shmem_transport_ofi_target_heap_addrs) { @@ -853,9 +854,9 @@ int populate_mr_tables(void) return 1; } } - } + #endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ -#endif /* !ENABLE_MR_SCALABLE */ + } return 0; } @@ -1130,37 +1131,33 @@ int allocate_fabric_resources(struct fabric_info *info) } static inline -void select_mr_flags(struct fi_domain_attr *domain) { +int get_mr_flag(void) { #ifdef ENABLE_MR_NONE char *ofi_provider = shmem_transport_ofi_info.prov_name; + if (ofi_provider == NULL) return 0; + if (0 == strcmp(ofi_provider, "cxi")) { - domain.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT; - domain.mr_key_size = 1; + return FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT; } else if (0 == strcmp(ofi_provider, "gni") || 0 == strcmp(ofi_provider, "verbs") || 0 == strcmp(ofi_provider, "rxm") || strstr(ofi_provider, "verbs") != NULL || strstr(ofi_provider, "rxm") != NULL) { - domain.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; - domain.mr_key_size = 1; + return FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; } else if (0 == strcmp(ofi_provider, "psm3") || 0 == strcmp(ofi_provider, "psm2") || 0 == strcmp(ofi_provider, "tcp") || strstr(ofi_provider, "tcp") != NULL) { - domain.mr_mode = 0; + return 0; } else { /* unknown provider */ - domain.mr_mode = 0; + return 0; } #else # ifdef ENABLE_MR_SCALABLE /* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */ - domain.mr_mode = 0; + return 0; # if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT) - domain.mr_mode = FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ + return FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ # endif # else /* Portable, absolute addressing, formerly FI_MR_BASIC */ - domain.mr_mode = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; -# endif -# if !defined(ENABLE_MR_SCALABLE) || !defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) - domain.mr_key_size = 1; /* Heap and data use different MR keys, need - at least 1 byte */ + return FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; # endif #endif } @@ -1194,7 +1191,10 @@ int query_for_fabric(struct fabric_info *info) domain_attr.data_progress = FI_PROGRESS_AUTO; domain_attr.resource_mgmt = FI_RM_ENABLED; - select_mr_flags(&domain_attr); + shmem_transport_ofi_mr_mode = get_mr_flag(); + domain_attr.mr_mode = shmem_transport_ofi_mr_mode; + if (domain_attr.mr_mode != 0) + domain_attr.mr_key_size = 1; #ifdef ENABLE_THREADS if (shmem_internal_thread_level == SHMEM_THREAD_MULTIPLE) { @@ -1269,19 +1269,12 @@ int query_for_fabric(struct fabric_info *info) shmem_transport_ofi_stx_max = 0; } - if (info->p_info->domain_attr->mr_mode & FI_MR_SCALABLE != 0) { -#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) - /* Only use a single MR, no keys required */ - info->p_info->domain_attr->mr_key_size = 0; -#else /* Heap and data use different MR keys, need at least 1 byte of key space * if using provider selected keys */ if (info->p_info->domain_attr->mr_mode & FI_MR_PROV_KEY) info->p_info->domain_attr->mr_key_size = 1; else info->p_info->domain_attr->mr_key_size = 0; -#endif - } shmem_internal_assertp(info->p_info->tx_attr->inject_size >= shmem_transport_ofi_max_buffered_send); shmem_transport_ofi_max_buffered_send = info->p_info->tx_attr->inject_size; @@ -1813,13 +1806,15 @@ int shmem_transport_fini(void) ret = fi_close(&shmem_transport_ofi_target_cq->fid); OFI_CHECK_ERROR_MSG(ret, "Target CQ close failed (%s)\n", fi_strerror(errno)); -#if defined(ENABLE_MR_SCALABLE) && defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) - ret = fi_close(&shmem_transport_ofi_target_mrfd->fid); - OFI_CHECK_ERROR_MSG(ret, "Target MR close failed (%s)\n", fi_strerror(errno)); -#else ret = fi_close(&shmem_transport_ofi_target_heap_mrfd->fid); OFI_CHECK_ERROR_MSG(ret, "Target heap MR close failed (%s)\n", fi_strerror(errno)); +#if defined(ENABLE_REMOTE_VIRTUAL_ADDRESSING) + if (shmem_transport_ofi_mr_mode != 0) { + ret = fi_close(&shmem_transport_ofi_target_data_mrfd->fid); + OFI_CHECK_ERROR_MSG(ret, "Target data MR close failed (%s)\n", fi_strerror(errno)); + } +#else ret = fi_close(&shmem_transport_ofi_target_data_mrfd->fid); OFI_CHECK_ERROR_MSG(ret, "Target data MR close failed (%s)\n", fi_strerror(errno)); #endif diff --git a/src/transport_ofi.h b/src/transport_ofi.h index 099c6468c..b8bd55313 100644 --- a/src/transport_ofi.h +++ b/src/transport_ofi.h @@ -46,7 +46,7 @@ extern struct fid_cntr* shmem_transport_ofi_target_cntrfd; #if ENABLE_MANUAL_PROGRESS extern struct fid_cq* shmem_transport_ofi_target_cq; #endif -#ifndef ENABLE_MR_SCALABLE + extern uint64_t* shmem_transport_ofi_target_heap_keys; extern uint64_t* shmem_transport_ofi_target_data_keys; #ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING @@ -55,7 +55,6 @@ extern int shmem_transport_ofi_use_absolute_address extern uint8_t** shmem_transport_ofi_target_heap_addrs; extern uint8_t** shmem_transport_ofi_target_data_addrs; #endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ -#endif /* ENABLE_MR_SCALABLE */ extern uint64_t shmem_transport_ofi_max_poll; extern long shmem_transport_ofi_put_poll_limit; extern long shmem_transport_ofi_get_poll_limit; @@ -66,6 +65,8 @@ extern long shmem_transport_ofi_max_bounce_buffers; extern pthread_mutex_t shmem_transport_ofi_progress_lock; +static int shmem_transport_ofi_mr_mode = 0; + #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) #endif @@ -118,73 +119,70 @@ extern pthread_mutex_t shmem_transport_ofi_progress_lock; } while (0) -#ifdef ENABLE_MR_SCALABLE static inline void shmem_transport_ofi_get_mr(const void *addr, int dest_pe, uint8_t **mr_addr, uint64_t *key) { + + if (shmem_transport_ofi_mr_mode == 0) { #ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING - *key = 0; - *mr_addr = (uint8_t*) addr; + *key = 0; + *mr_addr = (uint8_t*) addr; #else - if ((void*) addr >= shmem_internal_data_base && - (uint8_t*) addr < (uint8_t*) shmem_internal_data_base + shmem_internal_data_length) { + if ((void*) addr >= shmem_internal_data_base && + (uint8_t*) addr < (uint8_t*) shmem_internal_data_base + shmem_internal_data_length) { - *key = 0; - *mr_addr = (uint8_t*) ((uint8_t *) addr - (uint8_t *) shmem_internal_data_base); + *key = 0; + *mr_addr = (uint8_t*) ((uint8_t *) addr - (uint8_t *) shmem_internal_data_base); - } else if ((void*) addr >= shmem_internal_heap_base && - (uint8_t*) addr < (uint8_t*) shmem_internal_heap_base + shmem_internal_heap_length) { + } else if ((void*) addr >= shmem_internal_heap_base && + (uint8_t*) addr < (uint8_t*) shmem_internal_heap_base + shmem_internal_heap_length) { - *key = 1; - *mr_addr = (uint8_t*) ((uint8_t *) addr - (uint8_t *) shmem_internal_heap_base); - } else { - *key = 0; - *mr_addr = NULL; - RAISE_ERROR_MSG("address (%p) outside of symmetric areas\n", addr); - } + *key = 1; + *mr_addr = (uint8_t*) ((uint8_t *) addr - (uint8_t *) shmem_internal_heap_base); + } else { + *key = 0; + *mr_addr = NULL; + RAISE_ERROR_MSG("address (%p) outside of symmetric areas\n", addr); + } #endif /* ENABLE_REMOTE_VIRTUAL_ADDRESSING */ -} - -#else -static inline -void shmem_transport_ofi_get_mr(const void *addr, int dest_pe, - uint8_t **mr_addr, uint64_t *key) { - if ((void*) addr >= shmem_internal_data_base && - (uint8_t*) addr < (uint8_t*) shmem_internal_data_base + shmem_internal_data_length) { - *key = shmem_transport_ofi_target_data_keys[dest_pe]; + } + else { + if ((void*) addr >= shmem_internal_data_base && + (uint8_t*) addr < (uint8_t*) shmem_internal_data_base + shmem_internal_data_length) { + *key = shmem_transport_ofi_target_data_keys[dest_pe]; #ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING - if (shmem_transport_ofi_use_absolute_address) - *mr_addr = (uint8_t *) addr; - else - *mr_addr = (void *) ((uint8_t *) addr - (uint8_t *) shmem_internal_data_base); + if (shmem_transport_ofi_use_absolute_address) + *mr_addr = (uint8_t *) addr; + else + *mr_addr = (void *) ((uint8_t *) addr - (uint8_t *) shmem_internal_data_base); #else - *mr_addr = shmem_transport_ofi_target_data_addrs[dest_pe] + - ((uint8_t *) addr - (uint8_t *) shmem_internal_data_base); + *mr_addr = shmem_transport_ofi_target_data_addrs[dest_pe] + + ((uint8_t *) addr - (uint8_t *) shmem_internal_data_base); #endif - } + } - else if ((void*) addr >= shmem_internal_heap_base && - (uint8_t*) addr < (uint8_t*) shmem_internal_heap_base + shmem_internal_heap_length) { - *key = shmem_transport_ofi_target_heap_keys[dest_pe]; + else if ((void*) addr >= shmem_internal_heap_base && + (uint8_t*) addr < (uint8_t*) shmem_internal_heap_base + shmem_internal_heap_length) { + *key = shmem_transport_ofi_target_heap_keys[dest_pe]; #ifdef ENABLE_REMOTE_VIRTUAL_ADDRESSING - if (shmem_transport_ofi_use_absolute_address) - *mr_addr = (uint8_t *) addr; - else - *mr_addr = (void *) ((uint8_t *) addr - (uint8_t *) shmem_internal_heap_base); + if (shmem_transport_ofi_use_absolute_address) + *mr_addr = (uint8_t *) addr; + else + *mr_addr = (void *) ((uint8_t *) addr - (uint8_t *) shmem_internal_heap_base); #else - *mr_addr = shmem_transport_ofi_target_heap_addrs[dest_pe] + - ((uint8_t *) addr - (uint8_t *) shmem_internal_heap_base); + *mr_addr = shmem_transport_ofi_target_heap_addrs[dest_pe] + + ((uint8_t *) addr - (uint8_t *) shmem_internal_heap_base); #endif - } + } - else { - *key = -1; - *mr_addr = NULL; - RAISE_ERROR_MSG("address (%p) outside of symmetric areas\n", addr); + else { + *key = -1; + *mr_addr = NULL; + RAISE_ERROR_MSG("address (%p) outside of symmetric areas\n", addr); + } } } -#endif /* Datatypes */ extern int shmem_transport_dtype_table[]; From beff73ed905bb443d6a35e958c8d1630ca934118 Mon Sep 17 00:00:00 2001 From: "md.rahman" Date: Thu, 1 Dec 2022 15:20:53 -0800 Subject: [PATCH 3/5] ofi: added the missing flags declaration --- src/transport_ofi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transport_ofi.c b/src/transport_ofi.c index fdf761ad6..4df3a19ec 100644 --- a/src/transport_ofi.c +++ b/src/transport_ofi.c @@ -662,6 +662,7 @@ static inline int allocate_recv_cntr_mr(void) { int ret = 0; + uint64_t flags = 0; /* ------------------------------------ */ /* POST enable resources for to EP */ From 52b95eff3264d6d736dbc14f7ccc6d83662c8967 Mon Sep 17 00:00:00 2001 From: "md.rahman" Date: Fri, 2 Dec 2022 08:46:19 -0800 Subject: [PATCH 4/5] ofi: added fi_provider env variable, moved set_mr_flag to header --- src/transport_ofi.c | 57 ++++++++++----------------------------- src/transport_ofi.h | 65 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 44 deletions(-) diff --git a/src/transport_ofi.c b/src/transport_ofi.c index 4df3a19ec..3326e75b8 100644 --- a/src/transport_ofi.c +++ b/src/transport_ofi.c @@ -612,22 +612,21 @@ int bind_enable_ep_resources(shmem_transport_ctx_t *ctx) static inline -int allocate_separate_heap_data_mr(void) { +int allocate_separate_heap_data_mr(uint64_t *flags) { int ret = 0; - uint64_t flags = 0; /* Register separate data and heap segments using keys 0 and 1, * respectively. In MR_BASIC_MODE, the keys are ignored and selected by * the provider. */ ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_heap_base, shmem_internal_heap_length, - FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 1ULL, flags, + FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 1ULL, *flags, &shmem_transport_ofi_target_heap_mrfd, NULL); OFI_CHECK_RETURN_STR(ret, "target memory (heap) registration failed"); ret = fi_mr_reg(shmem_transport_ofi_domainfd, shmem_internal_data_base, shmem_internal_data_length, - FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, flags, + FI_REMOTE_READ | FI_REMOTE_WRITE, 0, 0ULL, *flags, &shmem_transport_ofi_target_data_mrfd, NULL); OFI_CHECK_RETURN_STR(ret, "target memory (data) registration failed"); @@ -713,10 +712,10 @@ int allocate_recv_cntr_mr(void) #endif /* ENABLE_MR_RMA_EVENT */ #endif /* ENABLE_TARGET_CNTR */ } else { - ret = allocate_separate_heap_data_mr(); + ret = allocate_separate_heap_data_mr(&flags); } #else - ret = allocate_separate_heap_data_mr(); + ret = allocate_separate_heap_data_mr(&flags); #endif return ret; @@ -1131,38 +1130,6 @@ int allocate_fabric_resources(struct fabric_info *info) return ret; } -static inline -int get_mr_flag(void) { -#ifdef ENABLE_MR_NONE - char *ofi_provider = shmem_transport_ofi_info.prov_name; - if (ofi_provider == NULL) return 0; - - if (0 == strcmp(ofi_provider, "cxi")) { - return FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT; - } else if (0 == strcmp(ofi_provider, "gni") || 0 == strcmp(ofi_provider, "verbs") - || 0 == strcmp(ofi_provider, "rxm") || strstr(ofi_provider, "verbs") != NULL - || strstr(ofi_provider, "rxm") != NULL) { - return FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; - } else if (0 == strcmp(ofi_provider, "psm3") || 0 == strcmp(ofi_provider, "psm2") - || 0 == strcmp(ofi_provider, "tcp") || strstr(ofi_provider, "tcp") != NULL) { - return 0; - } else { /* unknown provider */ - return 0; - } -#else -# ifdef ENABLE_MR_SCALABLE - /* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */ - return 0; -# if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT) - return FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ -# endif -# else - /* Portable, absolute addressing, formerly FI_MR_BASIC */ - return FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; -# endif -#endif -} - static inline int query_for_fabric(struct fabric_info *info) { @@ -1192,10 +1159,8 @@ int query_for_fabric(struct fabric_info *info) domain_attr.data_progress = FI_PROGRESS_AUTO; domain_attr.resource_mgmt = FI_RM_ENABLED; - shmem_transport_ofi_mr_mode = get_mr_flag(); - domain_attr.mr_mode = shmem_transport_ofi_mr_mode; - if (domain_attr.mr_mode != 0) - domain_attr.mr_key_size = 1; + shmem_transport_ofi_mr_mode = set_mr_flag(shmem_transport_ofi_info.prov_name, + &domain_attr); #ifdef ENABLE_THREADS if (shmem_internal_thread_level == SHMEM_THREAD_MULTIPLE) { @@ -1438,8 +1403,14 @@ int shmem_transport_init(void) if (shmem_internal_params.OFI_PROVIDER_provided) shmem_transport_ofi_info.prov_name = shmem_internal_params.OFI_PROVIDER; - else + else { shmem_transport_ofi_info.prov_name = NULL; + char *fi_provider_provided = getenv("FI_PROVIDER"); + if (fi_provider_provided != NULL) + shmem_transport_ofi_info.prov_name = fi_provider_provided; + else + shmem_transport_ofi_info.prov_name = NULL; + } if (shmem_internal_params.OFI_FABRIC_provided) shmem_transport_ofi_info.fabric_name = shmem_internal_params.OFI_FABRIC; diff --git a/src/transport_ofi.h b/src/transport_ofi.h index b8bd55313..f1e800ca4 100644 --- a/src/transport_ofi.h +++ b/src/transport_ofi.h @@ -65,7 +65,7 @@ extern long shmem_transport_ofi_max_bounce_buffers; extern pthread_mutex_t shmem_transport_ofi_progress_lock; -static int shmem_transport_ofi_mr_mode = 0; +static uint64_t shmem_transport_ofi_mr_mode = 0; #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) @@ -119,6 +119,69 @@ static int shmem_transport_ofi_mr_mode = 0; } while (0) +/* This helper routine is used to set the appropriate flags for + * MR mode w.r.t. provider in use. This can be extended in the + * future to handle other flags chosen by the implementation */ + +static inline +uint64_t set_mr_flag(char *ofi_provider, struct fi_domain_attr *domain_attr) { + uint64_t mr_flags = 0; + +#ifdef ENABLE_MR_NONE + if (ofi_provider == NULL) { + domain_attr->mr_mode = 0; + return 0; + } + DEBUG_MSG("Found OFI provider: %s\n", ofi_provider); + + if (0 == strcmp(ofi_provider, "cxi")) { + + mr_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_ENDPOINT; + + } else if (0 == strcmp(ofi_provider, "gni")) { + + mr_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + + } else if (0 == strcmp(ofi_provider, "verbs") || strstr(ofi_provider, "verbs") != NULL) { + + mr_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + + } else if (0 == strcmp(ofi_provider, "psm3")) { + + mr_flags = 0; + + } else if (0 == strcmp(ofi_provider, "psm2")) { + + mr_flags = 0; + + } else if (0 == strcmp(ofi_provider, "tcp") || strstr(ofi_provider, "tcp") != NULL) { + + mr_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; + + } else { /* unknown provider */ + + mr_flags = 0; + + } +#else +# ifdef ENABLE_MR_SCALABLE + /* Scalable, offset-based addressing, formerly FI_MR_SCALABLE */ + mr_flags = 0; +# if !defined(ENABLE_HARD_POLLING) && defined(ENABLE_MR_RMA_EVENT) + mr_flags = FI_MR_RMA_EVENT; /* can support RMA_EVENT on MR */ +# endif +# else + /* Portable, absolute addressing, formerly FI_MR_BASIC */ + mr_flags = FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY; +# endif +#endif + + domain_attr->mr_mode = mr_flags; + if (mr_flags) domain_attr->mr_key_size = 1; + + return mr_flags; +} + static inline void shmem_transport_ofi_get_mr(const void *addr, int dest_pe, uint8_t **mr_addr, uint64_t *key) { From 5d0503207f264d0950b587e89f0bd51fcd1058ed Mon Sep 17 00:00:00 2001 From: "md.rahman" Date: Fri, 2 Dec 2022 08:51:07 -0800 Subject: [PATCH 5/5] ci: test added for mr-scalable --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 949110e1d..ff7a10299 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,6 +45,9 @@ jobs: sos_config: --enable-ofi-mr=basic --enable-av-map --disable-cxx --enable-memcpy --enable-pmi-simple libfabric_version: v1.13.x + - config_name: MR-Scalable + sos_config: --enable-ofi-mr=scalable --enable-pmi-simple + libfabric_version: v1.13.x - config_name: PMI MPI sos_config: --disable-fortran --enable-pmi-mpi CC=mpicc libfabric_version: v1.13.x