diff --git a/src/ucp/core/ucp_ep.c b/src/ucp/core/ucp_ep.c index d7bba097d89..7288e5199c3 100644 --- a/src/ucp/core/ucp_ep.c +++ b/src/ucp/core/ucp_ep.c @@ -826,6 +826,7 @@ ucp_ep_create_to_worker_addr(ucp_worker_h worker, ucp_tl_bitmap_t ep_tl_bitmap; ucs_status_t status; ucp_ep_h ep; + int is_am_replaced; /* allocate endpoint */ status = ucp_ep_create_base(worker, ep_init_flags, remote_address->name, @@ -836,7 +837,8 @@ ucp_ep_create_to_worker_addr(ucp_worker_h worker, /* initialize transport endpoints */ status = ucp_wireup_init_lanes(ep, ep_init_flags, local_tl_bitmap, - remote_address, addr_indices); + remote_address, addr_indices, + &is_am_replaced); if (status != UCS_OK) { goto err_delete; } @@ -1859,10 +1861,9 @@ int ucp_ep_config_lane_is_peer_match(const ucp_ep_config_key_t *key1, config_lane2->dst_md_index); } -static ucp_lane_index_t -ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1, - ucp_lane_index_t lane1, - const ucp_ep_config_key_t *key2) +ucp_lane_index_t ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2, + ucp_lane_index_t lane1) { ucp_lane_index_t lane_idx; @@ -1892,7 +1893,7 @@ ucp_ep_config_find_reusable_lane(const ucp_ep_config_key_t *key1, return key2->cm_lane; } - new_lane = ucp_ep_config_find_match_lane(key1, old_lane, key2); + new_lane = ucp_ep_config_find_match_lane(key1, key2, old_lane); if (new_lane == UCP_NULL_LANE) { /* No matching lane was found */ return UCP_NULL_LANE; @@ -1936,9 +1937,9 @@ void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1, } } -static int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1, - const ucp_ep_config_key_t *key2, - ucp_lane_index_t lane) +int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2, + ucp_lane_index_t lane) { const ucp_ep_config_key_lane_t *config_lane1 = &key1->lanes[lane]; const ucp_ep_config_key_lane_t *config_lane2 = &key2->lanes[lane]; diff --git a/src/ucp/core/ucp_ep.h b/src/ucp/core/ucp_ep.h index 50ff07e6cf9..0ddb5c7a657 100644 --- a/src/ucp/core/ucp_ep.h +++ b/src/ucp/core/ucp_ep.h @@ -749,6 +749,10 @@ int ucp_ep_config_lane_is_peer_match(const ucp_ep_config_key_t *key1, const ucp_ep_config_key_t *key2, ucp_lane_index_t lane2); +ucp_lane_index_t ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2, + ucp_lane_index_t lane1); + void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1, const ucp_ep_config_key_t *key2, const ucp_ep_h ep, @@ -756,6 +760,10 @@ void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1, const unsigned *addr_indices, ucp_lane_index_t *lane_map); +int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2, + ucp_lane_index_t lane); + int ucp_ep_config_is_equal(const ucp_ep_config_key_t *key1, const ucp_ep_config_key_t *key2); diff --git a/src/ucp/wireup/wireup.c b/src/ucp/wireup/wireup.c index 3fa73424c68..c313c74b239 100644 --- a/src/ucp/wireup/wireup.c +++ b/src/ucp/wireup/wireup.c @@ -566,6 +566,7 @@ ucp_wireup_process_pre_request(ucp_worker_h worker, ucp_ep_h ep, UCP_EP_INIT_CM_WIREUP_CLIENT | ucp_ep_err_mode_init_flags(msg->err_mode); unsigned addr_indices[UCP_MAX_LANES]; + int is_am_replaced; ucs_status_t status; UCP_WIREUP_MSG_CHECK(msg, ep, UCP_WIREUP_MSG_PRE_REQUEST); @@ -589,7 +590,8 @@ ucp_wireup_process_pre_request(ucp_worker_h worker, ucp_ep_h ep, /* initialize transport endpoints */ status = ucp_wireup_init_lanes(ep, ep_init_flags, &ucp_tl_bitmap_max, - remote_address, addr_indices); + remote_address, addr_indices, + &is_am_replaced); if (status != UCS_OK) { goto err_ep_set_failed; } @@ -613,7 +615,7 @@ ucp_wireup_process_request(ucp_worker_h worker, ucp_ep_h ep, ucp_lane_index_t lanes2remote[UCP_MAX_LANES]; unsigned addr_indices[UCP_MAX_LANES]; ucs_status_t status; - int has_cm_lane; + int has_cm_lane, is_am_replaced; UCP_WIREUP_MSG_CHECK(msg, ep, UCP_WIREUP_MSG_REQUEST); ucs_trace("got wireup request from 0x%"PRIx64" src_ep_id 0x%"PRIx64 @@ -684,7 +686,8 @@ ucp_wireup_process_request(ucp_worker_h worker, ucp_ep_h ep, /* Initialize lanes (possible destroy existing lanes) */ status = ucp_wireup_init_lanes(ep, ep_init_flags, &ucp_tl_bitmap_max, - remote_address, addr_indices); + remote_address, addr_indices, + &is_am_replaced); if (status != UCS_OK) { goto err_set_ep_failed; } @@ -696,8 +699,11 @@ ucp_wireup_process_request(ucp_worker_h worker, ucp_ep_h ep, */ send_reply = /* Always send the reply in case of CM, the client's EP has to * be marked as REMOTE_CONNECTED */ - has_cm_lane || (msg->dst_ep_id == UCS_PTR_MAP_KEY_INVALID) || - ucp_ep_config(ep)->p2p_lanes; + has_cm_lane || (msg->dst_ep_id == UCS_PTR_MAP_KEY_INVALID) || + ucp_ep_config(ep)->p2p_lanes || + /* Ensure AM messages sending order is kept by sending wireup + * reply, in case old lane is replaced */ + is_am_replaced; /* Connect p2p addresses to remote endpoint, if at least one is true: */ if (/* - EP has not been connected locally yet */ @@ -720,7 +726,10 @@ ucp_wireup_process_request(ucp_worker_h worker, ucp_ep_h ep, /* don't mark as connected to remote now in case of CM, since it destroys * CM wireup EP (if it is hidden in the CM lane) that is used for sending * WIREUP MSGs */ - if (!ucp_ep_config(ep)->p2p_lanes && !has_cm_lane) { + if (!ucp_ep_config(ep)->p2p_lanes && !has_cm_lane && + /* Ensure AM messages receiving order is kept by waiting for a wireup + * ack, in case old lane is replaced */ + !is_am_replaced) { /* mark the endpoint as connected to remote */ ucp_wireup_remote_connected(ep); } @@ -760,7 +769,6 @@ ucp_wireup_process_reply(ucp_worker_h worker, ucp_ep_h ep, const ucp_unpacked_address_t *remote_address) { ucs_status_t status; - int ack; UCP_WIREUP_MSG_CHECK(msg, ep, UCP_WIREUP_MSG_REPLY); ucs_trace("ep %p: got wireup reply src_ep_id 0x%"PRIx64 @@ -786,19 +794,14 @@ ucp_wireup_process_reply(ucp_worker_h worker, ucp_ep_h ep, } ucp_ep_update_flags(ep, UCP_EP_FLAG_LOCAL_CONNECTED, 0); - ack = 1; - } else { - ack = 0; } ucp_wireup_remote_connected(ep); - if (ack) { - /* Send `UCP_WIREUP_MSG_ACK` from progress function - * to avoid calling UCT routines from an async thread */ - ucs_callbackq_add_oneshot(&worker->uct->progress_q, ep, - ucp_wireup_send_msg_ack, ep); - } + /* Send `UCP_WIREUP_MSG_ACK` from progress function + * to avoid calling UCT routines from an async thread */ + ucs_callbackq_add_oneshot(&worker->uct->progress_q, ep, + ucp_wireup_send_msg_ack, ep); } static void ucp_ep_removed_flush_completion(ucp_request_t *req) @@ -835,6 +838,7 @@ ucp_wireup_send_ep_removed(ucp_worker_h worker, const ucp_wireup_msg_t *msg, ucp_ep_h reply_ep; unsigned addr_indices[UCP_MAX_LANES]; ucs_status_ptr_t req; + int is_am_replaced; /* If endpoint does not exist - create a temporary endpoint to send a * UCP_WIREUP_MSG_EP_REMOVED reply */ @@ -847,7 +851,8 @@ ucp_wireup_send_ep_removed(ucp_worker_h worker, const ucp_wireup_msg_t *msg, /* Initialize lanes of the reply EP */ status = ucp_wireup_init_lanes(reply_ep, ep_init_flags, &ucp_tl_bitmap_max, - remote_address, addr_indices); + remote_address, addr_indices, + &is_am_replaced); if (status != UCS_OK) { goto out_delete_ep; } @@ -1351,28 +1356,78 @@ static void ucp_wireup_discard_uct_eps(ucp_ep_h ep, uct_ep_h *uct_eps, } } +/* Checks if AM lane was replaced after being operational (wireup process + * completed) */ +static int +ucp_wireup_is_am_lane_replaced(ucp_ep_h ep, + const ucp_lane_index_t *reuse_lane_map) +{ + ucp_lane_index_t am_lane = ucp_ep_get_am_lane(ep); + + return !ucp_ep_has_cm_lane(ep) && + /* Verify AM lane exists */ + (am_lane != UCP_NULL_LANE) && + /* Lane is not being reused */ + (reuse_lane_map[am_lane] == UCP_NULL_LANE) && + /* Lane is not yet operational */ + !ucp_wireup_ep_test(ucp_ep_get_lane(ep, am_lane)); +} + static int ucp_wireup_check_is_reconfigurable(ucp_ep_h ep, const ucp_ep_config_key_t *new_key, const ucp_unpacked_address_t *remote_address, const unsigned *addr_indices) { - ucp_lane_index_t lane; + ucp_lane_index_t lane, wireup_lane, reuse_lane_map[UCP_MAX_LANES]; + const ucp_ep_config_key_t *old_key; if ((ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) || ucp_ep_has_cm_lane(ep)) { return 1; } - /* TODO: Support reconfiguration when lanes are created without a wireup_ep - * wrapper */ - for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { - if (!ucp_wireup_ep_test(ucp_ep_get_lane(ep, lane))) { + old_key = &ucp_ep_config(ep)->key; + + /* TODO: 1) Support lanes which are connected to the same remote MD, but + * different remote sys_dev (eg. TCP). */ + for (lane = 0; lane < old_key->num_lanes; ++lane) { + if ((ucp_ep_config_find_match_lane(old_key, new_key, lane) != + UCP_NULL_LANE) && + !ucp_ep_config_lane_is_equal(old_key, new_key, lane)) { return 0; } } - return 1; + ucp_ep_config_lanes_intersect(old_key, new_key, ep, remote_address, + addr_indices, reuse_lane_map); + wireup_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); + + /* TODO: 2) Support reconfiguration for separated wireup and AM lanes + * during wireup process (request sent). */ + return !(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED) || + (ucp_ep_get_am_lane(ep) == wireup_lane) || + !ucp_wireup_is_am_lane_replaced(ep, reuse_lane_map); +} + +static int ucp_wireup_should_reconfigure(ucp_ep_h ep, + const ucp_lane_index_t *reuse_lane_map, + ucp_lane_index_t num_lanes) +{ + ucp_lane_index_t lane; + + if (ucp_ep_has_cm_lane(ep) || (ucp_ep_num_lanes(ep) != num_lanes)) { + return 1; + } + + /* Check whether all lanes are reused */ + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (reuse_lane_map[lane] == UCP_NULL_LANE) { + return 1; + } + } + + return 0; } static ucp_lane_index_t @@ -1414,41 +1469,46 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key, const ucp_lane_index_t *reuse_lane_map) { uct_ep_h uct_ep = NULL; - ucp_lane_index_t old_lane, new_wireup_lane; - ucp_wireup_ep_t *old_wireup_ep, *new_wireup_ep; + int am_replaced = ucp_wireup_is_am_lane_replaced(ep, reuse_lane_map); + ucp_lane_index_t old_lane, new_wireup_lane, old_wireup_lane; + ucp_lane_index_t non_reused_lane; + ucp_wireup_ep_t *new_wireup_ep, *old_ep_wrapper; + uct_ep_h old_wireup_ep; ucp_rsc_index_t aux_rsc_index; - int is_p2p; + int is_p2p, is_wireup_ep; ucs_status_t status; - /* Get old wireup lane */ - old_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); - old_wireup_ep = ucp_wireup_ep(ucp_ep_get_lane(ep, old_lane)); - ucs_assert_always(old_wireup_ep != NULL); + old_wireup_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); + old_lane = am_replaced ? ucp_ep_get_am_lane(ep) : old_wireup_lane; + old_wireup_ep = ucp_ep_get_lane(ep, old_lane); + old_ep_wrapper = ucp_wireup_ep(old_wireup_ep); + is_wireup_ep = ucp_wireup_ep_test(old_wireup_ep); /* If wireup lane is required for new configuration, select it according * to the following priority: * 1) If CM lane exists, use it. - * 2) If any reused wireup_ep lane exists in old configuration, use it. - * 3) Otherwise select a non-reused lane (which must exist), and wrap - * it with a new wireup_ep wrapper. */ - + * 2) If old AM lane is replaced, use new AM lane. + * 3) If any non-reused lane exists, select it and wrap with a new + * wireup_ep wrapper. + * 4) Otherwise select a reused wireup_ep lane (which must exist). */ if (ucp_ep_has_cm_lane(ep)) { new_wireup_ep = ucp_ep_get_cm_wireup_ep(ep); new_wireup_lane = key->cm_lane; } else { - new_wireup_lane = ucp_wireup_find_reused_wireup_ep_lane(ep, - reuse_lane_map); - if (new_wireup_lane != UCP_NULL_LANE) { - uct_ep = ucp_ep_get_lane(ep, new_wireup_lane); - } else { - new_wireup_lane = ucp_wireup_find_non_reused_lane(ep, key, - reuse_lane_map); - ucs_assert(new_wireup_lane != UCP_NULL_LANE); + non_reused_lane = ucp_wireup_find_non_reused_lane(ep, key, + reuse_lane_map); + new_wireup_lane = am_replaced ? key->am_lane : non_reused_lane; + if (new_wireup_lane != UCP_NULL_LANE) { status = ucp_wireup_ep_create(ep, &uct_ep); if (status != UCS_OK) { return status; } + } else { + new_wireup_lane = + ucp_wireup_find_reused_wireup_ep_lane(ep, reuse_lane_map); + ucs_assert(new_wireup_lane != UCP_NULL_LANE); + uct_ep = ucp_ep_get_lane(ep, new_wireup_lane); } new_wireup_ep = ucp_wireup_ep(uct_ep); } @@ -1456,10 +1516,10 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key, ucs_assert(new_wireup_ep != NULL); /* Get correct aux_rsc_index either from next_ep or aux_ep */ - aux_rsc_index = ucp_wireup_ep_is_next_ep_active(old_wireup_ep) ? + aux_rsc_index = (!is_wireup_ep || + ucp_wireup_ep_is_next_ep_active(old_ep_wrapper)) ? ucp_ep_get_rsc_index(ep, old_lane) : - ucp_wireup_ep_get_aux_rsc_index( - &old_wireup_ep->super.super); + ucp_wireup_ep_get_aux_rsc_index(old_wireup_ep); ucs_assert(aux_rsc_index != UCP_NULL_RESOURCE); is_p2p = ucp_ep_config_connect_p2p(ep->worker, &ucp_ep_config(ep)->key, @@ -1467,17 +1527,17 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key, /* Move aux EP to new wireup lane */ ucp_wireup_ep_set_aux(new_wireup_ep, - ucp_wireup_ep_extract_msg_ep(old_wireup_ep), + is_wireup_ep ? + ucp_wireup_ep_extract_msg_ep(old_ep_wrapper) : + old_wireup_ep, aux_rsc_index, is_p2p); - /* Remove old wireup_ep as it's not needed anymore. - * NOTICE: Next two lines are intentionally not merged with the lane - * removal loop in ucp_wireup_check_config_intersect, because of future - * support for non-wireup EPs reconfiguration (which will modify this - * code). */ - uct_ep_destroy(&old_wireup_ep->super.super); - ucp_ep_set_lane(ep, old_lane, NULL); + if (is_wireup_ep) { + /* Remove old wireup_ep as it's not needed anymore. */ + uct_ep_destroy(old_wireup_ep); + } + ucp_ep_set_lane(ep, old_lane, NULL); new_uct_eps[new_wireup_lane] = &new_wireup_ep->super.super; key->wireup_msg_lane = new_wireup_lane; return UCS_OK; @@ -1488,7 +1548,8 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key, const ucp_unpacked_address_t *remote_address, const unsigned *addr_indices, ucp_lane_map_t *connect_lane_bitmap, - ucs_queue_head_t *replay_pending_queue) + ucs_queue_head_t *replay_pending_queue, + int *is_am_replaced_p) { uct_ep_h new_uct_eps[UCP_MAX_LANES] = {NULL}; ucp_lane_index_t reuse_lane_map[UCP_MAX_LANES] = {UCP_NULL_LANE}; @@ -1496,8 +1557,10 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key, ucp_lane_index_t lane, reuse_lane, wireup_lane; uct_ep_h uct_ep; ucs_status_t status; + int is_am_replaced; *connect_lane_bitmap = UCS_MASK(new_key->num_lanes); + *is_am_replaced_p = 0; if ((ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) || !ucp_wireup_check_is_reconfigurable(ep, new_key, remote_address, @@ -1516,6 +1579,11 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key, ucp_ep_config_lanes_intersect(old_key, new_key, ep, remote_address, addr_indices, reuse_lane_map); + if (!ucp_wireup_should_reconfigure(ep, reuse_lane_map, + new_key->num_lanes)) { + return UCS_OK; + } + if (ucp_ep_has_cm_lane(ep)) { /* CM lane has to be reused by the new EP configuration */ ucs_assert(reuse_lane_map[ucp_ep_get_cm_lane(ep)] != UCP_NULL_LANE); @@ -1525,7 +1593,10 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key, "new_key->wireup_msg_lane=%u", new_key->wireup_msg_lane); } - wireup_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); + is_am_replaced = ucp_wireup_is_am_lane_replaced(ep, reuse_lane_map); + wireup_lane = is_am_replaced ? + ucp_ep_get_am_lane(ep) : + ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); /* wireup lane has to be selected for the old configuration */ ucs_assert(wireup_lane != UCP_NULL_LANE); @@ -1586,6 +1657,7 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key, ucp_ep_set_lane(ep, lane, new_uct_eps[lane]); } + *is_am_replaced_p = is_am_replaced; return UCS_OK; } @@ -1622,9 +1694,18 @@ ucp_wireup_gather_pending_reqs(ucp_ep_h ep, ucs_queue_head_t *replay_pending_queue) { ucp_request_t *req; + ucp_lane_index_t lane; + + ucs_queue_head_init(replay_pending_queue); + + if (ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) { + return; + } + /* Handle wireup EPs */ ucp_wireup_eps_pending_extract(ep, replay_pending_queue); + /* rkey ptr requests */ ucs_queue_for_each(req, &ep->worker->rkey_ptr_reqs, send.rndv.rkey_ptr.queue_elem) { if (req->send.ep == ep) { @@ -1632,12 +1713,24 @@ ucp_wireup_gather_pending_reqs(ucp_ep_h ep, (ucs_queue_elem_t*)&req->send.uct.priv); } } + + /* Fully connected lanes */ + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (ucp_wireup_ep_test(ucp_ep_get_lane(ep, lane))) { + continue; + } + + uct_ep_pending_purge(ucp_ep_get_lane(ep, lane), + ucp_request_purge_enqueue_cb, + replay_pending_queue); + } } ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags, const ucp_tl_bitmap_t *local_tl_bitmap, const ucp_unpacked_address_t *remote_address, - unsigned *addr_indices) + unsigned *addr_indices, + int *is_am_replaced_p) { ucp_worker_h worker = ep->worker; ucp_rsc_index_t cm_idx = UCP_NULL_RESOURCE; @@ -1710,7 +1803,8 @@ ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags, status = ucp_wireup_check_config_intersect(ep, &key, remote_address, addr_indices, &connect_lane_bitmap, - &replay_pending_queue); + &replay_pending_queue, + is_am_replaced_p); if (status != UCS_OK) { goto out; } diff --git a/src/ucp/wireup/wireup.h b/src/ucp/wireup/wireup.h index 274f3c0c372..9445cc60903 100644 --- a/src/ucp/wireup/wireup.h +++ b/src/ucp/wireup/wireup.h @@ -180,7 +180,8 @@ int ucp_wireup_is_reachable(ucp_ep_h ep, unsigned ep_init_flags, ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags, const ucp_tl_bitmap_t *local_tl_bitmap, const ucp_unpacked_address_t *remote_address, - unsigned *addr_indices); + unsigned *addr_indices, + int *is_am_replaced_p); ucs_status_t ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags, diff --git a/src/ucp/wireup/wireup_cm.c b/src/ucp/wireup/wireup_cm.c index 53ab08f2591..cd399ffb65b 100644 --- a/src/ucp/wireup/wireup_cm.c +++ b/src/ucp/wireup/wireup_cm.c @@ -651,6 +651,7 @@ static unsigned ucp_cm_client_connect_progress(void *arg) unsigned addr_indices[UCP_MAX_RESOURCES]; ucs_status_t status; uint8_t sa_data_ver; + int is_am_replaced; UCS_ASYNC_BLOCK(&worker->async); @@ -695,8 +696,8 @@ static unsigned ucp_cm_client_connect_progress(void *arg) dev_index = ucp_cm_tl_bitmap_get_dev_idx(worker->context, &tl_bitmap); ucp_context_dev_idx_tl_bitmap(context, dev_index, &tl_bitmap); - status = ucp_wireup_init_lanes(ucp_ep, wireup_ep->ep_init_flags, - &tl_bitmap, &addr, addr_indices); + status = ucp_wireup_init_lanes(ucp_ep, wireup_ep->ep_init_flags, &tl_bitmap, + &addr, addr_indices, &is_am_replaced); if (status != UCS_OK) { ucs_debug("ep %p: failed to initialize lanes: %s", ucp_ep, ucs_status_string(status)); diff --git a/test/gtest/ucp/test_ucp_ep_reconfig.cc b/test/gtest/ucp/test_ucp_ep_reconfig.cc index 9752dbe3d3b..fdcef8ace16 100644 --- a/test/gtest/ucp/test_ucp_ep_reconfig.cc +++ b/test/gtest/ucp/test_ucp_ep_reconfig.cc @@ -57,6 +57,7 @@ class test_ucp_ep_reconfig : public ucp_test { bool is_lane_connected(ucp_ep_h ep, ucp_lane_index_t lane_idx, const entity &other) const; ucp_tl_bitmap_t reduced_tl_bitmap() const; + unsigned num_shm_rscs() const; ucp_worker_cfg_index_t m_cfg_index = UCP_WORKER_CFG_INDEX_NULL; unsigned m_num_reused_rscs = 0; @@ -74,12 +75,6 @@ class test_ucp_ep_reconfig : public ucp_test { return GetParam().transports.size() == 1; } - bool has_p2p_transport() - { - return has_resource(sender(), "rc_verbs") || - has_resource(sender(), "rc_mlx5"); - } - void create_entity(bool push_front, bool exclude_ifaces) { auto e = new entity(GetParam(), m_ucp_config, get_worker_params(), this, @@ -101,7 +96,7 @@ class test_ucp_ep_reconfig : public ucp_test { } public: - void init() + virtual void init() { ucp_test::init(); @@ -110,7 +105,11 @@ class test_ucp_ep_reconfig : public ucp_test { UCS_TEST_SKIP_R("test requires at least 2 ifaces to work"); } - ensure_reused_lanes_reconfigurable(); + if (has_transport("tcp")) { + UCS_TEST_SKIP_R("TODO: fix lane matching functionality in case " + "there's matching remote MDs and different " + "sys_devs"); + } } static void get_test_variants(std::vector &variants) @@ -120,8 +119,7 @@ class test_ucp_ep_reconfig : public ucp_test { } void run(bool bidirectional = false); - void skip_non_p2p(); - void ensure_reused_lanes_reconfigurable(); + virtual ucp_tl_bitmap_t tl_bitmap(); bool reuse_lanes() const { @@ -151,9 +149,9 @@ class test_ucp_ep_reconfig : public ucp_test { void send_recv(bool bidirectional) { -/* TODO: remove this when 100MB asan bug is solved */ +/* TODO: remove this when large messages asan bug is solved (size > ~70MB) */ #ifdef __SANITIZE_ADDRESS__ - static const size_t msg_sizes[] = {8, 1024, 16384, 65536}; + static const size_t msg_sizes[] = {8, 1024, 16384, 32768}; #else static const size_t msg_sizes[] = {8, 1024, 16384, UCS_MBYTE}; #endif @@ -200,10 +198,27 @@ void test_ucp_ep_reconfig::entity::store_config() /* Calculate number of reused resources by: * 1) Count number of resources used in EP configuration. - * 2) Take half of total resources to be reused. */ + * 2) Take half of total resources to be reused. + * 3) For asymmetric mode, only SHM resources are reused. */ auto num_reused = UCS_STATIC_BITMAP_POPCOUNT(ep_tl_bitmap()) / 2; auto test = static_cast(m_test); - m_num_reused_rscs = test->reuse_lanes() ? num_reused : 0; + m_num_reused_rscs = m_exclude_ifaces ? + (test->reuse_lanes() ? num_reused : 0) : + num_shm_rscs(); +} + +unsigned test_ucp_ep_reconfig::entity::num_shm_rscs() const +{ + unsigned num_shm = 0; + auto tl_bitmap = ep_tl_bitmap(); + ucp_rsc_index_t rsc_idx; + + UCS_STATIC_BITMAP_FOR_EACH_BIT(rsc_idx, &tl_bitmap) { + num_shm += (ucph()->tl_rscs[rsc_idx].tl_rsc.dev_type == + UCT_DEVICE_TYPE_SHM); + } + + return num_shm; } ucp_tl_bitmap_t @@ -231,7 +246,8 @@ test_ucp_ep_reconfig::entity::ep_tl_bitmap(unsigned max_num_rscs) const ucp_tl_bitmap_t test_ucp_ep_reconfig::entity::reduced_tl_bitmap() const { if ((ep() == NULL) || !m_exclude_ifaces) { - return ucp_tl_bitmap_max; + /* Take bitmap from test */ + return ((test_ucp_ep_reconfig*)m_test)->tl_bitmap(); } /* Use only resources not already in use, or part of reuse bitmap */ @@ -367,6 +383,27 @@ void test_ucp_ep_reconfig::pattern_check(const mem_buffer_vec_t &rbufs) const } } +ucp_tl_bitmap_t test_ucp_ep_reconfig::tl_bitmap() +{ + if (!is_single_transport()) { + return ucp_tl_bitmap_max; + } + + /* For single transport, half of the resources should be reserved for + * receiver side to use */ + ucp_tl_bitmap_t tl_bitmap = UCS_STATIC_BITMAP_ZERO_INITIALIZER; + size_t num_tls = 0; + ucp_rsc_index_t rsc_idx; + + UCS_STATIC_BITMAP_FOR_EACH_BIT(rsc_idx, &sender().ucph()->tl_bitmap) { + if (++num_tls > (sender().ucph()->num_tls / 2)) { + UCS_STATIC_BITMAP_SET(&tl_bitmap, rsc_idx); + } + } + + return tl_bitmap; +} + void test_ucp_ep_reconfig::run(bool bidirectional) { create_entities_and_connect(); @@ -381,61 +418,32 @@ void test_ucp_ep_reconfig::run(bool bidirectional) r_receiver->verify_configuration(*r_sender, r_sender->num_reused_rscs()); } -void test_ucp_ep_reconfig::skip_non_p2p() +UCS_TEST_P(test_ucp_ep_reconfig, basic) { - if (!has_p2p_transport()) { - UCS_TEST_SKIP_R("No p2p TLs available, config will be non-wireup"); - } -} - -void test_ucp_ep_reconfig::ensure_reused_lanes_reconfigurable() -{ - if (!reuse_lanes()) { - return; - } - - if (has_transport("tcp") || has_transport("dc_x") || has_transport("shm")) { - UCS_TEST_SKIP_R("non wired-up lanes are not supported yet"); + if (has_transport("shm")) { + UCS_TEST_SKIP_R("TODO: add support for reconfiguration of separate " + "wireup and AM lanes"); } -} -/* TODO: Remove skip condition after next PRs are merged. */ -UCS_TEST_SKIP_COND_P(test_ucp_ep_reconfig, basic, !has_transport("rc")) -{ run(); } UCS_TEST_P(test_ucp_ep_reconfig, request_reset, "PROTO_REQUEST_RESET=y") { - if (is_single_transport()) { - /* One side will consume all ifaces and the other side will have no ifaces left to use */ - UCS_TEST_SKIP_R("exclude_iface requires at least 2 transports to work " - "(for example DC + SHM)"); - } - - skip_non_p2p(); run(); } -UCS_TEST_SKIP_COND_P(test_ucp_ep_reconfig, resolve_remote_id, is_self(), - "MAX_RNDV_LANES=0") +UCS_TEST_P(test_ucp_ep_reconfig, resolve_remote_id) { - if (has_transport("tcp")) { - UCS_TEST_SKIP_R("asymmetric setup is not supported for this transport " - "due to a reachability issue - only matching " - "interfaces can connect"); - } - - if (has_transport("shm")) { - UCS_TEST_SKIP_R("AM messages might be sent before reconfiguration" - "(would be supported in next PR)"); + if (has_transport("dc_x")) { + /* Avoid creating odd number of lanes due to AM lane separation */ + modify_config("MAX_RNDV_LANES", "0"); } run(true); } UCP_INSTANTIATE_TEST_CASE(test_ucp_ep_reconfig); -UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_ep_reconfig, rc_x_v, "rc"); class test_reconfig_asymmetric : public test_ucp_ep_reconfig { protected: @@ -449,25 +457,43 @@ class test_reconfig_asymmetric : public test_ucp_ep_reconfig { sender().connect(&receiver(), get_ep_params()); receiver().connect(&sender(), get_ep_params()); } + + ucp_tl_bitmap_t tl_bitmap() override + { + return ucp_tl_bitmap_max; + } + + void init() override + { + static const std::vector ib_tls = {"rc_mlx5", "dc_mlx5", + "rc_verbs", "ud_verbs", + "ud_mlx5"}; + + test_ucp_ep_reconfig::init(); + bool has_ib = std::any_of(ib_tls.begin(), ib_tls.end(), + [&](const std::string &tl_name) { + return has_resource(sender(), tl_name); + }); + + if (!has_ib) { + /* In case there's no IB devices, new config will be identical to + * old config (thus no reconfiguration will be triggered). */ + UCS_TEST_SKIP_R("No IB devices found"); + } + } }; -/* Will be relevant when reuse + non-wireup is supported */ -UCS_TEST_SKIP_COND_P(test_reconfig_asymmetric, basic, has_transport("shm")) +UCS_TEST_P(test_reconfig_asymmetric, basic) { run(); } -/* Will be relevant when reuse + non-wireup is supported */ -UCS_TEST_SKIP_COND_P(test_reconfig_asymmetric, request_reset, - has_transport("shm"), "PROTO_REQUEST_RESET=y") +UCS_TEST_P(test_reconfig_asymmetric, request_reset, "PROTO_REQUEST_RESET=y") { - skip_non_p2p(); run(); } -/* SHM + single lane won't trigger reconfig. */ -UCS_TEST_SKIP_COND_P(test_reconfig_asymmetric, resolve_remote_id, - has_transport("shm") || is_self(), "MAX_RNDV_LANES=0") +UCS_TEST_P(test_reconfig_asymmetric, resolve_remote_id) { run(true); }