From 05a18e84959ea90b6c022520a481eaa088188c2f Mon Sep 17 00:00:00 2001 From: Shachar Hasson Date: Thu, 12 Dec 2024 21:38:32 +0200 Subject: [PATCH] UCP/WIREUP: Support EP reconfiguration for non wired-up scenarios --- src/ucp/core/ucp_ep.c | 13 ++- src/ucp/core/ucp_ep.h | 8 ++ src/ucp/wireup/wireup.c | 109 ++++++++++++++++++---- test/gtest/ucp/test_ucp_ep_reconfig.cc | 124 ++++++++++++++----------- 4 files changed, 172 insertions(+), 82 deletions(-) diff --git a/src/ucp/core/ucp_ep.c b/src/ucp/core/ucp_ep.c index 26e90aa14d1..93c09d9843f 100644 --- a/src/ucp/core/ucp_ep.c +++ b/src/ucp/core/ucp_ep.c @@ -1859,10 +1859,9 @@ int ucp_ep_config_lane_is_peer_match(const ucp_ep_config_key_t *key1, config_lane2->dst_md_index); } -static ucp_lane_index_t -ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1, - ucp_lane_index_t lane1, - const ucp_ep_config_key_t *key2) +ucp_lane_index_t ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1, + ucp_lane_index_t lane1, + const ucp_ep_config_key_t *key2) { ucp_lane_index_t lane_idx; @@ -1936,9 +1935,9 @@ void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1, } } -static int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1, - const ucp_ep_config_key_t *key2, - ucp_lane_index_t lane) +int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2, + ucp_lane_index_t lane) { const ucp_ep_config_key_lane_t *config_lane1 = &key1->lanes[lane]; const ucp_ep_config_key_lane_t *config_lane2 = &key2->lanes[lane]; diff --git a/src/ucp/core/ucp_ep.h b/src/ucp/core/ucp_ep.h index 50ff07e6cf9..41190ebd211 100644 --- a/src/ucp/core/ucp_ep.h +++ b/src/ucp/core/ucp_ep.h @@ -749,6 +749,10 @@ int ucp_ep_config_lane_is_peer_match(const ucp_ep_config_key_t *key1, const ucp_ep_config_key_t *key2, ucp_lane_index_t lane2); +ucp_lane_index_t ucp_ep_config_find_match_lane(const ucp_ep_config_key_t *key1, + ucp_lane_index_t lane1, + const ucp_ep_config_key_t *key2); + void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1, const ucp_ep_config_key_t *key2, const ucp_ep_h ep, @@ -756,6 +760,10 @@ void ucp_ep_config_lanes_intersect(const ucp_ep_config_key_t *key1, const unsigned *addr_indices, ucp_lane_index_t *lane_map); +int ucp_ep_config_lane_is_equal(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2, + ucp_lane_index_t lane); + int ucp_ep_config_is_equal(const ucp_ep_config_key_t *key1, const ucp_ep_config_key_t *key2); diff --git a/src/ucp/wireup/wireup.c b/src/ucp/wireup/wireup.c index 3fa73424c68..c6cf6180dee 100644 --- a/src/ucp/wireup/wireup.c +++ b/src/ucp/wireup/wireup.c @@ -1351,28 +1351,70 @@ static void ucp_wireup_discard_uct_eps(ucp_ep_h ep, uct_ep_h *uct_eps, } } +static int +ucp_wireup_is_am_lane_replaced(ucp_ep_h ep, + const ucp_lane_index_t *reuse_lane_map) +{ + return !ucp_ep_has_cm_lane(ep) && (ep->am_lane != UCP_NULL_LANE) && + (reuse_lane_map[ep->am_lane] == UCP_NULL_LANE); +} + static int ucp_wireup_check_is_reconfigurable(ucp_ep_h ep, const ucp_ep_config_key_t *new_key, const ucp_unpacked_address_t *remote_address, const unsigned *addr_indices) { - ucp_lane_index_t lane; + ucp_lane_index_t lane, wireup_lane, reuse_lane_map[UCP_MAX_LANES]; + const ucp_ep_config_key_t *old_key; if ((ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) || ucp_ep_has_cm_lane(ep)) { return 1; } - /* TODO: Support reconfiguration when lanes are created without a wireup_ep - * wrapper */ - for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { - if (!ucp_wireup_ep_test(ucp_ep_get_lane(ep, lane))) { + old_key = &ucp_ep_config(ep)->key; + + /* TODO: 1) Support lanes which are connected to the same remote MD, but + * different remote sys_dev (eg. TCP). */ + for (lane = 0; lane < old_key->num_lanes; ++lane) { + if ((ucp_ep_config_find_match_lane(old_key, lane, new_key) != + UCP_NULL_LANE) && + !ucp_ep_config_lane_is_equal(old_key, new_key, lane)) { return 0; } } - return 1; + ucp_ep_config_lanes_intersect(old_key, new_key, ep, remote_address, + addr_indices, reuse_lane_map); + wireup_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); + + /* TODO: 2) Support reconfiguration for separated wireup and AM lanes + * during wireup process (request sent). */ + return !(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED) || + (ep->am_lane == wireup_lane) || + ucp_wireup_ep_test(ucp_ep_get_lane(ep, ep->am_lane)) || + !ucp_wireup_is_am_lane_replaced(ep, reuse_lane_map); +} + +static int ucp_wireup_should_reconfigure(ucp_ep_h ep, + const ucp_lane_index_t *reuse_lane_map, + ucp_lane_index_t num_lanes) +{ + ucp_lane_index_t lane; + + if (ucp_ep_has_cm_lane(ep)) { + return 1; + } + + /* Check whether all lanes are reused */ + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (reuse_lane_map[lane] == UCP_NULL_LANE) { + return 1; + } + } + + return ucp_ep_num_lanes(ep) != num_lanes; } static ucp_lane_index_t @@ -1415,15 +1457,16 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key, { uct_ep_h uct_ep = NULL; ucp_lane_index_t old_lane, new_wireup_lane; - ucp_wireup_ep_t *old_wireup_ep, *new_wireup_ep; + ucp_wireup_ep_t *new_wireup_ep; + uct_ep_h old_wireup_ep; ucp_rsc_index_t aux_rsc_index; - int is_p2p; + int is_p2p, is_wireup_ep; ucs_status_t status; /* Get old wireup lane */ old_lane = ucp_wireup_get_msg_lane(ep, UCP_WIREUP_MSG_REQUEST); - old_wireup_ep = ucp_wireup_ep(ucp_ep_get_lane(ep, old_lane)); - ucs_assert_always(old_wireup_ep != NULL); + old_wireup_ep = ucp_ep_get_lane(ep, old_lane); + is_wireup_ep = ucp_wireup_ep_test(old_wireup_ep); /* If wireup lane is required for new configuration, select it according * to the following priority: @@ -1456,10 +1499,10 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key, ucs_assert(new_wireup_ep != NULL); /* Get correct aux_rsc_index either from next_ep or aux_ep */ - aux_rsc_index = ucp_wireup_ep_is_next_ep_active(old_wireup_ep) ? + aux_rsc_index = (!is_wireup_ep || ucp_wireup_ep_is_next_ep_active( + ucp_wireup_ep(old_wireup_ep))) ? ucp_ep_get_rsc_index(ep, old_lane) : - ucp_wireup_ep_get_aux_rsc_index( - &old_wireup_ep->super.super); + ucp_wireup_ep_get_aux_rsc_index(old_wireup_ep); ucs_assert(aux_rsc_index != UCP_NULL_RESOURCE); is_p2p = ucp_ep_config_connect_p2p(ep->worker, &ucp_ep_config(ep)->key, @@ -1467,15 +1510,16 @@ ucp_wireup_replace_wireup_msg_lane(ucp_ep_h ep, ucp_ep_config_key_t *key, /* Move aux EP to new wireup lane */ ucp_wireup_ep_set_aux(new_wireup_ep, - ucp_wireup_ep_extract_msg_ep(old_wireup_ep), + is_wireup_ep ? ucp_wireup_ep_extract_msg_ep( + ucp_wireup_ep(old_wireup_ep)) : + old_wireup_ep, aux_rsc_index, is_p2p); - /* Remove old wireup_ep as it's not needed anymore. - * NOTICE: Next two lines are intentionally not merged with the lane - * removal loop in ucp_wireup_check_config_intersect, because of future - * support for non-wireup EPs reconfiguration (which will modify this - * code). */ - uct_ep_destroy(&old_wireup_ep->super.super); + if (is_wireup_ep) { + /* Remove old wireup_ep as it's not needed anymore. */ + uct_ep_destroy(old_wireup_ep); + } + ucp_ep_set_lane(ep, old_lane, NULL); new_uct_eps[new_wireup_lane] = &new_wireup_ep->super.super; @@ -1516,6 +1560,11 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key, ucp_ep_config_lanes_intersect(old_key, new_key, ep, remote_address, addr_indices, reuse_lane_map); + if (!ucp_wireup_should_reconfigure(ep, reuse_lane_map, + new_key->num_lanes)) { + return UCS_OK; + } + if (ucp_ep_has_cm_lane(ep)) { /* CM lane has to be reused by the new EP configuration */ ucs_assert(reuse_lane_map[ucp_ep_get_cm_lane(ep)] != UCP_NULL_LANE); @@ -1622,9 +1671,18 @@ ucp_wireup_gather_pending_reqs(ucp_ep_h ep, ucs_queue_head_t *replay_pending_queue) { ucp_request_t *req; + ucp_lane_index_t lane; + + ucs_queue_head_init(replay_pending_queue); + if (ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) { + return; + } + + /* Handle wireup EPs */ ucp_wireup_eps_pending_extract(ep, replay_pending_queue); + /* rkey ptr requests */ ucs_queue_for_each(req, &ep->worker->rkey_ptr_reqs, send.rndv.rkey_ptr.queue_elem) { if (req->send.ep == ep) { @@ -1632,6 +1690,17 @@ ucp_wireup_gather_pending_reqs(ucp_ep_h ep, (ucs_queue_elem_t*)&req->send.uct.priv); } } + + /* Fully connected lanes */ + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (ucp_wireup_ep_test(ucp_ep_get_lane(ep, lane))) { + continue; + } + + uct_ep_pending_purge(ucp_ep_get_lane(ep, lane), + ucp_request_purge_enqueue_cb, + replay_pending_queue); + } } ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags, diff --git a/test/gtest/ucp/test_ucp_ep_reconfig.cc b/test/gtest/ucp/test_ucp_ep_reconfig.cc index 2969d1c6c79..2e67ff9661b 100644 --- a/test/gtest/ucp/test_ucp_ep_reconfig.cc +++ b/test/gtest/ucp/test_ucp_ep_reconfig.cc @@ -55,6 +55,8 @@ class test_ucp_ep_reconfig : public ucp_test { bool is_lane_connected(ucp_ep_h ep, ucp_lane_index_t lane_idx, const entity &other) const; ucp_tl_bitmap_t reduced_tl_bitmap() const; + ucp_tl_bitmap_t initial_tl_bitmap() const; + unsigned num_shm_lanes() const; ucp_worker_cfg_index_t m_cfg_index = UCP_WORKER_CFG_INDEX_NULL; unsigned m_num_reused_rscs = 0; @@ -62,17 +64,11 @@ class test_ucp_ep_reconfig : public ucp_test { bool m_exclude_ifaces; }; - bool is_single_transport() + bool is_single_transport() const { return GetParam().transports.size() == 1; } - bool has_p2p_transport() - { - return has_resource(sender(), "rc_verbs") || - has_resource(sender(), "rc_mlx5"); - } - void create_entity(bool push_front, bool exclude_ifaces) { auto e = new entity(GetParam(), m_ucp_config, get_worker_params(), this, @@ -103,7 +99,11 @@ class test_ucp_ep_reconfig : public ucp_test { UCS_TEST_SKIP_R("test requires at least 2 ifaces to work"); } - ensure_reused_lanes_reconfigurable(); + if (has_transport("tcp")) { + UCS_TEST_SKIP_R("TODO: fix lane matching functionality in case " + "there's matching remote MDs and different " + "sys_devs"); + } } static void get_test_variants(std::vector &variants) @@ -113,8 +113,6 @@ class test_ucp_ep_reconfig : public ucp_test { } void run(bool bidirectional = false); - void skip_non_p2p(); - void ensure_reused_lanes_reconfigurable(); bool reuse_lanes() const { @@ -196,9 +194,27 @@ void test_ucp_ep_reconfig::entity::store_config() /* Calculate number of reused resources by: * 1) Count number of resources used in EP configuration. * 2) Take half of total resources to be reused. */ - auto num_reused = UCS_STATIC_BITMAP_POPCOUNT(ep_tl_bitmap()) / 2; - auto test = static_cast(m_test); - m_num_reused_rscs = test->reuse_lanes() ? num_reused : 0; + auto num_reused = UCS_STATIC_BITMAP_POPCOUNT(ep_tl_bitmap()) / 2; + auto test = static_cast(m_test); + + if (!m_exclude_ifaces) { + m_num_reused_rscs = num_shm_lanes(); + } else { + m_num_reused_rscs = test->reuse_lanes() ? num_reused : 0; + } +} + +unsigned test_ucp_ep_reconfig::entity::num_shm_lanes() const +{ + unsigned num_shm = 0; + + for (auto lane = 0; lane < ucp_ep_num_lanes(ep()); ++lane) { + auto rsc_idx = ucp_ep_get_rsc_index(ep(), lane); + num_shm += (ucph()->tl_rscs[rsc_idx].tl_rsc.dev_type == + UCT_DEVICE_TYPE_SHM); + } + + return num_shm; } ucp_tl_bitmap_t @@ -223,12 +239,40 @@ test_ucp_ep_reconfig::entity::ep_tl_bitmap(unsigned max_num_rscs) const return tl_bitmap; } +ucp_tl_bitmap_t test_ucp_ep_reconfig::entity::initial_tl_bitmap() const +{ + auto test = static_cast(m_test); + + if (!test->is_single_transport()) { + return ucp_tl_bitmap_max; + } + + /* For single transport, half of the resources should be reserved for + * receiver side to use */ + ucp_tl_bitmap_t tl_bitmap = UCS_STATIC_BITMAP_ZERO_INITIALIZER; + size_t num_tls = 0; + ucp_rsc_index_t rsc_idx; + + UCS_STATIC_BITMAP_FOR_EACH_BIT(rsc_idx, &ucph()->tl_bitmap) { + if (++num_tls > (ucph()->num_tls / 2)) { + UCS_STATIC_BITMAP_SET(&tl_bitmap, rsc_idx); + } + } + + return tl_bitmap; +} + ucp_tl_bitmap_t test_ucp_ep_reconfig::entity::reduced_tl_bitmap() const { - if ((ep() == NULL) || !m_exclude_ifaces) { + if (!m_exclude_ifaces) { return ucp_tl_bitmap_max; } + if (ep() == NULL) { + /* Handle first entity's bitmap */ + return initial_tl_bitmap(); + } + /* Use only resources not already in use, or part of reuse bitmap */ auto reused_bitmap = ep_tl_bitmap(num_reused_rscs()); return UCS_STATIC_BITMAP_OR(UCS_STATIC_BITMAP_NOT(ep_tl_bitmap()), @@ -355,56 +399,32 @@ void test_ucp_ep_reconfig::run(bool bidirectional) r_receiver->verify_configuration(*r_sender, r_sender->num_reused_rscs()); } -void test_ucp_ep_reconfig::skip_non_p2p() +UCS_TEST_P(test_ucp_ep_reconfig, basic) { - if (!has_p2p_transport()) { - UCS_TEST_SKIP_R("No p2p TLs available, config will be non-wireup"); + if (has_transport("shm")) { + UCS_TEST_SKIP_R("TODO: add support for reconfiguration of separate " + "wireup and AM lanes"); } -} -void test_ucp_ep_reconfig::ensure_reused_lanes_reconfigurable() -{ - if (!reuse_lanes()) { - return; - } - - if (has_transport("tcp") || has_transport("dc_x") || has_transport("shm")) { - UCS_TEST_SKIP_R("non wired-up lanes are not supported yet"); - } -} - -/* TODO: Remove skip condition after next PRs are merged. */ -UCS_TEST_SKIP_COND_P(test_ucp_ep_reconfig, basic, !has_transport("rc")) -{ run(); } UCS_TEST_P(test_ucp_ep_reconfig, request_reset, "PROTO_REQUEST_RESET=y") { - if (is_single_transport()) { - /* One side will consume all ifaces and the other side will have no ifaces left to use */ - UCS_TEST_SKIP_R("exclude_iface requires at least 2 transports to work " - "(for example DC + SHM)"); - } - - skip_non_p2p(); run(); } -UCS_TEST_SKIP_COND_P(test_ucp_ep_reconfig, resolve_remote_id, is_self(), - "MAX_RNDV_LANES=0") +UCS_TEST_P(test_ucp_ep_reconfig, resolve_remote_id) { - if (has_transport("tcp")) { - UCS_TEST_SKIP_R("asymmetric setup is not supported for this transport " - "due to a reachability issue - only matching " - "interfaces can connect"); + if (has_transport("dc_x")) { + /* Avoid creating odd number of lanes due to AM lane separation */ + modify_config("MAX_RNDV_LANES", "0"); } run(true); } UCP_INSTANTIATE_TEST_CASE(test_ucp_ep_reconfig); -UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_ep_reconfig, rc_x_v, "rc"); class test_reconfig_asymmetric : public test_ucp_ep_reconfig { protected: @@ -420,23 +440,17 @@ class test_reconfig_asymmetric : public test_ucp_ep_reconfig { } }; -/* Will be relevant when reuse + non-wireup is supported */ -UCS_TEST_SKIP_COND_P(test_reconfig_asymmetric, basic, has_transport("shm")) +UCS_TEST_P(test_reconfig_asymmetric, basic) { run(); } -/* Will be relevant when reuse + non-wireup is supported */ -UCS_TEST_SKIP_COND_P(test_reconfig_asymmetric, request_reset, - has_transport("shm"), "PROTO_REQUEST_RESET=y") +UCS_TEST_P(test_reconfig_asymmetric, request_reset, "PROTO_REQUEST_RESET=y") { - skip_non_p2p(); run(); } -/* SHM + single lane won't trigger reconfig. */ -UCS_TEST_SKIP_COND_P(test_reconfig_asymmetric, resolve_remote_id, - has_transport("shm") || is_self(), "MAX_RNDV_LANES=0") +UCS_TEST_P(test_reconfig_asymmetric, resolve_remote_id) { run(true); }