From 6ea25b086dc01bfa3857e39316f1bf64ab96401a Mon Sep 17 00:00:00 2001 From: Shi Jin Date: Thu, 14 Nov 2024 22:34:59 +0000 Subject: [PATCH] [v1.22.x] prov/efa: Fix the ep list scan in cq/cntr read We cannot only iterate eps and post initial batch of internal rx pkt once, as there can be more eps joining later after the cq read call. This patch fixes by introducing a bit in cq/ctnr that indicates whether a ep list scan is needed. This bit is set as true when a new ep is bind to the cq, and will be set as false every time when a scan is done. Signed-off-by: Shi Jin (cherry picked from commit c6085d101f5d60a694bac934da61f029f0fa02c5) --- prov/efa/src/efa_cntr.c | 6 +++--- prov/efa/src/efa_cntr.h | 2 +- prov/efa/src/rdm/efa_rdm_cq.c | 6 +++--- prov/efa/src/rdm/efa_rdm_cq.h | 2 +- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 9 +++++++++ prov/efa/test/efa_unit_test_cntr.c | 6 ++++-- prov/efa/test/efa_unit_test_cq.c | 6 ++++-- 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index 776a7da4530..a1a3444146b 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -161,13 +161,13 @@ static void efa_rdm_cntr_progress(struct util_cntr *cntr) * some idle endpoints and never poll completions for them. Move these initial posts to * the first polling before having a long term fix. */ - if (!efa_cntr->initial_rx_to_all_eps_posted) { + if (efa_cntr->need_to_scan_ep_list) { dlist_foreach(&cntr->ep_list, item) { fid_entry = container_of(item, struct fid_list_entry, entry); efa_rdm_ep = container_of(fid_entry->fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid); efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep); } - efa_cntr->initial_rx_to_all_eps_posted = true; + efa_cntr->need_to_scan_ep_list = false; } dlist_foreach(&efa_cntr->ibv_cq_poll_list, item) { @@ -193,7 +193,7 @@ int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, return -FI_ENOMEM; dlist_init(&cntr->ibv_cq_poll_list); - cntr->initial_rx_to_all_eps_posted = false; + cntr->need_to_scan_ep_list = false; efa_domain = container_of(domain, struct efa_domain, util_domain.domain_fid); diff --git a/prov/efa/src/efa_cntr.h b/prov/efa/src/efa_cntr.h index 05227159d49..bcfde8784a2 100644 --- a/prov/efa/src/efa_cntr.h +++ b/prov/efa/src/efa_cntr.h @@ -13,7 +13,7 @@ struct efa_cntr { struct fid_cntr *shm_cntr; struct dlist_entry ibv_cq_poll_list; /* Only used by RDM EP type */ - bool initial_rx_to_all_eps_posted; + bool need_to_scan_ep_list; }; int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, diff --git a/prov/efa/src/rdm/efa_rdm_cq.c b/prov/efa/src/rdm/efa_rdm_cq.c index ae48a37807b..d7380623cd9 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.c +++ b/prov/efa/src/rdm/efa_rdm_cq.c @@ -631,13 +631,13 @@ static void efa_rdm_cq_progress(struct util_cq *cq) * some idle endpoints and never poll completions for them. Move these initial posts to * the first cq read call before having a long term fix. */ - if (!efa_rdm_cq->initial_rx_to_all_eps_posted) { + if (efa_rdm_cq->need_to_scan_ep_list) { dlist_foreach(&cq->ep_list, item) { fid_entry = container_of(item, struct fid_list_entry, entry); efa_rdm_ep = container_of(fid_entry->fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid); efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep); } - efa_rdm_cq->initial_rx_to_all_eps_posted = true; + efa_rdm_cq->need_to_scan_ep_list = false; } dlist_foreach(&efa_rdm_cq->ibv_cq_poll_list, item) { @@ -683,7 +683,7 @@ int efa_rdm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, attr->size = MAX(efa_domain->rdm_cq_size, attr->size); dlist_init(&cq->ibv_cq_poll_list); - cq->initial_rx_to_all_eps_posted = false; + cq->need_to_scan_ep_list = false; ret = ofi_cq_init(&efa_prov, domain, attr, &cq->util_cq, &efa_rdm_cq_progress, context); diff --git a/prov/efa/src/rdm/efa_rdm_cq.h b/prov/efa/src/rdm/efa_rdm_cq.h index 5bb7b2b80c0..4e88a8b7f63 100644 --- a/prov/efa/src/rdm/efa_rdm_cq.h +++ b/prov/efa/src/rdm/efa_rdm_cq.h @@ -12,7 +12,7 @@ struct efa_rdm_cq { struct fid_cq *shm_cq; struct efa_ibv_cq ibv_cq; struct dlist_entry ibv_cq_poll_list; - bool initial_rx_to_all_eps_posted; + bool need_to_scan_ep_list; }; /* diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 282a384e325..5adcec05dff 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -1174,6 +1174,9 @@ int efa_rdm_ep_insert_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep) if (ret) return ret; } + ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock); + efa_cntr->need_to_scan_ep_list = true; + ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock); } } @@ -1199,6 +1202,9 @@ int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) if (ret) return ret; } + ofi_genlock_lock(&tx_cq->util_cq.ep_list_lock); + tx_cq->need_to_scan_ep_list = true; + ofi_genlock_unlock(&tx_cq->util_cq.ep_list_lock); } if (rx_cq) { @@ -1211,6 +1217,9 @@ int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep) if (ret) return ret; } + ofi_genlock_lock(&rx_cq->util_cq.ep_list_lock); + rx_cq->need_to_scan_ep_list = true; + ofi_genlock_unlock(&rx_cq->util_cq.ep_list_lock); } return FI_SUCCESS; diff --git a/prov/efa/test/efa_unit_test_cntr.c b/prov/efa/test/efa_unit_test_cntr.c index aeb44d51195..2aa2ea60927 100644 --- a/prov/efa/test/efa_unit_test_cntr.c +++ b/prov/efa/test/efa_unit_test_cntr.c @@ -121,7 +121,8 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) efa_cntr = container_of(cntr, struct efa_cntr, util_cntr.cntr_fid); - assert_false(efa_cntr->initial_rx_to_all_eps_posted); + /* cntr read need to scan the ep list since a ep is bind */ + assert_true(efa_cntr->need_to_scan_ep_list); cnt = fi_cntr_read(cntr); /* No completion should be read */ @@ -132,7 +133,8 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state) assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0); assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0); - assert_true(efa_cntr->initial_rx_to_all_eps_posted); + /* scan is done */ + assert_false(efa_cntr->need_to_scan_ep_list); /* ep must be closed before cq/av/eq... */ fi_close(&resource->ep->fid); resource->ep = NULL; diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index a34f8e3f9bf..a130ad460c2 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -580,7 +580,8 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state) assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, 0); assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0); - assert_false(efa_rdm_cq->initial_rx_to_all_eps_posted); + /* cq read need to scan the ep list since a ep is bind */ + assert_true(efa_rdm_cq->need_to_scan_ep_list); fi_cq_read(resource->cq, NULL, 0); /* At this time, rx pool size number of rx pkts are posted */ @@ -588,7 +589,8 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state) assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0); assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0); - assert_true(efa_rdm_cq->initial_rx_to_all_eps_posted); + /* scan is done */ + assert_false(efa_rdm_cq->need_to_scan_ep_list); } #if HAVE_EFADV_CQ_EX /**