Skip to content

Commit

Permalink
[v1.22.x] prov/efa: Fix the ep list scan in cq/cntr read
Browse files Browse the repository at this point in the history
We cannot only iterate eps and post initial batch of internal rx pkt once,
as there can be more eps joining later after the cq read call.
This patch fixes by introducing a bit in cq/ctnr that indicates whether
a ep list scan is needed. This bit is set as true when a new ep is
bind to the cq, and will be set as false every time when a scan is done.

Signed-off-by: Shi Jin <sjina@amazon.com>
(cherry picked from commit c6085d1)
  • Loading branch information
shijin-aws authored and d314159 committed Nov 18, 2024
1 parent 9643e09 commit 7623bc5
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 12 deletions.
6 changes: 3 additions & 3 deletions prov/efa/src/efa_cntr.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,13 @@ static void efa_rdm_cntr_progress(struct util_cntr *cntr)
* some idle endpoints and never poll completions for them. Move these initial posts to
* the first polling before having a long term fix.
*/
if (!efa_cntr->initial_rx_to_all_eps_posted) {
if (efa_cntr->need_to_scan_ep_list) {
dlist_foreach(&cntr->ep_list, item) {
fid_entry = container_of(item, struct fid_list_entry, entry);
efa_rdm_ep = container_of(fid_entry->fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid);
efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep);
}
efa_cntr->initial_rx_to_all_eps_posted = true;
efa_cntr->need_to_scan_ep_list = false;
}

dlist_foreach(&efa_cntr->ibv_cq_poll_list, item) {
Expand All @@ -193,7 +193,7 @@ int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
return -FI_ENOMEM;

dlist_init(&cntr->ibv_cq_poll_list);
cntr->initial_rx_to_all_eps_posted = false;
cntr->need_to_scan_ep_list = false;
efa_domain = container_of(domain, struct efa_domain,
util_domain.domain_fid);

Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/efa_cntr.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct efa_cntr {
struct fid_cntr *shm_cntr;
struct dlist_entry ibv_cq_poll_list;
/* Only used by RDM EP type */
bool initial_rx_to_all_eps_posted;
bool need_to_scan_ep_list;
};

int efa_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
Expand Down
6 changes: 3 additions & 3 deletions prov/efa/src/rdm/efa_rdm_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -631,13 +631,13 @@ static void efa_rdm_cq_progress(struct util_cq *cq)
* some idle endpoints and never poll completions for them. Move these initial posts to
* the first cq read call before having a long term fix.
*/
if (!efa_rdm_cq->initial_rx_to_all_eps_posted) {
if (efa_rdm_cq->need_to_scan_ep_list) {
dlist_foreach(&cq->ep_list, item) {
fid_entry = container_of(item, struct fid_list_entry, entry);
efa_rdm_ep = container_of(fid_entry->fid, struct efa_rdm_ep, base_ep.util_ep.ep_fid.fid);
efa_rdm_ep_post_internal_rx_pkts(efa_rdm_ep);
}
efa_rdm_cq->initial_rx_to_all_eps_posted = true;
efa_rdm_cq->need_to_scan_ep_list = false;
}

dlist_foreach(&efa_rdm_cq->ibv_cq_poll_list, item) {
Expand Down Expand Up @@ -683,7 +683,7 @@ int efa_rdm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
attr->size = MAX(efa_domain->rdm_cq_size, attr->size);

dlist_init(&cq->ibv_cq_poll_list);
cq->initial_rx_to_all_eps_posted = false;
cq->need_to_scan_ep_list = false;
ret = ofi_cq_init(&efa_prov, domain, attr, &cq->util_cq,
&efa_rdm_cq_progress, context);

Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/rdm/efa_rdm_cq.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ struct efa_rdm_cq {
struct fid_cq *shm_cq;
struct efa_ibv_cq ibv_cq;
struct dlist_entry ibv_cq_poll_list;
bool initial_rx_to_all_eps_posted;
bool need_to_scan_ep_list;
};

/*
Expand Down
9 changes: 9 additions & 0 deletions prov/efa/src/rdm/efa_rdm_ep_fiops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,9 @@ int efa_rdm_ep_insert_cntr_ibv_cq_poll_list(struct efa_rdm_ep *ep)
if (ret)
return ret;
}
ofi_genlock_lock(&efa_cntr->util_cntr.ep_list_lock);
efa_cntr->need_to_scan_ep_list = true;
ofi_genlock_unlock(&efa_cntr->util_cntr.ep_list_lock);
}
}

Expand All @@ -1199,6 +1202,9 @@ int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep)
if (ret)
return ret;
}
ofi_genlock_lock(&tx_cq->util_cq.ep_list_lock);
tx_cq->need_to_scan_ep_list = true;
ofi_genlock_unlock(&tx_cq->util_cq.ep_list_lock);
}

if (rx_cq) {
Expand All @@ -1211,6 +1217,9 @@ int efa_rdm_ep_insert_cq_ibv_cq_poll_list(struct efa_rdm_ep *ep)
if (ret)
return ret;
}
ofi_genlock_lock(&rx_cq->util_cq.ep_list_lock);
rx_cq->need_to_scan_ep_list = true;
ofi_genlock_unlock(&rx_cq->util_cq.ep_list_lock);
}

return FI_SUCCESS;
Expand Down
6 changes: 4 additions & 2 deletions prov/efa/test/efa_unit_test_cntr.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state)

efa_cntr = container_of(cntr, struct efa_cntr, util_cntr.cntr_fid);

assert_false(efa_cntr->initial_rx_to_all_eps_posted);
/* cntr read need to scan the ep list since a ep is bind */
assert_true(efa_cntr->need_to_scan_ep_list);

cnt = fi_cntr_read(cntr);
/* No completion should be read */
Expand All @@ -132,7 +133,8 @@ void test_efa_cntr_post_initial_rx_pkts(struct efa_resource **state)
assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0);
assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0);

assert_true(efa_cntr->initial_rx_to_all_eps_posted);
/* scan is done */
assert_false(efa_cntr->need_to_scan_ep_list);
/* ep must be closed before cq/av/eq... */
fi_close(&resource->ep->fid);
resource->ep = NULL;
Expand Down
6 changes: 4 additions & 2 deletions prov/efa/test/efa_unit_test_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -580,15 +580,17 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state)
assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, 0);
assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0);

assert_false(efa_rdm_cq->initial_rx_to_all_eps_posted);
/* cq read need to scan the ep list since a ep is bind */
assert_true(efa_rdm_cq->need_to_scan_ep_list);
fi_cq_read(resource->cq, NULL, 0);

/* At this time, rx pool size number of rx pkts are posted */
assert_int_equal(efa_rdm_ep->efa_rx_pkts_posted, efa_rdm_ep_get_rx_pool_size(efa_rdm_ep));
assert_int_equal(efa_rdm_ep->efa_rx_pkts_to_post, 0);
assert_int_equal(efa_rdm_ep->efa_rx_pkts_held, 0);

assert_true(efa_rdm_cq->initial_rx_to_all_eps_posted);
/* scan is done */
assert_false(efa_rdm_cq->need_to_scan_ep_list);
}
#if HAVE_EFADV_CQ_EX
/**
Expand Down

0 comments on commit 7623bc5

Please sign in to comment.