Skip to content

Commit

Permalink
TL/UCP: fix completed counter race
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev committed Feb 28, 2024
1 parent ce9821c commit 48eb6a8
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 11 deletions.
3 changes: 2 additions & 1 deletion src/components/tl/ucp/alltoallv/alltoallv_hybrid.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "tl_ucp_sendrecv.h"
#include "components/mc/ucc_mc.h"
#include "coll_patterns/bruck_alltoall.h"
#include "utils/ucc_atomic.h"

/*
scratch structure
Expand Down Expand Up @@ -122,7 +123,7 @@ static void send_completion(void *request, ucs_status_t status,
ucp_request_free(request);
}

bin->task->tagged.send_completed++;
ucc_atomic_add32(&bin->task->tagged.send_completed, 1);
bin->len = 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/ucp/bcast/bcast_dbt.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ static void recv_completion_common(void *request, ucs_status_t status,
ucs_status_string(status));
task->super.status = ucs_status_to_ucc_status(status);
}
task->tagged.recv_completed++;
ucc_atomic_add32(&task->tagged.recv_completed, 1);
if (request) {
ucp_request_free(request);
}
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/ucp/reduce/reduce_dbt.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ static void recv_completion_common(void *request, ucs_status_t status,
ucs_status_string(status));
task->super.status = ucs_status_to_ucc_status(status);
}
task->tagged.recv_completed++;
ucc_atomic_add32(&task->tagged.recv_completed, 1);
if (request) {
ucp_request_free(request);
}
Expand Down
3 changes: 2 additions & 1 deletion src/components/tl/ucp/reduce_scatter/reduce_scatter_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "utils/ucc_math.h"
#include "utils/ucc_coll_utils.h"
#include "utils/ucc_dt_reduce.h"
#include "utils/ucc_atomic.h"

#define REVERSED_FRAG 1

Expand All @@ -24,7 +25,7 @@ static inline void send_completion_common(void *request, ucs_status_t status,
ucs_status_string(status));
task->super.status = ucs_status_to_ucc_status(status);
}
task->tagged.send_completed++;
ucc_atomic_add32(&task->tagged.send_completed, 1);
if (request) {
ucp_request_free(request);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ static inline void send_completion_common(void *request, ucs_status_t status,
ucs_status_string(status));
task->super.status = ucs_status_to_ucc_status(status);
}
task->tagged.send_completed++;
ucc_atomic_add32(&task->tagged.send_completed, 1);
if (request) {
ucp_request_free(request);
}
Expand Down
4 changes: 2 additions & 2 deletions src/components/tl/ucp/tl_ucp_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ void ucc_tl_ucp_send_completion_cb(void *request, ucs_status_t status,
ucs_status_string(status));
task->super.status = ucs_status_to_ucc_status(status);
}
task->tagged.send_completed++;
ucc_atomic_add32(&task->tagged.send_completed, 1);
ucp_request_free(request);
}

Expand Down Expand Up @@ -142,7 +142,7 @@ void ucc_tl_ucp_recv_completion_cb(void *request, ucs_status_t status,
ucs_status_string(status));
task->super.status = ucs_status_to_ucc_status(status);
}
task->tagged.recv_completed++;
ucc_atomic_add32(&task->tagged.recv_completed, 1);
ucp_request_free(request);
}

Expand Down
8 changes: 4 additions & 4 deletions src/components/tl/ucp/tl_ucp_sendrecv.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ ucc_tl_ucp_send_nb(void *buffer, size_t msglen, ucc_memory_type_t mtype,
if (UCS_OK != ucp_status) {
UCC_TL_UCP_CHECK_REQ_STATUS();
} else {
task->tagged.send_completed++;
ucc_atomic_add32(&task->tagged.send_completed, 1);
}
return UCC_OK;
}
Expand Down Expand Up @@ -170,7 +170,7 @@ ucc_tl_ucp_recv_nb(void *buffer, size_t msglen, ucc_memory_type_t mtype,
if (UCS_OK != ucp_status) {
UCC_TL_UCP_CHECK_REQ_STATUS();
} else {
task->tagged.recv_completed++;
ucc_atomic_add32(&task->tagged.recv_completed, 1);
}
return UCC_OK;

Expand Down Expand Up @@ -202,7 +202,7 @@ static inline ucc_status_t ucc_tl_ucp_recv_nz(void *buffer, size_t msglen,
{
if (msglen == 0) {
task->tagged.recv_posted++;
task->tagged.recv_completed++;
ucc_atomic_add32(&task->tagged.recv_completed, 1);
return UCC_OK;
}
return ucc_tl_ucp_recv_nb(buffer, msglen, mtype,
Expand All @@ -218,7 +218,7 @@ static inline ucc_status_t ucc_tl_ucp_send_nz(void *buffer, size_t msglen,
{
if (msglen == 0) {
task->tagged.send_posted++;
task->tagged.send_completed++;
ucc_atomic_add32(&task->tagged.send_completed, 1);
return UCC_OK;
}
return ucc_tl_ucp_send_nb(buffer, msglen, mtype,
Expand Down

0 comments on commit 48eb6a8

Please sign in to comment.