From 338ce5db6151d2e36a6c97346d4764c270c10d10 Mon Sep 17 00:00:00 2001 From: Eyal Chocron Date: Tue, 3 Sep 2024 13:27:59 +0300 Subject: [PATCH] clean code --- src/components/cl/hier/barrier/barrier.c | 7 +- src/components/cl/hier/cl_hier.h | 15 +- src/components/tl/ucp/allgather/allgather.c | 6 +- src/components/tl/ucp/allgatherv/allgatherv.c | 8 +- .../tl/ucp/allgatherv/allgatherv_ring.c | 62 ++----- src/components/tl/ucp/bcast/bcast_knomial.c | 17 -- .../tl/ucp/gatherv/gatherv_linear.c | 87 +++------ src/components/topo/ucc_sbgp.c | 2 - src/ucc/api/ucc.h | 36 ++-- test/gtest/coll/test_reduce_scatter.cc | 46 ++--- test/mpi/main.cc | 1 - test/mpi/test_allgatherv.cc | 60 ++---- test/mpi/test_mpi.cc | 171 ++++++++---------- 13 files changed, 185 insertions(+), 333 deletions(-) diff --git a/src/components/cl/hier/barrier/barrier.c b/src/components/cl/hier/barrier/barrier.c index 3e9f673c14..c0a1ecf94a 100644 --- a/src/components/cl/hier/barrier/barrier.c +++ b/src/components/cl/hier/barrier/barrier.c @@ -20,7 +20,8 @@ static ucc_status_t ucc_cl_hier_barrier_finalize(ucc_coll_task_t *task) ucc_schedule_t *schedule = ucc_derived_of(task, ucc_schedule_t); ucc_status_t status; - UCC_CL_HIER_PROFILE_REQUEST_EVENT(task, "cl_hier_barrier_finalize", 0); + UCC_CL_HIER_PROFILE_REQUEST_EVENT(task, "cl_hier_barrier_finalize", + 0); status = ucc_schedule_finalize(task); ucc_cl_hier_put_schedule(schedule); return status; @@ -31,7 +32,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_barrier_init, ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, ucc_coll_task_t **task) { - ucc_cl_hier_team_t *cl_team = ucc_derived_of(team, ucc_cl_hier_team_t); ucc_coll_task_t *tasks[MAX_BARRIER_TASKS] = {NULL}; ucc_schedule_t *schedule; @@ -39,7 +39,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_barrier_init, ucc_base_coll_args_t args; int n_tasks, i; - schedule = &ucc_cl_hier_get_schedule(cl_team)->super.super; if (ucc_unlikely(!schedule)) { return UCC_ERR_NO_MEMORY; @@ -72,6 +71,7 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_barrier_init, n_tasks++; } + if (SBGP_ENABLED(cl_team, NODE) && cl_team->top_sbgp != UCC_HIER_SBGP_NODE) { args.args.coll_type = UCC_COLL_TYPE_FANOUT; @@ -97,7 +97,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_barrier_init, schedule->super.post = ucc_cl_hier_barrier_start; schedule->super.finalize = ucc_cl_hier_barrier_finalize; *task = &schedule->super; - return UCC_OK; out: diff --git a/src/components/cl/hier/cl_hier.h b/src/components/cl/hier/cl_hier.h index e0b331745d..b18bb23b26 100644 --- a/src/components/cl/hier/cl_hier.h +++ b/src/components/cl/hier/cl_hier.h @@ -20,10 +20,10 @@ #include "utils/profile/ucc_profile_off.h" #endif -#define UCC_CL_HIER_PROFILE_FUNC UCC_PROFILE_FUNC -#define UCC_CL_HIER_PROFILE_REQUEST_NEW UCC_PROFILE_REQUEST_NEW +#define UCC_CL_HIER_PROFILE_FUNC UCC_PROFILE_FUNC +#define UCC_CL_HIER_PROFILE_REQUEST_NEW UCC_PROFILE_REQUEST_NEW #define UCC_CL_HIER_PROFILE_REQUEST_EVENT UCC_PROFILE_REQUEST_EVENT -#define UCC_CL_HIER_PROFILE_REQUEST_FREE UCC_PROFILE_REQUEST_FREE +#define UCC_CL_HIER_PROFILE_REQUEST_FREE UCC_PROFILE_REQUEST_FREE #ifndef UCC_CL_HIER_DEFAULT_SCORE #define UCC_CL_HIER_DEFAULT_SCORE 50 @@ -112,9 +112,12 @@ UCC_CLASS_DECLARE(ucc_cl_hier_team_t, ucc_base_context_t *, const ucc_base_team_params_t *); #define UCC_CL_HIER_SUPPORTED_COLLS \ - (UCC_COLL_TYPE_ALLTOALL | UCC_COLL_TYPE_ALLTOALLV | \ - UCC_COLL_TYPE_ALLGATHERV | UCC_COLL_TYPE_ALLREDUCE | \ - UCC_COLL_TYPE_BARRIER | UCC_COLL_TYPE_BCAST) + (UCC_COLL_TYPE_ALLTOALL | \ + UCC_COLL_TYPE_ALLTOALLV | \ + UCC_COLL_TYPE_ALLREDUCE | \ + UCC_COLL_TYPE_BARRIER | \ + UCC_COLL_TYPE_ALLGATHERV | \ + UCC_COLL_TYPE_BCAST) ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args, ucc_base_team_t *team, diff --git a/src/components/tl/ucp/allgather/allgather.c b/src/components/tl/ucp/allgather/allgather.c index 769c4fb981..9b9d823a06 100644 --- a/src/components/tl/ucp/allgather/allgather.c +++ b/src/components/tl/ucp/allgather/allgather.c @@ -14,7 +14,7 @@ ucc_base_coll_alg_info_t [UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL] = {.id = UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL, .name = "knomial", - .desc = "recursive k-ing with arbitrary radix"}, + .desc = "recursive k-ing with arbitrary radix "}, [UCC_TL_UCP_ALLGATHER_ALG_RING] = {.id = UCC_TL_UCP_ALLGATHER_ALG_RING, .name = "ring", @@ -23,11 +23,11 @@ ucc_base_coll_alg_info_t {.id = UCC_TL_UCP_ALLGATHER_ALG_NEIGHBOR, .name = "neighbor", .desc = "O(N) Neighbor Exchange N/2 steps"}, - [UCC_TL_UCP_ALLGATHER_ALG_BRUCK] = + [UCC_TL_UCP_ALLGATHER_ALG_BRUCK] = {.id = UCC_TL_UCP_ALLGATHER_ALG_BRUCK, .name = "bruck", .desc = "O(log(N)) Variation of Bruck algorithm"}, - [UCC_TL_UCP_ALLGATHER_ALG_SPARBIT] = + [UCC_TL_UCP_ALLGATHER_ALG_SPARBIT] = {.id = UCC_TL_UCP_ALLGATHER_ALG_SPARBIT, .name = "sparbit", .desc = "O(log(N)) SPARBIT algorithm"}, diff --git a/src/components/tl/ucp/allgatherv/allgatherv.c b/src/components/tl/ucp/allgatherv/allgatherv.c index b6722dc219..39fbc5472d 100644 --- a/src/components/tl/ucp/allgatherv/allgatherv.c +++ b/src/components/tl/ucp/allgatherv/allgatherv.c @@ -11,10 +11,10 @@ ucc_base_coll_alg_info_t ucc_tl_ucp_allgatherv_algs[UCC_TL_UCP_ALLGATHERV_ALG_LAST + 1] = { - [UCC_TL_UCP_ALLGATHERV_ALG_RING] = {.id = - UCC_TL_UCP_ALLGATHERV_ALG_RING, - .name = "ring", - .desc = "O(N) Ring"}, + [UCC_TL_UCP_ALLGATHERV_ALG_RING] = + {.id = UCC_TL_UCP_ALLGATHERV_ALG_RING, + .name = "ring", + .desc = "O(N) Ring"}, [UCC_TL_UCP_ALLGATHERV_ALG_LAST] = { .id = 0, .name = NULL, .desc = NULL}}; diff --git a/src/components/tl/ucp/allgatherv/allgatherv_ring.c b/src/components/tl/ucp/allgatherv/allgatherv_ring.c index f646146207..efc3a06099 100644 --- a/src/components/tl/ucp/allgatherv/allgatherv_ring.c +++ b/src/components/tl/ucp/allgatherv/allgatherv_ring.c @@ -11,7 +11,6 @@ #include "utils/ucc_math.h" #include "utils/ucc_coll_utils.h" #include "tl_ucp_sendrecv.h" -#include void ucc_tl_ucp_allgatherv_ring_progress(ucc_coll_task_t *coll_task) { @@ -34,22 +33,23 @@ void ucc_tl_ucp_allgatherv_ring_progress(ucc_coll_task_t *coll_task) recvfrom = ucc_ep_map_eval(task->subset.map, (trank - 1 + tsize) % tsize); while (task->tagged.send_posted < tsize) { - send_idx = ucc_ep_map_eval( - task->subset.map, - (trank - task->tagged.send_posted + 1 + tsize) % tsize); + send_idx = + ucc_ep_map_eval(task->subset.map, (trank - + task->tagged.send_posted + 1 + + tsize) % tsize); data_displ = ucc_coll_args_get_displacement( args, args->dst.info_v.displacements, send_idx) * rdt_size; data_size = - ucc_coll_args_get_count(args, args->dst.info_v.counts, send_idx) * rdt_size; UCPCHECK_GOTO(ucc_tl_ucp_send_nb((void *)(rbuf + data_displ), data_size, rmem, sendto, team, task), task, out); - recv_idx = - ucc_ep_map_eval(task->subset.map, - (trank - task->tagged.recv_posted + tsize) % tsize); + recv_idx = + ucc_ep_map_eval(task->subset.map, (trank - + task->tagged.recv_posted + + tsize) % tsize); data_displ = ucc_coll_args_get_displacement( args, args->dst.info_v.displacements, recv_idx) * rdt_size; @@ -63,18 +63,6 @@ void ucc_tl_ucp_allgatherv_ring_progress(ucc_coll_task_t *coll_task) return; } } - // DEBUG - int _i, _total_counts; - _total_counts=0; - for (_i=0; _i < tsize; _i++){ - _total_counts += ucc_coll_args_get_count(args, args->dst.info_v.counts, _i); - } - printf("[%d] end-allgatherv rbuf: [", trank); - for (_i=0; _i < _total_counts; _i++){ - printf("%u, ", ((uint32_t *)rbuf)[_i]); - } - printf("]\n"); - //-- ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task)); task->super.status = UCC_OK; out: @@ -86,8 +74,8 @@ ucc_status_t ucc_tl_ucp_allgatherv_ring_start(ucc_coll_task_t *coll_task) ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); ucc_tl_ucp_team_t *team = TASK_TEAM(task); ucc_coll_args_t *args = &TASK_ARGS(task); - void *sbuf = args->src.info.buffer; - void *rbuf = args->dst.info_v.buffer; + void * sbuf = args->src.info.buffer; + void * rbuf = args->dst.info_v.buffer; ucc_memory_type_t smem = args->src.info.mem_type; ucc_memory_type_t rmem = args->dst.info_v.mem_type; ucc_rank_t grank = UCC_TL_TEAM_RANK(team); @@ -98,18 +86,15 @@ ucc_status_t ucc_tl_ucp_allgatherv_ring_start(ucc_coll_task_t *coll_task) if (!UCC_IS_INPLACE(*args)) { /* TODO replace local sendrecv with memcpy? */ rdt_size = ucc_dt_size(args->dst.info_v.datatype); - data_displ = ucc_coll_args_get_displacement( - args, args->dst.info_v.displacements, grank) * - rdt_size; - data_size = - ucc_coll_args_get_count(args, args->dst.info_v.counts, grank) * - rdt_size; - UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, data_displ), - data_size, rmem, grank, team, task), + data_displ = ucc_coll_args_get_displacement(args, + args->dst.info_v.displacements, grank) * rdt_size; + data_size = ucc_coll_args_get_count(args, + args->dst.info_v.counts, grank) * rdt_size; + UCPCHECK_GOTO(ucc_tl_ucp_recv_nb(PTR_OFFSET(rbuf, data_displ), data_size, + rmem, grank, team, task), + task, error); + UCPCHECK_GOTO(ucc_tl_ucp_send_nb(sbuf, data_size, smem, grank, team, task), task, error); - UCPCHECK_GOTO( - ucc_tl_ucp_send_nb(sbuf, data_size, smem, grank, team, task), task, - error); } else { /* to simplify progress fucnction and make it identical for in-place and non in-place */ @@ -117,15 +102,6 @@ ucc_status_t ucc_tl_ucp_allgatherv_ring_start(ucc_coll_task_t *coll_task) task->tagged.recv_posted = task->tagged.recv_completed = 1; } - // DEBUG - // int _i; - // printf("[%d] start-allgatherv sbuf: [", grank); - // for (_i=0; _i < 4; _i++){ - // printf("%u, ", ((uint32_t *)sbuf)[_i]); - // } - // printf("]\n"); - // ----- - return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); error: return task->super.status; @@ -134,7 +110,7 @@ ucc_status_t ucc_tl_ucp_allgatherv_ring_start(ucc_coll_task_t *coll_task) ucc_status_t ucc_tl_ucp_allgatherv_ring_init_common(ucc_tl_ucp_task_t *task) { ucc_tl_ucp_team_t *team = TASK_TEAM(task); - ucc_sbgp_t *sbgp; + ucc_sbgp_t *sbgp; if (!ucc_coll_args_is_predefined_dt(&TASK_ARGS(task), UCC_RANK_INVALID)) { tl_error(UCC_TASK_LIB(task), "user defined datatype is not supported"); diff --git a/src/components/tl/ucp/bcast/bcast_knomial.c b/src/components/tl/ucp/bcast/bcast_knomial.c index 607d8a7534..1ca08893e3 100644 --- a/src/components/tl/ucp/bcast/bcast_knomial.c +++ b/src/components/tl/ucp/bcast/bcast_knomial.c @@ -75,14 +75,6 @@ void ucc_tl_ucp_bcast_knomial_progress(ucc_coll_task_t *coll_task) return; } - // DEBUG - // int _i; - // printf("[%d] end-bcast buffer: [", task->subset.myrank); - // for (_i=0; _i < 8; _i++){ - // printf("%u, ", ((uint32_t *)TASK_ARGS(task).src.info.buffer)[_i]); - // } - // printf("]\n"); - // ----- ucc_assert(UCC_TL_UCP_TASK_P2P_COMPLETE(task)); task->super.status = UCC_OK; UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_bcast_kn_done", 0); @@ -101,14 +93,5 @@ ucc_status_t ucc_tl_ucp_bcast_knomial_start(ucc_coll_task_t *coll_task) CALC_KN_TREE_DIST(size, task->bcast_kn.radix, task->bcast_kn.dist); - // DEBUG - // int _i; - // printf("[%d] start-bcast buffer: [", task->subset.myrank); - // for (_i=0; _i < 8; _i++){ - // printf("%u, ", ((uint32_t *)TASK_ARGS(task).src.info.buffer)[_i]); - // } - // printf("]\n"); - // ----- - return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); } diff --git a/src/components/tl/ucp/gatherv/gatherv_linear.c b/src/components/tl/ucp/gatherv/gatherv_linear.c index 070a0bf737..a6d5ccc60b 100644 --- a/src/components/tl/ucp/gatherv/gatherv_linear.c +++ b/src/components/tl/ucp/gatherv/gatherv_linear.c @@ -7,7 +7,6 @@ #include "config.h" #include "tl_ucp.h" #include "gatherv.h" -#include #include "core/ucc_progress_queue.h" #include "tl_ucp_sendrecv.h" @@ -22,7 +21,7 @@ void ucc_tl_ucp_gatherv_linear_progress(ucc_coll_task_t *coll_task) ucc_tl_ucp_task_t *task = ucc_derived_of(coll_task, ucc_tl_ucp_task_t); ucc_tl_ucp_team_t *team = TASK_TEAM(task); ucc_coll_args_t *args = &TASK_ARGS(task); - void *rbuf = args->dst.info_v.buffer; + void* rbuf = args->dst.info_v.buffer; ucc_memory_type_t rmem = args->dst.info_v.mem_type; ucc_rank_t grank = UCC_TL_TEAM_RANK(team); ucc_rank_t gsize = UCC_TL_TEAM_SIZE(team); @@ -40,17 +39,14 @@ void ucc_tl_ucp_gatherv_linear_progress(ucc_coll_task_t *coll_task) while ((task->tagged.recv_posted < gsize) && ((task->tagged.recv_posted - task->tagged.recv_completed) < nreqs)) { - peer = get_peer(grank, gsize, task->tagged.recv_posted); - data_size = ucc_coll_args_get_count( - args, args->dst.info_v.counts, peer) * - dt_size; - data_displ = ucc_coll_args_get_displacement( - args, args->dst.info_v.displacements, peer) * - dt_size; + peer = get_peer(grank, gsize, task->tagged.recv_posted); + data_size = ucc_coll_args_get_count(args, + args->dst.info_v.counts, peer) * dt_size; + data_displ = ucc_coll_args_get_displacement(args, + args->dst.info_v.displacements, peer) * dt_size; UCPCHECK_GOTO(ucc_tl_ucp_recv_nz(PTR_OFFSET(rbuf, data_displ), - data_size, rmem, peer, team, - task), - task, out); + data_size, rmem, peer, team, task), + task, out); polls = 0; } } @@ -62,19 +58,8 @@ void ucc_tl_ucp_gatherv_linear_progress(ucc_coll_task_t *coll_task) task->super.status = ucc_tl_ucp_test(task); out: if (task->super.status != UCC_INPROGRESS) { - // DEBUG - int _i, _total_counts; - _total_counts=0; - for (_i=0; _i < gsize; _i++){ - _total_counts += ucc_coll_args_get_count(args, args->dst.info_v.counts, _i); - } - printf("[%d] end-gatherv rbuf (pid=%d): [", grank, getpid()); - for (_i=0; _i < 4; _i++){ - printf("%u, ", ((uint32_t *)rbuf)[_i]); - } - printf("]\n"); - // ---- - UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, "ucp_gatherv_linear_done", 0); + UCC_TL_UCP_PROFILE_REQUEST_EVENT(coll_task, + "ucp_gatherv_linear_done", 0); } } @@ -86,7 +71,7 @@ ucc_status_t ucc_tl_ucp_gatherv_linear_start(ucc_coll_task_t *coll_task) ucc_rank_t grank = UCC_TL_TEAM_RANK(team); ucc_memory_type_t smem = args->src.info.mem_type; ucc_memory_type_t rmem = args->dst.info_v.mem_type; - void *sbuf = args->src.info.buffer; + void * sbuf = args->src.info.buffer; void *rbuf; size_t dt_size, data_displ, data_size; @@ -95,21 +80,19 @@ ucc_status_t ucc_tl_ucp_gatherv_linear_start(ucc_coll_task_t *coll_task) if (UCC_IS_ROOT(*args, grank)) { if (!UCC_IS_INPLACE(*args)) { - dt_size = ucc_dt_size(args->dst.info_v.datatype); - data_size = - ucc_coll_args_get_count(args, args->dst.info_v.counts, grank) * - dt_size; - data_displ = ucc_coll_args_get_displacement( - args, args->dst.info_v.displacements, grank) * - dt_size; - rbuf = PTR_OFFSET(args->dst.info_v.buffer, data_displ); + dt_size = ucc_dt_size(args->dst.info_v.datatype); + data_size = ucc_coll_args_get_count(args, + args->dst.info_v.counts, grank) * dt_size; + data_displ = ucc_coll_args_get_displacement(args, + args->dst.info_v.displacements, grank) * dt_size; + rbuf = PTR_OFFSET(args->dst.info_v.buffer, data_displ); - UCPCHECK_GOTO( - ucc_tl_ucp_recv_nz(rbuf, data_size, rmem, grank, team, task), - task, error); - UCPCHECK_GOTO( - ucc_tl_ucp_send_nz(sbuf, data_size, smem, grank, team, task), - task, error); + UCPCHECK_GOTO(ucc_tl_ucp_recv_nz(rbuf, data_size, rmem, grank, team, + task), + task, error); + UCPCHECK_GOTO(ucc_tl_ucp_send_nz(sbuf, data_size, smem, grank, team, + task), + task, error); } else { /* to simplify progress fucnction and make it identical for in-place and non in-place */ @@ -117,30 +100,18 @@ ucc_status_t ucc_tl_ucp_gatherv_linear_start(ucc_coll_task_t *coll_task) task->tagged.recv_posted = task->tagged.recv_completed = 1; } } else { - dt_size = ucc_dt_size(args->src.info.datatype); - data_size = args->src.info.count * dt_size; - - UCPCHECK_GOTO( - ucc_tl_ucp_send_nz(sbuf, data_size, smem, args->root, team, task), - task, error); - } + dt_size = ucc_dt_size(args->src.info.datatype); + data_size = args->src.info.count * dt_size; - // DEBUG - int _i, _total_counts; - _total_counts=0; - for (_i=0; _i < 4; _i++){ - _total_counts += ucc_coll_args_get_count(args, args->dst.info_v.counts, _i); - } - printf("[%d] (%d) start-gatherv sbuf: [", grank, _total_counts); - for (_i=0; _i < 4; _i++){ - printf("%u, ", ((uint32_t *)sbuf)[_i]); + UCPCHECK_GOTO(ucc_tl_ucp_send_nz(sbuf, data_size, smem, args->root, + team, task), + task, error); } - printf("]\n"); - // ---- return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super); error: return task->super.status; + } ucc_status_t ucc_tl_ucp_gatherv_linear_init(ucc_tl_ucp_task_t *task) diff --git a/src/components/topo/ucc_sbgp.c b/src/components/topo/ucc_sbgp.c index 724468d136..a79dbeae68 100644 --- a/src/components/topo/ucc_sbgp.c +++ b/src/components/topo/ucc_sbgp.c @@ -28,7 +28,6 @@ const char* ucc_sbgp_str(ucc_sbgp_type_t type) static inline int ucc_ranks_on_local_sn(ucc_rank_t rank1, ucc_rank_t rank2, ucc_topo_t *topo, ucc_sbgp_type_t type) { - // EYAL ucc_rank_t ctx_rank1 = ucc_ep_map_eval(topo->set.map, rank1); ucc_rank_t ctx_rank2 = ucc_ep_map_eval(topo->set.map, rank2); ucc_proc_info_t *proc1 = &topo->topo->procs[ctx_rank1]; @@ -137,7 +136,6 @@ static inline ucc_status_t sbgp_create_node(ucc_topo_t *topo, ucc_sbgp_t *sbgp) node_size++; } } - if (0 == node_size) { /* We should always have at least 1 local rank */ ucc_free(local_ranks); diff --git a/src/ucc/api/ucc.h b/src/ucc/api/ucc.h index 38a6d1fd3f..cc9ebedfe4 100644 --- a/src/ucc/api/ucc.h +++ b/src/ucc/api/ucc.h @@ -1715,24 +1715,24 @@ typedef enum { * @ingroup UCC_COLLECTIVES_DT */ typedef enum { - UCC_COLL_ARGS_HINT_OPTIMIZE_OVERLAP_CPU = UCC_BIT(24), /*!< When the flag is - set, the user - prefers the library - to choose an - algorithm - implementation - optimized for the - best overlap of CPU - resources. */ - UCC_COLL_ARGS_HINT_OPTIMIZE_OVERLAP_GPU = UCC_BIT(25), /*!< When the flag is - set, the user - prefers the library - to choose an - algorithm - implementation - optimized for the - best overlap of GPU - resources. */ + UCC_COLL_ARGS_HINT_OPTMIZE_OVERLAP_CPU = UCC_BIT(24), /*!< When the flag is + set, the user + prefers the library + to choose an + algorithm + implementation + optimized for the + best overlap of CPU + resources. */ + UCC_COLL_ARGS_HINT_OPTMIZE_OVERLAP_GPU = UCC_BIT(25), /*!< When the flag is + set, the user + prefers the library + to choose an + algorithm + implementation + optimized for the + best overlap of GPU + resources. */ UCC_COLL_ARGS_HINT_OPTIMIZE_LATENCY = UCC_BIT(26), /*!< When the flag is set, the user prefers the library to choose diff --git a/test/gtest/coll/test_reduce_scatter.cc b/test/gtest/coll/test_reduce_scatter.cc index bcd2f41d83..fb106b5a21 100644 --- a/test/gtest/coll/test_reduce_scatter.cc +++ b/test/gtest/coll/test_reduce_scatter.cc @@ -1,6 +1,5 @@ /** - * Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * See file LICENSE for terms. */ @@ -316,21 +315,24 @@ TYPED_TEST(test_reduce_scatter_cuda, multiple_inplace_managed) } #endif -using Param_0 = std::tuple; class test_reduce_scatter_alg : public ucc::test, - public ::testing::WithParamInterface { + public ::testing::WithParamInterface { }; -UCC_TEST_P(test_reduce_scatter_alg,) +UCC_TEST_P(test_reduce_scatter_alg, ring) { test_reduce_scatter> rs_test; int n_procs = 15; - const ucc_job_env_t env = std::get<0>(GetParam()); - UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); - UccTeam_h team = job.create_team(n_procs); - int repeat = 3; - UccCollCtxVec ctxs; + std::string bidir = GetParam(); + ucc_job_env_t env = {{"UCC_CL_BASIC_TUNE", "inf"}, + {"UCC_TL_UCP_TUNE", "reduce_scatter:@ring:inf"}, + {"REDUCE_SCATTER_RING_BIDIRECTIONAL", + bidir == "bidirectional" ? "y" : "n"}}; + UccJob job(n_procs, UccJob::UCC_JOB_CTX_GLOBAL, env); + UccTeam_h team = job.create_team(n_procs); + int repeat = 3; + UccCollCtxVec ctxs; std::vector mt = {UCC_MEMORY_TYPE_HOST}; if (UCC_OK == ucc_mc_available(UCC_MEMORY_TYPE_CUDA)) { @@ -359,25 +361,5 @@ UCC_TEST_P(test_reduce_scatter_alg,) } } } - -ucc_job_env_t ring_unidir_env = {{"name", "ring_unidirectional"}, - {"UCC_CL_BASIC_TUNE", "inf"}, - {"UCC_TL_UCP_TUNE", "reduce_scatter:@ring:inf"}, - {"UCC_TL_UCP_REDUCE_SCATTER_RING_BIDIRECTIONAL", "n"}}; - -ucc_job_env_t ring_bidir_env = {{"name", "ring_bidirectional"}, - {"UCC_CL_BASIC_TUNE", "inf"}, - {"UCC_TL_UCP_TUNE", "reduce_scatter:@ring:inf"}, - {"UCC_TL_UCP_REDUCE_SCATTER_RING_BIDIRECTIONAL", "y"}}; - -ucc_job_env_t knomial = {{"name", "knomial"}, - {"UCC_CL_BASIC_TUNE", "inf"}, - {"UCC_TL_UCP_TUNE", "reduce_scatter:@knomial:inf"}}; - -INSTANTIATE_TEST_CASE_P( - , test_reduce_scatter_alg, - ::testing::Combine( - ::testing::Values(ring_unidir_env, ring_bidir_env, knomial)), - [](const testing::TestParamInfo& info) { - const ucc_job_env_t env = std::get<0>(info.param); - return env[0].second;}); +INSTANTIATE_TEST_CASE_P(, test_reduce_scatter_alg, + ::testing::Values("bidirectional", "unidirectional")); diff --git a/test/mpi/main.cc b/test/mpi/main.cc index 5a3f074b84..716d1d4b50 100644 --- a/test/mpi/main.cc +++ b/test/mpi/main.cc @@ -626,7 +626,6 @@ int main(int argc, char *argv[]) goto test_exit; } } - test->create_teams(teams); if (has_onesided) { test->create_teams(teams, true); diff --git a/test/mpi/test_allgatherv.cc b/test/mpi/test_allgatherv.cc index 76c86a85f9..a3bfa55d93 100644 --- a/test/mpi/test_allgatherv.cc +++ b/test/mpi/test_allgatherv.cc @@ -7,8 +7,8 @@ #include "test_mpi.h" #include "mpi_util.h" -static void fill_counts_and_displacements(int size, int count, int *counts, - int *displs) +static void fill_counts_and_displacements(int size, int count, + int *counts, int *displs) { int bias = count / 2; int i; @@ -29,18 +29,10 @@ static void fill_counts_and_displacements(int size, int count, int *counts, counts[size - 1] = count; } displs[size - 1] = displs[size - 2] + counts[size - 2]; - - //printf("Original counts:\n"); - //for (i = 0; i < size; i++) { - // printf("counts[%d]=%d\n", i, counts[i]); - //} - //for (i = 0; i < size; i++) { - // printf("displs[%d]=%d\n", i, displs[i]); - //} } -TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) - : TestCase(_team, UCC_COLL_TYPE_ALLGATHERV, params) +TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) : + TestCase(_team, UCC_COLL_TYPE_ALLGATHERV, params) { int rank, size; size_t dt_size, count; @@ -59,10 +51,9 @@ TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) return; } - counts = (int *)ucc_malloc(size * sizeof(uint32_t), "counts buf"); + counts = (int *) ucc_malloc(size * sizeof(uint32_t), "counts buf"); UCC_MALLOC_CHECK(counts); - displacements = - (int *)ucc_malloc(size * sizeof(uint32_t), "displacements buf"); + displacements = (int *) ucc_malloc(size * sizeof(uint32_t), "displacements buf"); UCC_MALLOC_CHECK(displacements); UCC_CHECK(ucc_mc_alloc(&rbuf_mc_header, msgsize * size, mem_type)); rbuf = rbuf_mc_header->addr; @@ -71,8 +62,7 @@ TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) fill_counts_and_displacements(size, count, counts, displacements); if (!inplace) { - UCC_CHECK( - ucc_mc_alloc(&sbuf_mc_header, counts[rank] * dt_size, mem_type)); + UCC_CHECK(ucc_mc_alloc(&sbuf_mc_header, counts[rank] * dt_size, mem_type)); sbuf = sbuf_mc_header->addr; args.src.info.buffer = sbuf; args.src.info.datatype = dt; @@ -80,28 +70,11 @@ TestAllgatherv::TestAllgatherv(ucc_test_team_t &_team, TestCaseParams ¶ms) args.src.info.count = counts[rank]; } args.dst.info_v.buffer = rbuf; - args.dst.info_v.counts = (ucc_count_t *)counts; - args.dst.info_v.displacements = (ucc_aint_t *)displacements; + args.dst.info_v.counts = (ucc_count_t*)counts; + args.dst.info_v.displacements = (ucc_aint_t*)displacements; args.dst.info_v.datatype = dt; args.dst.info_v.mem_type = mem_type; UCC_CHECK(set_input()); - - // --- DEBUG --- - // int wait = 1; - // printf("Waiting pid=%d\n", getpid()); - // while (wait){ - // sleep(1); - // } - - // printf("[%d] send buf: [", rank); - - // for (int i=0; i < counts[rank]; i++){ - // printf("%u, ", ((uint32_t *)sbuf)[i]); - // } - // printf("]\n"); - - // ----- - UCC_CHECK_SKIP(ucc_collective_init(&args, &req, team.team), test_skip); } @@ -123,8 +96,7 @@ ucc_status_t TestAllgatherv::set_input(int iter_persistent) return UCC_OK; } -TestAllgatherv::~TestAllgatherv() -{ +TestAllgatherv::~TestAllgatherv() { if (counts) { ucc_free(counts); } @@ -139,7 +111,7 @@ ucc_status_t TestAllgatherv::check() int size, i; MPI_Comm_size(team.comm, &size); - for (i = 0; i < size; i++) { + for (i = 0 ; i < size; i++) { total_count += counts[i]; } @@ -149,15 +121,5 @@ ucc_status_t TestAllgatherv::check() i * (iter_persistent + 1)); } - // DEBUG // - int rank; - MPI_Comm_rank(team.comm, &rank); - printf("[%d] (total count=%d) End --> rbuf: [", rank, total_count); - for (i = 0; i < total_count; i++) { - printf("%d, ",((uint32_t *)rbuf)[i]); - } - printf("]\n"); - // ----- // - return compare_buffers(rbuf, check_buf, total_count, dt, mem_type); } diff --git a/test/mpi/test_mpi.cc b/test/mpi/test_mpi.cc index 8a5d974e24..147ce1fd7d 100644 --- a/test/mpi/test_mpi.cc +++ b/test/mpi/test_mpi.cc @@ -52,7 +52,8 @@ UccTestMpi::UccTestMpi(int argc, char *argv[], ucc_thread_mode_t _tm, /* Init ucc library */ ucc_lib_params_t lib_params = { - .mask = UCC_LIB_PARAM_FIELD_THREAD_MODE, .thread_mode = _tm, + .mask = UCC_LIB_PARAM_FIELD_THREAD_MODE, + .thread_mode = _tm, /* .coll_types = coll_types, */ }; tm = _tm; //TODO check ucc provided @@ -60,11 +61,11 @@ UccTestMpi::UccTestMpi(int argc, char *argv[], ucc_thread_mode_t _tm, ucc_context_params_t ctx_params = {}; ucc_context_params_t onesided_ctx_params = {}; if (!is_local) { - ctx_params.mask |= UCC_CONTEXT_PARAM_FIELD_OOB; + ctx_params.mask |= UCC_CONTEXT_PARAM_FIELD_OOB; ctx_params.oob.allgather = oob_allgather; ctx_params.oob.req_test = oob_allgather_test; ctx_params.oob.req_free = oob_allgather_free; - ctx_params.oob.coll_info = (void *)(uintptr_t)MPI_COMM_WORLD; + ctx_params.oob.coll_info = (void*)(uintptr_t)MPI_COMM_WORLD; ctx_params.oob.n_oob_eps = size; ctx_params.oob.oob_ep = rank; @@ -95,8 +96,7 @@ UccTestMpi::UccTestMpi(int argc, char *argv[], ucc_thread_mode_t _tm, ucc_context_config_release(ctx_config); if (with_onesided) { prev_env = getenv("UCC_TL_UCP_TUNE"); - setenv("UCC_TL_UCP_TUNE", - "alltoall:0-inf:@onesided#alltoallv:0-inf:@onesided", 1); + setenv("UCC_TL_UCP_TUNE", "alltoall:0-inf:@onesided#alltoallv:0-inf:@onesided", 1); UCC_CHECK(ucc_lib_config_read(NULL, NULL, &lib_config)); UCC_CHECK(ucc_init(&lib_params, lib_config, &onesided_lib)); ucc_lib_config_release(lib_config); @@ -115,11 +115,11 @@ UccTestMpi::UccTestMpi(int argc, char *argv[], ucc_thread_mode_t _tm, } set_msgsizes(8, ((1ULL) << 21), 8); dtypes = {UCC_DT_INT16, UCC_DT_INT32, - UCC_DT_INT64, UCC_DT_UINT16, - UCC_DT_UINT32, UCC_DT_UINT64, - UCC_DT_FLOAT32, UCC_DT_FLOAT64, - UCC_DT_FLOAT128, UCC_DT_FLOAT32_COMPLEX, - UCC_DT_FLOAT64_COMPLEX, UCC_DT_FLOAT128_COMPLEX}; + UCC_DT_INT64, UCC_DT_UINT16, + UCC_DT_UINT32, UCC_DT_UINT64, + UCC_DT_FLOAT32, UCC_DT_FLOAT64, + UCC_DT_FLOAT128, UCC_DT_FLOAT32_COMPLEX, + UCC_DT_FLOAT64_COMPLEX, UCC_DT_FLOAT128_COMPLEX}; ops = {UCC_OP_SUM, UCC_OP_MAX}; colls = {UCC_COLL_TYPE_BARRIER, UCC_COLL_TYPE_ALLREDUCE}; mtypes = {UCC_MEMORY_TYPE_HOST}; @@ -150,9 +150,9 @@ void UccTestMpi::create_teams(std::vector &test_teams, for (auto &t : test_teams) { if (size < 4 && (t == TEAM_SPLIT_HALF || t == TEAM_SPLIT_ODD_EVEN)) { if (rank == 0) { - std::cout << "size of the world=" << size - << " is too small to create team " << team_str(t) - << ", skipping ...\n"; + std::cout << "size of the world=" << size << + " is too small to create team " << team_str(t) << + ", skipping ...\n"; } continue; } @@ -190,12 +190,13 @@ ucc_team_h UccTestMpi::create_ucc_team(MPI_Comm comm, bool is_onesided) MPI_Comm_size(comm, &size); /* Create UCC TEAM for comm world */ - team_params.mask = UCC_TEAM_PARAM_FIELD_EP | UCC_TEAM_PARAM_FIELD_EP_RANGE | - UCC_TEAM_PARAM_FIELD_OOB; + team_params.mask = UCC_TEAM_PARAM_FIELD_EP | + UCC_TEAM_PARAM_FIELD_EP_RANGE | + UCC_TEAM_PARAM_FIELD_OOB; team_params.oob.allgather = oob_allgather; team_params.oob.req_test = oob_allgather_test; team_params.oob.req_free = oob_allgather_free; - team_params.oob.coll_info = (void *)(uintptr_t)comm; + team_params.oob.coll_info = (void*)(uintptr_t)comm; team_params.oob.n_oob_eps = size; team_params.oob.oob_ep = rank; team_params.ep = rank; @@ -209,8 +210,8 @@ ucc_team_h UccTestMpi::create_ucc_team(MPI_Comm comm, bool is_onesided) UCC_CHECK(ucc_team_create_post(&team_ctx, 1, &team_params, &team)); MPI_Request req; - int tmp; - int completed; + int tmp; + int completed; MPI_Irecv(&tmp, 1, MPI_INT, rank, 123, comm, &req); while (UCC_INPROGRESS == (status = ucc_team_create_test(team))) { ucc_context_progress(team_ctx); @@ -228,13 +229,12 @@ ucc_team_h UccTestMpi::create_ucc_team(MPI_Comm comm, bool is_onesided) void UccTestMpi::create_team(ucc_test_mpi_team_t t, bool is_onesided) { ucc_team_h team; - MPI_Comm comm = create_mpi_comm(t); + MPI_Comm comm = create_mpi_comm(t); if (is_onesided) { MPI_Comm comm_dup; MPI_Comm_dup(comm, &comm_dup); team = create_ucc_team(comm_dup, true); - onesided_teams.push_back( - ucc_test_team_t(t, comm_dup, team, onesided_ctx)); + onesided_teams.push_back(ucc_test_team_t(t, comm_dup, team, onesided_ctx)); } else { team = create_ucc_team(comm); teams.push_back(ucc_test_team_t(t, comm, team, ctx)); @@ -246,8 +246,7 @@ void UccTestMpi::destroy_team(ucc_test_team_t &team) ucc_status_t status; team.free_ee(); - while (UCC_INPROGRESS == (status = ucc_team_destroy(team.team))) { - } + while (UCC_INPROGRESS == (status = ucc_team_destroy(team.team))) {} if (UCC_OK != status) { std::cerr << "ucc_team_destroy failed\n"; } @@ -319,7 +318,7 @@ int ucc_coll_reduce_supported(ucc_reduction_op_t op, ucc_datatype_t dt) int ucc_coll_inplace_supported(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_BARRIER: case UCC_COLL_TYPE_BCAST: case UCC_COLL_TYPE_FANIN: @@ -327,7 +326,7 @@ int ucc_coll_inplace_supported(ucc_coll_type_t c) /* remove alltoall [v] from here once it starts supporting inplace */ case UCC_COLL_TYPE_ALLTOALL: case UCC_COLL_TYPE_ALLTOALLV: - /**/ + /**/ return 0; default: return 1; @@ -345,7 +344,7 @@ bool ucc_coll_triggered_supported(ucc_memory_type_t mt) int ucc_coll_is_rooted(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_ALLREDUCE: case UCC_COLL_TYPE_ALLGATHER: case UCC_COLL_TYPE_ALLGATHERV: @@ -362,7 +361,7 @@ int ucc_coll_is_rooted(ucc_coll_type_t c) bool ucc_coll_has_memtype(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_BARRIER: case UCC_COLL_TYPE_FANIN: case UCC_COLL_TYPE_FANOUT: @@ -374,7 +373,7 @@ bool ucc_coll_has_memtype(ucc_coll_type_t c) bool ucc_coll_has_msgrange(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_BARRIER: case UCC_COLL_TYPE_FANIN: case UCC_COLL_TYPE_FANOUT: @@ -386,7 +385,7 @@ bool ucc_coll_has_msgrange(ucc_coll_type_t c) bool ucc_coll_has_datatype(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_BARRIER: case UCC_COLL_TYPE_FANIN: case UCC_COLL_TYPE_FANOUT: @@ -398,7 +397,7 @@ bool ucc_coll_has_datatype(ucc_coll_type_t c) bool ucc_coll_has_op(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_ALLREDUCE: case UCC_COLL_TYPE_REDUCE: case UCC_COLL_TYPE_REDUCE_SCATTER: @@ -411,7 +410,7 @@ bool ucc_coll_has_op(ucc_coll_type_t c) bool ucc_coll_has_bits(ucc_coll_type_t c) { - switch (c) { + switch(c) { case UCC_COLL_TYPE_ALLTOALLV: return true; default: @@ -419,14 +418,12 @@ bool ucc_coll_has_bits(ucc_coll_type_t c) } } -void UccTestMpi::set_count_vsizes( - std::vector &_counts_vsize) +void UccTestMpi::set_count_vsizes(std::vector &_counts_vsize) { - counts_vsize = _counts_vsize; + counts_vsize = _counts_vsize; } -void UccTestMpi::set_displ_vsizes( - std::vector &_displs_vsize) +void UccTestMpi::set_displ_vsizes(std::vector &_displs_vsize) { displs_vsize = _displs_vsize; } @@ -453,10 +450,9 @@ void set_gpu_device(test_set_gpu_device_t set_device) #endif switch (set_device) { case TEST_SET_DEV_LRANK: - if (local_rank >= gpu_dev_count) { - std::cerr - << "*** UCC TEST FAIL: " - << "not enough GPU devices on the node to map processes.\n"; + if(local_rank >= gpu_dev_count) { + std::cerr << "*** UCC TEST FAIL: " + << "not enough GPU devices on the node to map processes.\n"; MPI_Abort(MPI_COMM_WORLD, -1); } device_id = local_rank; @@ -473,37 +469,30 @@ void set_gpu_device(test_set_gpu_device_t set_device) #elif defined(HAVE_HIP) HIP_CHECK(hipSetDevice(device_id)); #endif + } #endif -std::vector -UccTestMpi::exec_tests(std::vector> tcs, - bool triggered, bool persistent) +std::vector UccTestMpi::exec_tests( + std::vector> tcs, bool triggered, + bool persistent) { - int n_persistent = persistent ? UCC_TEST_N_PERSISTENT : 1; - int world_rank, num_done, i; + int n_persistent = persistent ? UCC_TEST_N_PERSISTENT : 1; + int world_rank, num_done, i; ucc_status_t status; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); std::vector rst; - // if (world_rank == 0){ - // printf("Waiting, pid=%d\n", getpid()); - // int wait = 1; - // while (wait) { - // sleep(1); - // } - // } - for (i = 0; i < n_persistent; i++) { - for (auto tc : tcs) { + for (auto tc: tcs) { if (TEST_SKIP_NONE == tc->test_skip) { if (verbose && 0 == world_rank) { if (triggered) { - std::cout << "Triggered " << tc->str() << std::endl; + std::cout << "Triggered "<str() << std::endl; } else { - std::cout << tc->str() << std::endl; + std::cout << tc->str() << std::endl; } } tc->run(triggered); @@ -512,20 +501,19 @@ UccTestMpi::exec_tests(std::vector> tcs, std::cout << "SKIPPED: " << skip_str(tc->test_skip) << ": " << tc->str() << " " << std::endl; } - rst.push_back( - std::make_tuple(tc->args.coll_type, UCC_ERR_LAST)); + rst.push_back(std::make_tuple(tc->args.coll_type, UCC_ERR_LAST)); return rst; } } do { num_done = 0; - for (auto tc : tcs) { + for (auto tc: tcs) { tc->mpi_progress(); status = tc->test(); if (status < 0) { std::cerr << "error during coll test: " - << ucc_status_string(status) << " (" << status - << ")" << std::endl; + << ucc_status_string(status) + << " ("<> tcs, tc->tc_progress_ctx(); } } while (num_done != tcs.size()); - for (auto tc : tcs) { + for (auto tc: tcs) { status = tc->check(); tc->set_input(i + 1); if (UCC_OK != status) { std::cerr << "FAILURE in: " << tc->str() << std::endl; } rst.push_back(std::make_tuple(tc->args.coll_type, status)); - } + } } return rst; } -void UccTestMpi::run_all_at_team(ucc_test_team_t &team, +void UccTestMpi::run_all_at_team(ucc_test_team_t &team, std::vector &rst) { TestCaseParams params; - params.max_size = test_max_size; params.inplace = inplace; params.persistent = persistent; - for (auto i = 0; i < iterations; i++) { for (auto &c : colls) { - std::vector roots = {0}; - std::vector test_memtypes = { - UCC_MEMORY_TYPE_LAST}; - std::vector test_msgsizes = {0}; - std::vector test_dtypes = {(ucc_datatype_t)-1}; + std::vector roots = {0}; + std::vector test_memtypes = {UCC_MEMORY_TYPE_LAST}; + std::vector test_msgsizes = {0}; + std::vector test_dtypes = {(ucc_datatype_t)-1}; std::vector test_ops = {(ucc_reduction_op_t)-1}; - std::vector test_counts_vsize = { - TEST_FLAG_VSIZE_64BIT}; - std::vector test_displ_vsize = { - TEST_FLAG_VSIZE_64BIT}; + std::vector test_counts_vsize = {TEST_FLAG_VSIZE_64BIT}; + std::vector test_displ_vsize = {TEST_FLAG_VSIZE_64BIT}; void **onesided_bufs; if (inplace && !ucc_coll_inplace_supported(c)) { @@ -597,14 +580,13 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, if (ucc_coll_has_bits(c)) { test_counts_vsize = counts_vsize; - test_displ_vsize = displs_vsize; + test_displ_vsize = displs_vsize; } for (auto r : roots) { - for (auto mt : test_memtypes) { + for (auto mt: test_memtypes) { if (triggered && !ucc_coll_triggered_supported(mt)) { - rst.push_back( - std::make_tuple(c, UCC_ERR_NOT_IMPLEMENTED)); + rst.push_back(std::make_tuple(c, UCC_ERR_NOT_IMPLEMENTED)); continue; } @@ -621,9 +603,9 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, onesided_bufs = nullptr; } - for (auto m : test_msgsizes) { - for (auto dt : test_dtypes) { - for (auto op : test_ops) { + for (auto m: test_msgsizes) { + for (auto dt: test_dtypes) { + for (auto op: test_ops) { if (ucc_coll_args_is_reduction(c) && !ucc_coll_reduce_supported(op, dt)) { continue; @@ -634,8 +616,8 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, dt == UCC_DT_FLOAT128_COMPLEX)) { continue; } - for (auto count_bits : test_counts_vsize) { - for (auto displ_bits : test_displ_vsize) { + for (auto count_bits: test_counts_vsize) { + for (auto displ_bits: test_displ_vsize) { params.root = r; params.mt = mt; params.msgsize = m; @@ -645,12 +627,9 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, params.displ_bits = displ_bits; params.buffers = onesided_bufs; - auto tcs = - TestCase::init(team, c, nt, params); - auto res = exec_tests(tcs, triggered, - persistent); - rst.insert(rst.end(), res.begin(), - res.end()); + auto tcs = TestCase::init(team, c, nt, params); + auto res = exec_tests(tcs, triggered, persistent); + rst.insert(rst.end(), res.begin(), res.end()); } } } @@ -665,7 +644,7 @@ void UccTestMpi::run_all_at_team(ucc_test_team_t &team, typedef struct ucc_test_thread { pthread_t thread; int id; - UccTestMpi *test; + UccTestMpi * test; std::vector rst; } ucc_test_thread_t; @@ -685,7 +664,7 @@ void UccTestMpi::run_all(bool is_onesided) if (UCC_THREAD_MULTIPLE == tm) { int n_threads = teams.size(); std::vector threads(n_threads); - void *ret; + void * ret; for (int i = 0; i < n_threads; i++) { threads[i].id = i; threads[i].test = this; @@ -712,16 +691,16 @@ void UccTestMpi::run_all(bool is_onesided) std::vector UccTestMpi::gen_roots(ucc_test_team_t &team) { - int size; + int size; std::vector _roots; MPI_Comm_size(team.comm, &size); std::default_random_engine eng; eng.seed(123); - std::uniform_int_distribution urd(0, size - 1); + std::uniform_int_distribution urd(0, size-1); - switch (root_type) { + switch(root_type) { case ROOT_SINGLE: - _roots = std::vector({ucc_min(root_value, size - 1)}); + _roots = std::vector({ucc_min(root_value, size-1)}); break; case ROOT_RANDOM: _roots.resize(root_value);