From a8ae7184c471365db7281fa06be4556dc2e5acf2 Mon Sep 17 00:00:00 2001 From: Sergey Lebedev Date: Mon, 4 Mar 2024 13:52:23 +0000 Subject: [PATCH] TL/UCP: select bigger knomial radix for ppn1 --- src/coll_patterns/recursive_knomial.h | 8 ++++---- src/components/ec/cpu/ec_cpu.c | 4 ++-- src/components/tl/ucp/tl_ucp_team.c | 14 +++++++++----- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/coll_patterns/recursive_knomial.h b/src/coll_patterns/recursive_knomial.h index ebf9a0981b..298a7a78ad 100644 --- a/src/coll_patterns/recursive_knomial.h +++ b/src/coll_patterns/recursive_knomial.h @@ -8,7 +8,6 @@ #define RECURSIVE_KNOMIAL_H_ #define UCC_KN_PEER_NULL ((ucc_rank_t)-1) -#define UCC_KN_MIN_RADIX 2 typedef uint16_t ucc_kn_radix_t; enum { @@ -240,15 +239,16 @@ ucc_knomial_calc_recv_dist(ucc_rank_t team_size, ucc_rank_t rank, /* Calculates (sub) opt radix for Allreduce SRA and Bcast SAG, by minimizing n_extra ranks */ static inline ucc_rank_t ucc_kn_get_opt_radix(ucc_rank_t team_size, + ucc_kn_radix_t min_radix, ucc_kn_radix_t max_radix) { ucc_rank_t n_extra = 0, min_val = team_size; - ucc_kn_radix_t min_i = UCC_KN_MIN_RADIX; - ucc_kn_radix_t max_r = ucc_max(max_radix, UCC_KN_MIN_RADIX); + ucc_kn_radix_t min_i = min_radix; + ucc_kn_radix_t max_r = ucc_max(max_radix, min_radix); ucc_kn_radix_t r; ucc_rank_t fs; - for (r = UCC_KN_MIN_RADIX; r <= max_r; r++) { + for (r = min_radix; r <= max_r; r++) { fs = r; while (fs < team_size) { fs = fs * r; diff --git a/src/components/ec/cpu/ec_cpu.c b/src/components/ec/cpu/ec_cpu.c index b94052002e..df0dc3779f 100644 --- a/src/components/ec/cpu/ec_cpu.c +++ b/src/components/ec/cpu/ec_cpu.c @@ -70,7 +70,7 @@ ucc_status_t ucc_cpu_executor_init(const ucc_ee_executor_params_t *params, { ucc_ee_executor_t *eee = ucc_mpool_get(&ucc_ec_cpu.executors); - ec_debug(&ucc_ec_cpu.super, "executor init, eee: %p", eee); + ec_trace(&ucc_ec_cpu.super, "executor init, eee: %p", eee); if (ucc_unlikely(!eee)) { ec_error(&ucc_ec_cpu.super, "failed to allocate executor"); return UCC_ERR_NO_MEMORY; @@ -187,7 +187,7 @@ ucc_status_t ucc_cpu_executor_task_finalize(ucc_ee_executor_task_t *task) ucc_status_t ucc_cpu_executor_finalize(ucc_ee_executor_t *executor) { - ec_debug(&ucc_ec_cpu.super, "executor finalize, eee: %p", executor); + ec_trace(&ucc_ec_cpu.super, "executor finalize, eee: %p", executor); ucc_mpool_put(executor); return UCC_OK; diff --git a/src/components/tl/ucp/tl_ucp_team.c b/src/components/tl/ucp/tl_ucp_team.c index d7a8a8041c..8600eda5d9 100644 --- a/src/components/tl/ucp/tl_ucp_team.c +++ b/src/components/tl/ucp/tl_ucp_team.c @@ -46,7 +46,7 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_team_t, ucc_base_context_t *tl_context, { ucc_tl_ucp_context_t *ctx = ucc_derived_of(tl_context, ucc_tl_ucp_context_t); - ucc_kn_radix_t max_radix; + ucc_kn_radix_t max_radix, min_radix; ucc_status_t status; UCC_CLASS_CALL_SUPER_INIT(ucc_tl_team_t, &ctx->super, params); @@ -91,11 +91,15 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_team_t, ucc_base_context_t *tl_context, } if (self->topo && !IS_SERVICE_TEAM(self) && self->topo->topo->sock_bound) { - max_radix = ucc_min(UCC_TL_TEAM_SIZE(self), - ucc_topo_min_socket_size(self->topo)); - + max_radix = ucc_min(UCC_TL_TEAM_SIZE(self), + ucc_topo_max_ppn(self->topo) == 1 ? + UCC_TL_TEAM_SIZE(self): + ucc_topo_min_socket_size(self->topo)); + min_radix = ucc_min(UCC_TL_TEAM_SIZE(self), + ucc_topo_max_ppn(self->topo) == 1 ? 3: 2); self->opt_radix = ucc_kn_get_opt_radix(UCC_TL_TEAM_SIZE(self), - max_radix); + min_radix, max_radix); + tl_debug(tl_context->lib, "opt knomial radix: %d", self->opt_radix); } tl_debug(tl_context->lib, "posted tl team: %p", self);