Skip to content

Commit

Permalink
TL/UCP: select bigger knomial radix for ppn1 (#936)
Browse files Browse the repository at this point in the history
* TL/UCP: select bigger knomial radix for ppn1

* REVIEW: fix review comments
  • Loading branch information
Sergei-Lebedev authored Mar 18, 2024
1 parent fc799fc commit 0d68445
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
10 changes: 5 additions & 5 deletions src/coll_patterns/recursive_knomial.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -8,7 +8,6 @@
#define RECURSIVE_KNOMIAL_H_

#define UCC_KN_PEER_NULL ((ucc_rank_t)-1)
#define UCC_KN_MIN_RADIX 2
typedef uint16_t ucc_kn_radix_t;

enum {
Expand Down Expand Up @@ -240,15 +239,16 @@ ucc_knomial_calc_recv_dist(ucc_rank_t team_size, ucc_rank_t rank,
/* Calculates a (sub)optimal knomial radix for Allreduce SRA and Bcast SAG:
searches radices from min_radix up to max_radix and picks one minimizing n_extra ranks */
static inline ucc_rank_t ucc_kn_get_opt_radix(ucc_rank_t team_size,
ucc_kn_radix_t min_radix,
ucc_kn_radix_t max_radix)
{
ucc_rank_t n_extra = 0, min_val = team_size;
ucc_kn_radix_t min_i = UCC_KN_MIN_RADIX;
ucc_kn_radix_t max_r = ucc_max(max_radix, UCC_KN_MIN_RADIX);
ucc_kn_radix_t min_i = min_radix;
ucc_kn_radix_t max_r = ucc_max(max_radix, min_radix);
ucc_kn_radix_t r;
ucc_rank_t fs;

for (r = UCC_KN_MIN_RADIX; r <= max_r; r++) {
for (r = min_radix; r <= max_r; r++) {
fs = r;
while (fs < team_size) {
fs = fs * r;
Expand Down
6 changes: 3 additions & 3 deletions src/components/ec/cpu/ec_cpu.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -70,7 +70,7 @@ ucc_status_t ucc_cpu_executor_init(const ucc_ee_executor_params_t *params,
{
ucc_ee_executor_t *eee = ucc_mpool_get(&ucc_ec_cpu.executors);

ec_debug(&ucc_ec_cpu.super, "executor init, eee: %p", eee);
ec_trace(&ucc_ec_cpu.super, "executor init, eee: %p", eee);
if (ucc_unlikely(!eee)) {
ec_error(&ucc_ec_cpu.super, "failed to allocate executor");
return UCC_ERR_NO_MEMORY;
Expand Down Expand Up @@ -187,7 +187,7 @@ ucc_status_t ucc_cpu_executor_task_finalize(ucc_ee_executor_task_t *task)

ucc_status_t ucc_cpu_executor_finalize(ucc_ee_executor_t *executor)
{
ec_debug(&ucc_ec_cpu.super, "executor finalize, eee: %p", executor);
ec_trace(&ucc_ec_cpu.super, "executor finalize, eee: %p", executor);
ucc_mpool_put(executor);

return UCC_OK;
Expand Down
16 changes: 9 additions & 7 deletions src/components/tl/ucp/tl_ucp_team.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -46,7 +46,8 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_team_t, ucc_base_context_t *tl_context,
{
ucc_tl_ucp_context_t *ctx =
ucc_derived_of(tl_context, ucc_tl_ucp_context_t);
ucc_kn_radix_t max_radix;
ucc_kn_radix_t max_radix, min_radix;
ucc_rank_t tsize;
ucc_status_t status;

UCC_CLASS_CALL_SUPER_INIT(ucc_tl_team_t, &ctx->super, params);
Expand Down Expand Up @@ -91,11 +92,12 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_team_t, ucc_base_context_t *tl_context,
}

if (self->topo && !IS_SERVICE_TEAM(self) && self->topo->topo->sock_bound) {
max_radix = ucc_min(UCC_TL_TEAM_SIZE(self),
ucc_topo_min_socket_size(self->topo));

self->opt_radix = ucc_kn_get_opt_radix(UCC_TL_TEAM_SIZE(self),
max_radix);
tsize = UCC_TL_TEAM_SIZE(self);
max_radix = (ucc_topo_max_ppn(self->topo) == 1) ? tsize :
ucc_min(tsize, ucc_topo_min_socket_size(self->topo));
min_radix = ucc_min(tsize, ucc_topo_max_ppn(self->topo) == 1 ? 3: 2);
self->opt_radix = ucc_kn_get_opt_radix(tsize, min_radix, max_radix);
tl_debug(tl_context->lib, "opt knomial radix: %d", self->opt_radix);
}

tl_debug(tl_context->lib, "posted tl team: %p", self);
Expand Down

0 comments on commit 0d68445

Please sign in to comment.