Skip to content

Commit

Permalink
TL/UCP: add reduce scatter knomial
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev committed May 3, 2024
1 parent 0bf857d commit 2f1e52f
Show file tree
Hide file tree
Showing 8 changed files with 414 additions and 92 deletions.
54 changes: 53 additions & 1 deletion src/coll_patterns/sra_knomial.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -265,6 +265,18 @@ static inline void ucc_kn_ag_pattern_next_iter(ucc_knomial_pattern_t *p)
}
}

/**
 * Prepare knomial pattern @p p for a reduce-scatter exchange.
 *
 * The generic knomial state is filled in by
 * ucc_knomial_pattern_init_backward(size, rank, radix, p); afterwards the
 * reduce-scatter specific fields are set so that the initial block covers
 * the whole buffer of @p count elements.
 *
 * @param size   forwarded to ucc_knomial_pattern_init_backward
 * @param rank   forwarded to ucc_knomial_pattern_init_backward
 * @param radix  forwarded to ucc_knomial_pattern_init_backward
 * @param count  total element count; stored in p->count and used as the
 *               initial p->block_size_counts
 * @param p      pattern object to initialize
 */
static inline void ucc_kn_rs_pattern_init(ucc_rank_t size, ucc_rank_t rank,
                                          ucc_kn_radix_t radix, size_t count,
                                          ucc_knomial_pattern_t *p)
{
    /* Must run first: p->n_extra is read below and is presumably
       populated by this call - confirm in the knomial pattern header */
    ucc_knomial_pattern_init_backward(size, rank, radix, p);

    p->type = KN_PATTERN_REDUCE_SCATTER;

    /* Element-count view: the first block is the entire buffer */
    p->block_size_counts = count;
    p->count             = count;

    /* Rank view: the first block starts at 0 and spans size - n_extra */
    p->block_offset = 0;
    p->block_size   = size - p->n_extra;
}

static inline void ucc_kn_rsx_pattern_init(ucc_rank_t size, ucc_rank_t rank,
ucc_kn_radix_t radix, size_t count,
ucc_knomial_pattern_t *p)
Expand All @@ -281,6 +293,14 @@ ucc_kn_rs_pattern_peer_seg(ucc_rank_t peer, ucc_knomial_pattern_t *p,
size_t *peer_seg_count, ptrdiff_t *peer_seg_offset)
{
ucc_rank_t step_radix, seg_index;
ucc_kn_seg_desc_t s;
ucc_rank_t block_offset_inv;
/* offset of the segment in counts of datatypes from the
start of the buffer */
size_t peer_seg_offset_base;
/* offset of the current block in counts of datatypes from the
start of the buffer */
size_t block_offset_counts;

*peer_seg_count = 0;
*peer_seg_offset = 0;
Expand All @@ -295,6 +315,17 @@ ucc_kn_rs_pattern_peer_seg(ucc_rank_t peer, ucc_knomial_pattern_t *p,
step_radix, seg_index);
return;
case KN_PATTERN_REDUCE_SCATTER:
ucc_kn_seg_desc_compute(p, &s, peer);
block_offset_inv = ucc_knomial_pattern_loop_rank_inv(p, p->block_offset);
peer_seg_offset_base = ucc_buffer_block_offset(p->count, p->size,
s.seg_start);
*peer_seg_count = ucc_buffer_block_offset(p->count, p->size,
s.seg_end) -
peer_seg_offset_base;
block_offset_counts = ucc_buffer_block_offset(p->count, p->size,
block_offset_inv);
*peer_seg_offset = peer_seg_offset_base - block_offset_counts;
return;
case KN_PATTERN_REDUCE_SCATTERV:
/* not implemented */
ucc_assert(0);
Expand All @@ -307,6 +338,7 @@ static inline void ucc_kn_rs_pattern_next_iter(ucc_knomial_pattern_t *p)
{
size_t bs;
ptrdiff_t offset;
ucc_kn_seg_desc_t s;

ucc_kn_rs_pattern_peer_seg(p->rank, p, &bs, &offset);
p->block_size_counts = bs;
Expand All @@ -317,6 +349,11 @@ static inline void ucc_kn_rs_pattern_next_iter(ucc_knomial_pattern_t *p)
ucc_knomial_pattern_next_iteration(p);
return;
case KN_PATTERN_REDUCE_SCATTER:
ucc_kn_seg_desc_compute(p, &s, p->rank);
p->block_size = s.seg_size;
p->block_offset += s.seg_offset;
ucc_knomial_pattern_next_iteration_backward(p);
return;
case KN_PATTERN_REDUCE_SCATTERV:
/* not implemented */
ucc_assert(0);
Expand All @@ -325,4 +362,19 @@ static inline void ucc_kn_rs_pattern_next_iter(ucc_knomial_pattern_t *p)
}
}

/**
 * Compute the segment associated with the "extra" rank paired with p->rank.
 *
 * Only KN_PATTERN_REDUCE_SCATTER is handled; any other pattern type hits
 * ucc_assert(0) and leaves both outputs untouched.
 *
 * @param p           knomial pattern; p->type, p->count, p->size and
 *                    p->rank are read
 * @param seg_count   [out] ucc_buffer_block_count() of the rank returned by
 *                    ucc_knomial_pattern_get_extra(p, p->rank)
 * @param seg_offset  [out] ucc_buffer_block_count() of p->rank
 */
static inline void ucc_kn_rs_pattern_extra_seg(ucc_knomial_pattern_t *p,
                                               size_t *seg_count,
                                               ptrdiff_t *seg_offset)
{
    ucc_rank_t extra_rank;

    switch (p->type) {
    case KN_PATTERN_REDUCE_SCATTER:
        /* NOTE(review): the offset is a block *count* of p->rank, not a
           ucc_buffer_block_offset(); presumably the extra's segment sits
           right after this rank's own block - confirm against the caller */
        *seg_offset = ucc_buffer_block_count(p->count, p->size, p->rank);
        extra_rank  = ucc_knomial_pattern_get_extra(p, p->rank);
        *seg_count  = ucc_buffer_block_count(p->count, p->size, extra_rank);
        break;
    default:
        /* no other pattern type is supported here */
        ucc_assert(0);
        break;
    }
}

#endif
6 changes: 3 additions & 3 deletions src/components/tl/ucp/allgather/allgather.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ ucc_base_coll_alg_info_t
[UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL] =
{.id = UCC_TL_UCP_ALLGATHER_ALG_KNOMIAL,
.name = "knomial",
.desc = "recursive k-ing with arbitrary radix "},
.desc = "recursive k-ing with arbitrary radix"},
[UCC_TL_UCP_ALLGATHER_ALG_RING] =
{.id = UCC_TL_UCP_ALLGATHER_ALG_RING,
.name = "ring",
Expand All @@ -23,11 +23,11 @@ ucc_base_coll_alg_info_t
{.id = UCC_TL_UCP_ALLGATHER_ALG_NEIGHBOR,
.name = "neighbor",
.desc = "O(N) Neighbor Exchange N/2 steps"},
[UCC_TL_UCP_ALLGATHER_ALG_BRUCK] =
[UCC_TL_UCP_ALLGATHER_ALG_BRUCK] =
{.id = UCC_TL_UCP_ALLGATHER_ALG_BRUCK,
.name = "bruck",
.desc = "O(log(N)) Variation of Bruck algorithm"},
[UCC_TL_UCP_ALLGATHER_ALG_SPARBIT] =
[UCC_TL_UCP_ALLGATHER_ALG_SPARBIT] =
{.id = UCC_TL_UCP_ALLGATHER_ALG_SPARBIT,
.name = "sparbit",
.desc = "O(log(N)) SPARBIT algorithm"},
Expand Down
6 changes: 5 additions & 1 deletion src/components/tl/ucp/reduce_scatter/reduce_scatter.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -14,5 +14,9 @@ ucc_base_coll_alg_info_t
{.id = UCC_TL_UCP_REDUCE_SCATTER_ALG_RING,
.name = "ring",
.desc = "O(N) ring"},
[UCC_TL_UCP_REDUCE_SCATTER_ALG_KNOMIAL] =
{.id = UCC_TL_UCP_REDUCE_SCATTER_ALG_KNOMIAL,
.name = "knomial",
.desc = "recursive k-ing with arbitrary radix"},
[UCC_TL_UCP_REDUCE_SCATTER_ALG_LAST] = {
.id = 0, .name = NULL, .desc = NULL}};
4 changes: 3 additions & 1 deletion src/components/tl/ucp/reduce_scatter/reduce_scatter.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/

#ifndef REDUCE_SCATTER_H_
#define REDUCE_SCATTER_H_
#include "tl_ucp_coll.h"

/* Identifiers of the TL/UCP reduce-scatter algorithms. */
enum
{
/* ring algorithm */
UCC_TL_UCP_REDUCE_SCATTER_ALG_RING,
/* knomial algorithm */
UCC_TL_UCP_REDUCE_SCATTER_ALG_KNOMIAL,
/* not an algorithm: marks the end of the list, keep it last */
UCC_TL_UCP_REDUCE_SCATTER_ALG_LAST
};

Expand Down
Loading

0 comments on commit 2f1e52f

Please sign in to comment.