Skip to content

Commit

Permalink
CL/HIER: change score only for supported colls (#923)
Browse files Browse the repository at this point in the history
  • Loading branch information
Sergei-Lebedev authored Feb 22, 2024
1 parent e13d962 commit 53350c7
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 19 deletions.
12 changes: 11 additions & 1 deletion src/coll_score/ucc_coll_score.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -9,6 +9,16 @@
#include "utils/ucc_log.h"
#include "utils/ucc_coll_utils.h"

/**
 * Formats a collective score as text.
 *
 * Writes the literal "inf" when @a score equals UCC_SCORE_MAX; otherwise
 * writes the decimal value of the score.
 *
 * @param score score value to format
 * @param buf   destination buffer that receives the text
 * @param max   size limit forwarded to the ucc_*_safe string helpers
 *              (presumably the capacity of @a buf in bytes - confirm
 *              against the helpers' definitions)
 *
 * @return @a buf
 */
char *ucc_score_to_str(ucc_score_t score, char *buf, size_t max)
{
    if (score == UCC_SCORE_MAX) {
        ucc_strncpy_safe(buf, "inf", max);
    } else {
        /* Use an unsigned conversion: "%d" on an unsigned score is a
           format/argument mismatch and would render values above INT_MAX
           as negative numbers.
           NOTE(review): assumes ucc_score_t is an unsigned type no wider
           than unsigned int - confirm against its typedef. */
        ucc_snprintf_safe(buf, max, "%u", (unsigned)score);
    }

    return buf;
}

ucc_status_t ucc_coll_score_alloc(ucc_coll_score_t **score)
{
ucc_coll_score_t *s = ucc_malloc(sizeof(*s), "ucc_coll_score");
Expand Down
14 changes: 8 additions & 6 deletions src/coll_score/ucc_coll_score.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -63,6 +63,8 @@ typedef struct ucc_coll_score {

typedef struct ucc_score_map ucc_score_map_t;

/* Converts a score to text: writes "inf" into buf when score equals
   UCC_SCORE_MAX, otherwise the decimal score value. max is the size
   limit forwarded to the string helpers (presumably the capacity of
   buf). Returns buf. */
char *ucc_score_to_str(ucc_score_t score, char *buf, size_t max);

/* Allocates empty score data structure */
ucc_status_t ucc_coll_score_alloc(ucc_coll_score_t **score);

Expand All @@ -77,7 +79,7 @@ ucc_status_t ucc_coll_score_add_range(ucc_coll_score_t *score,

/* Releases the score data structure and all the score ranges stored
there */
void ucc_coll_score_free(ucc_coll_score_t *score);
void ucc_coll_score_free(ucc_coll_score_t *score);

/* Merges 2 scores score1 and score2 into the new score "rst" selecting
the larger score. I.e., rst will contain a range from score1 if either
Expand All @@ -87,9 +89,9 @@ void ucc_coll_score_free(ucc_coll_score_t *score);
This fn is used by CL to merge scores from multiple TLs and produce
a score map. As a result the produced score map will select TL with
higher score.*/
ucc_status_t ucc_coll_score_merge(ucc_coll_score_t * score1,
ucc_coll_score_t * score2,
ucc_coll_score_t **rst, int free_inputs);
ucc_status_t ucc_coll_score_merge(ucc_coll_score_t * score1,
ucc_coll_score_t * score2,
ucc_coll_score_t **rst, int free_inputs);


/* Parses SCORE string (see ucc_base_iface.c for pattern description)
Expand Down Expand Up @@ -147,7 +149,7 @@ ucc_status_t ucc_coll_score_build_default(ucc_base_team_t *team,
ucc_status_t ucc_coll_score_build_map(ucc_coll_score_t *score,
ucc_score_map_t **map);

void ucc_coll_score_free_map(ucc_score_map_t *map);
void ucc_coll_score_free_map(ucc_score_map_t *map);

/* Initializes task based on args selection and score map.
Checks fallbacks if necessary. */
Expand Down
19 changes: 11 additions & 8 deletions src/coll_score/ucc_coll_score_map.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -160,11 +160,12 @@ ucc_status_t ucc_coll_init(ucc_score_map_t *map,

void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
{
size_t left;
ucc_msg_range_t *range;
int i, j, all_empty;
char range_str[128];
char coll_str[1024];
size_t left;
ucc_msg_range_t *range;
int i, j, all_empty;
char score_str[32];
char range_str[128];
char coll_str[1024];

for (i = 0; i < UCC_COLL_TYPE_NUM; i++) {
all_empty = 1;
Expand All @@ -191,10 +192,12 @@ void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
super.list_elem) {
ucc_memunits_range_str(range->start, range->end, range_str,
sizeof(range_str));
STR_APPEND(coll_str, left, 256, "{%s}:%s:%u ",
ucc_score_to_str(range->super.score, score_str,
sizeof(score_str));
STR_APPEND(coll_str, left, 256, "{%s}:%s:%s ",
range_str,
range->super.team->context->lib->log_component.name,
range->super.score);
score_str);
}
STR_APPEND(coll_str, left, 4, "\n");
}
Expand Down
10 changes: 7 additions & 3 deletions src/components/cl/hier/cl_hier.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -109,8 +109,12 @@ typedef struct ucc_cl_hier_team {
UCC_CLASS_DECLARE(ucc_cl_hier_team_t, ucc_base_context_t *,
const ucc_base_team_params_t *);

#define UCC_CL_HIER_SUPPORTED_COLLS \
(UCC_COLL_TYPE_ALLTOALL | UCC_COLL_TYPE_ALLTOALLV)
/* Bitwise OR of the collective types CL/HIER lists as supported:
   alltoall, alltoallv, allreduce, barrier and bcast. */
#define UCC_CL_HIER_SUPPORTED_COLLS \
(UCC_COLL_TYPE_ALLTOALL | \
UCC_COLL_TYPE_ALLTOALLV | \
UCC_COLL_TYPE_ALLREDUCE | \
UCC_COLL_TYPE_BARRIER | \
UCC_COLL_TYPE_BCAST)

ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
Expand Down
2 changes: 1 addition & 1 deletion src/components/cl/hier/cl_hier_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ ucc_status_t ucc_cl_hier_team_get_scores(ucc_base_team_t *cl_team,
team_info.init = ucc_cl_hier_coll_init;
team_info.num_mem_types = 0;
team_info.supported_mem_types = NULL; /* all memory types supported*/
team_info.supported_colls = UCC_COLL_TYPE_ALL;
team_info.supported_colls = UCC_CL_HIER_SUPPORTED_COLLS;
team_info.size = UCC_CL_TEAM_SIZE(team);

status = ucc_coll_score_alloc(&score);
Expand Down

0 comments on commit 53350c7

Please sign in to comment.