Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CL/HIER: change score only for supported colls #923

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/coll_score/ucc_coll_score.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand All @@ -9,6 +9,16 @@
#include "utils/ucc_log.h"
#include "utils/ucc_coll_utils.h"

char *ucc_score_to_str(ucc_score_t score, char *buf, size_t max) {
if (score == UCC_SCORE_MAX) {
ucc_strncpy_safe(buf, "inf", max);
} else {
ucc_snprintf_safe(buf, max, "%d", score);
}

return buf;
}

ucc_status_t ucc_coll_score_alloc(ucc_coll_score_t **score)
{
ucc_coll_score_t *s = ucc_malloc(sizeof(*s), "ucc_coll_score");
Expand Down
14 changes: 8 additions & 6 deletions src/coll_score/ucc_coll_score.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -63,6 +63,8 @@ typedef struct ucc_coll_score {

typedef struct ucc_score_map ucc_score_map_t;

char *ucc_score_to_str(ucc_score_t score, char *buf, size_t max);

/* Allocates empty score data structure */
ucc_status_t ucc_coll_score_alloc(ucc_coll_score_t **score);

Expand All @@ -77,7 +79,7 @@ ucc_status_t ucc_coll_score_add_range(ucc_coll_score_t *score,

/* Releases the score data structure and all the score ranges stored
there */
void ucc_coll_score_free(ucc_coll_score_t *score);
void ucc_coll_score_free(ucc_coll_score_t *score);

/* Merges 2 scores score1 and score2 into the new score "rst" selecting
larger score. Ie.: rst will contain a range from score1 if either
Expand All @@ -87,9 +89,9 @@ void ucc_coll_score_free(ucc_coll_score_t *score);
This fn is used by CL to merge scores from multiple TLs and produce
a score map. As a result the produced score map will select TL with
higher score.*/
ucc_status_t ucc_coll_score_merge(ucc_coll_score_t * score1,
ucc_coll_score_t * score2,
ucc_coll_score_t **rst, int free_inputs);
ucc_status_t ucc_coll_score_merge(ucc_coll_score_t * score1,
ucc_coll_score_t * score2,
ucc_coll_score_t **rst, int free_inputs);


/* Parses SCORE string (see ucc_base_iface.c for pattern description)
Expand Down Expand Up @@ -147,7 +149,7 @@ ucc_status_t ucc_coll_score_build_default(ucc_base_team_t *team,
ucc_status_t ucc_coll_score_build_map(ucc_coll_score_t *score,
ucc_score_map_t **map);

void ucc_coll_score_free_map(ucc_score_map_t *map);
void ucc_coll_score_free_map(ucc_score_map_t *map);

/* Initializes task based on args selection and score map.
Checks fallbacks if necessary. */
Expand Down
19 changes: 11 additions & 8 deletions src/coll_score/ucc_coll_score_map.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -160,11 +160,12 @@ ucc_status_t ucc_coll_init(ucc_score_map_t *map,

void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
{
size_t left;
ucc_msg_range_t *range;
int i, j, all_empty;
char range_str[128];
char coll_str[1024];
size_t left;
ucc_msg_range_t *range;
int i, j, all_empty;
char score_str[32];
char range_str[128];
char coll_str[1024];

for (i = 0; i < UCC_COLL_TYPE_NUM; i++) {
all_empty = 1;
Expand All @@ -191,10 +192,12 @@ void ucc_coll_score_map_print_info(const ucc_score_map_t *map)
super.list_elem) {
ucc_memunits_range_str(range->start, range->end, range_str,
sizeof(range_str));
STR_APPEND(coll_str, left, 256, "{%s}:%s:%u ",
ucc_score_to_str(range->super.score, score_str,
sizeof(score_str));
STR_APPEND(coll_str, left, 256, "{%s}:%s:%s ",
range_str,
range->super.team->context->lib->log_component.name,
range->super.score);
score_str);
}
STR_APPEND(coll_str, left, 4, "\n");
}
Expand Down
10 changes: 7 additions & 3 deletions src/components/cl/hier/cl_hier.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -109,8 +109,12 @@ typedef struct ucc_cl_hier_team {
UCC_CLASS_DECLARE(ucc_cl_hier_team_t, ucc_base_context_t *,
const ucc_base_team_params_t *);

#define UCC_CL_HIER_SUPPORTED_COLLS \
(UCC_COLL_TYPE_ALLTOALL | UCC_COLL_TYPE_ALLTOALLV)
#define UCC_CL_HIER_SUPPORTED_COLLS \
(UCC_COLL_TYPE_ALLTOALL | \
UCC_COLL_TYPE_ALLTOALLV | \
UCC_COLL_TYPE_ALLREDUCE | \
UCC_COLL_TYPE_BARRIER | \
UCC_COLL_TYPE_BCAST)

ucc_status_t ucc_cl_hier_coll_init(ucc_base_coll_args_t *coll_args,
ucc_base_team_t *team,
Expand Down
2 changes: 1 addition & 1 deletion src/components/cl/hier/cl_hier_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ ucc_status_t ucc_cl_hier_team_get_scores(ucc_base_team_t *cl_team,
team_info.init = ucc_cl_hier_coll_init;
team_info.num_mem_types = 0;
team_info.supported_mem_types = NULL; /* all memory types supported*/
team_info.supported_colls = UCC_COLL_TYPE_ALL;
team_info.supported_colls = UCC_CL_HIER_SUPPORTED_COLLS;
team_info.size = UCC_CL_TEAM_SIZE(team);

status = ucc_coll_score_alloc(&score);
Expand Down
Loading