Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TL/MLX5: fix warning and var names #962

Merged
merged 1 commit into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/mcast/tl_mlx5_mcast.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ typedef struct ucc_tl_mlx5_mcast_context {
ucc_tl_mlx5_mcast_context_config_t cfg;
ucc_mpool_t req_mp;
int mcast_enabled;
- int mcast_ready;
+ int mcast_ctx_ready;
ucc_tl_mlx5_mcast_oob_ctx_t oob_ctx;
} ucc_tl_mlx5_mcast_context_t;

Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/mcast/tl_mlx5_mcast_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ ucc_status_t ucc_tl_mlx5_mcast_team_init(ucc_base_context_t *base_context,
ucc_tl_mlx5_mcast_coll_comm_t *comm;
int i;

- if (!ctx->mcast_enabled || !ctx->mcast_ready || NULL == mcast_context) {
+ if (!ctx->mcast_ctx_ready) {
tl_debug(base_context->lib,
"mcast context not available, base_context = %p",
base_context );
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/tl_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ typedef struct ucc_tl_mlx5_team {
ucc_tl_mlx5_alltoall_t *a2a;
ucc_topo_t *topo;
ucc_ep_map_t ctx_map;
- int local_mcast_ctx_ready;
+ int local_mcast_team_ready;
ucc_tl_mlx5_mcast_team_t *mcast;
ucc_status_t local_status_array[UCC_TL_MLX5_FEATURES_COUNT];
ucc_status_t global_status_array[UCC_TL_MLX5_FEATURES_COUNT];
Expand Down
6 changes: 3 additions & 3 deletions src/components/tl/mlx5/tl_mlx5_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_context_t,
goto err_rcache;
}

- self->mcast.mcast_ready = 0;
+ self->mcast.mcast_ctx_ready = 0;
if (params->thread_mode == UCC_THREAD_SINGLE) {
status = ucc_tl_mlx5_mcast_context_init(&(self->mcast), &(self->cfg.mcast_ctx_conf));
if (UCC_OK != status) {
tl_debug(self->super.super.lib, "failed to initialize mcast context");
} else {
- self->mcast.mcast_ready = 1;
+ self->mcast.mcast_ctx_ready = 1;
}
}
return UCC_OK;
Expand All @@ -82,7 +82,7 @@ UCC_CLASS_CLEANUP_FUNC(ucc_tl_mlx5_context_t)

ucc_mpool_cleanup(&self->req_mp, 1);

- if (self->mcast.mcast_ready) {
+ if (self->mcast.mcast_ctx_ready) {
ucc_tl_mlx5_mcast_clean_ctx(&self->mcast.mcast_context);
}
}
Expand Down
31 changes: 14 additions & 17 deletions src/components/tl/mlx5/tl_mlx5_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,16 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_team_t, ucc_base_context_t *tl_context,
}

self->mcast = NULL;
- status = ucc_tl_mlx5_mcast_team_init(tl_context, &(self->mcast), &(ctx->mcast),
- params, &(UCC_TL_MLX5_TEAM_LIB(self)->cfg.mcast_conf));
- if (UCC_OK != status) {
- tl_warn(tl_context->lib, "mcast team init failed");
- self->local_mcast_ctx_ready = 0;
- } else {
- self->local_mcast_ctx_ready = 1;
+
+ self->local_mcast_team_ready = 0;
+ if (ctx->mcast.mcast_ctx_ready) {
+ status = ucc_tl_mlx5_mcast_team_init(tl_context, &(self->mcast), &(ctx->mcast),
+ params, &(UCC_TL_MLX5_TEAM_LIB(self)->cfg.mcast_conf));
+ if (UCC_OK != status) {
+ tl_warn(tl_context->lib, "mcast team init failed");
+ } else {
+ self->local_mcast_team_ready = 1;
+ }
}

self->mcast_state = TL_MLX5_TEAM_STATE_MCAST_CTX_CHECK;
Expand Down Expand Up @@ -186,9 +189,9 @@ ucc_status_t ucc_tl_mlx5_team_create_test(ucc_base_team_t *team)
/* mcast context is not available for some of the team members so we cannot create
* mcast team */
tl_debug(UCC_TL_TEAM_LIB(tl_team),
- "failure during mcast ctx create, no mcast team support");
+ "some of the ranks do not have mcast context available so no mcast team is created");

- if (tl_team->local_mcast_ctx_ready) {
+ if (tl_team->local_mcast_team_ready) {
comm = tl_team->mcast->mcast_comm;
/* release the resources */
if (ibv_dereg_mr(comm->grh_mr)) {
Expand Down Expand Up @@ -230,17 +233,11 @@ ucc_status_t ucc_tl_mlx5_team_create_test(ucc_base_team_t *team)
tl_team->mcast_state = TL_MLX5_TEAM_STATE_MCAST_NOT_AVAILABLE;
}

- tl_debug(team->context->lib, "attempted to initialize tl team: %p: MCAST component is %s ALLTOALL component is %s",
+ tl_debug(team->context->lib, "team %p: MCAST component is %s ALLTOALL component is %s",
team, (tl_team->mcast_state == TL_MLX5_TEAM_STATE_MCAST_READY)?"ENABLED":"DISABLED",
(tl_team->a2a_state == TL_MLX5_TEAM_STATE_ALLTOALL_READY)?"ENABLED":"DISABLED");
}

- if (tl_team->mcast_state == TL_MLX5_TEAM_STATE_MCAST_NOT_AVAILABLE &&
- tl_team->a2a_state == TL_MLX5_TEAM_STATE_ALLTOALL_NOT_AVAILABLE) {
- tl_warn(team->context->lib, "unable to initialize tl team as both ALLTOALL and MCAST are not available: %p", team);
- return UCC_ERR_NO_RESOURCE;
- }
-
return UCC_OK;
}

Expand All @@ -252,7 +249,7 @@ ucc_status_t ucc_tl_mlx5_team_create_test(ucc_base_team_t *team)
tl_team->local_status_array[UCC_TL_MLX5_A2A_STATUS_INDEX] =
tl_team->a2a_status.local;
tl_team->local_status_array[UCC_TL_MLX5_MCAST_STATUS_INDEX] =
- (tl_team->local_mcast_ctx_ready) ? UCC_OK : UCC_ERR_NO_RESOURCE;
+ (tl_team->local_mcast_team_ready) ? UCC_OK : UCC_ERR_NO_RESOURCE;
goto initial_sync_post;
}

Expand Down
Loading