Skip to content

Commit

Permalink
TL/MLX5: fix warning and var names (#962)
Browse files (browse the repository at this point in the history)
  • Loading branch information
MamziB authored Apr 22, 2024
1 parent 8a465a5 commit 132a1c2
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/mcast/tl_mlx5_mcast.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ typedef struct ucc_tl_mlx5_mcast_context {
ucc_tl_mlx5_mcast_context_config_t cfg;
ucc_mpool_t req_mp;
int mcast_enabled;
int mcast_ready;
int mcast_ctx_ready;
ucc_tl_mlx5_mcast_oob_ctx_t oob_ctx;
} ucc_tl_mlx5_mcast_context_t;

Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/mcast/tl_mlx5_mcast_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ ucc_status_t ucc_tl_mlx5_mcast_team_init(ucc_base_context_t *base_context,
ucc_tl_mlx5_mcast_coll_comm_t *comm;
int i;

if (!ctx->mcast_enabled || !ctx->mcast_ready || NULL == mcast_context) {
if (!ctx->mcast_ctx_ready) {
tl_debug(base_context->lib,
"mcast context not available, base_context = %p",
base_context );
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/tl_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ typedef struct ucc_tl_mlx5_team {
ucc_tl_mlx5_alltoall_t *a2a;
ucc_topo_t *topo;
ucc_ep_map_t ctx_map;
int local_mcast_ctx_ready;
int local_mcast_team_ready;
ucc_tl_mlx5_mcast_team_t *mcast;
ucc_status_t local_status_array[UCC_TL_MLX5_FEATURES_COUNT];
ucc_status_t global_status_array[UCC_TL_MLX5_FEATURES_COUNT];
Expand Down
6 changes: 3 additions & 3 deletions src/components/tl/mlx5/tl_mlx5_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_context_t,
goto err_rcache;
}

self->mcast.mcast_ready = 0;
self->mcast.mcast_ctx_ready = 0;
if (params->thread_mode == UCC_THREAD_SINGLE) {
status = ucc_tl_mlx5_mcast_context_init(&(self->mcast), &(self->cfg.mcast_ctx_conf));
if (UCC_OK != status) {
tl_debug(self->super.super.lib, "failed to initialize mcast context");
} else {
self->mcast.mcast_ready = 1;
self->mcast.mcast_ctx_ready = 1;
}
}
return UCC_OK;
Expand All @@ -82,7 +82,7 @@ UCC_CLASS_CLEANUP_FUNC(ucc_tl_mlx5_context_t)

ucc_mpool_cleanup(&self->req_mp, 1);

if (self->mcast.mcast_ready) {
if (self->mcast.mcast_ctx_ready) {
ucc_tl_mlx5_mcast_clean_ctx(&self->mcast.mcast_context);
}
}
Expand Down
31 changes: 14 additions & 17 deletions src/components/tl/mlx5/tl_mlx5_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,16 @@ UCC_CLASS_INIT_FUNC(ucc_tl_mlx5_team_t, ucc_base_context_t *tl_context,
}

self->mcast = NULL;
status = ucc_tl_mlx5_mcast_team_init(tl_context, &(self->mcast), &(ctx->mcast),
params, &(UCC_TL_MLX5_TEAM_LIB(self)->cfg.mcast_conf));
if (UCC_OK != status) {
tl_warn(tl_context->lib, "mcast team init failed");
self->local_mcast_ctx_ready = 0;
} else {
self->local_mcast_ctx_ready = 1;

self->local_mcast_team_ready = 0;
if (ctx->mcast.mcast_ctx_ready) {
status = ucc_tl_mlx5_mcast_team_init(tl_context, &(self->mcast), &(ctx->mcast),
params, &(UCC_TL_MLX5_TEAM_LIB(self)->cfg.mcast_conf));
if (UCC_OK != status) {
tl_warn(tl_context->lib, "mcast team init failed");
} else {
self->local_mcast_team_ready = 1;
}
}

self->mcast_state = TL_MLX5_TEAM_STATE_MCAST_CTX_CHECK;
Expand Down Expand Up @@ -186,9 +189,9 @@ ucc_status_t ucc_tl_mlx5_team_create_test(ucc_base_team_t *team)
/* mcast context is not available for some of the team members so we cannot create
* mcast team */
tl_debug(UCC_TL_TEAM_LIB(tl_team),
"failure during mcast ctx create, no mcast team support");
"some of the ranks do not have mcast context available so no mcast team is created");

if (tl_team->local_mcast_ctx_ready) {
if (tl_team->local_mcast_team_ready) {
comm = tl_team->mcast->mcast_comm;
/* release the resources */
if (ibv_dereg_mr(comm->grh_mr)) {
Expand Down Expand Up @@ -230,17 +233,11 @@ ucc_status_t ucc_tl_mlx5_team_create_test(ucc_base_team_t *team)
tl_team->mcast_state = TL_MLX5_TEAM_STATE_MCAST_NOT_AVAILABLE;
}

tl_debug(team->context->lib, "attempted to initialize tl team: %p: MCAST component is %s ALLTOALL component is %s",
tl_debug(team->context->lib, "team %p: MCAST component is %s ALLTOALL component is %s",
team, (tl_team->mcast_state == TL_MLX5_TEAM_STATE_MCAST_READY)?"ENABLED":"DISABLED",
(tl_team->a2a_state == TL_MLX5_TEAM_STATE_ALLTOALL_READY)?"ENABLED":"DISABLED");
}

if (tl_team->mcast_state == TL_MLX5_TEAM_STATE_MCAST_NOT_AVAILABLE &&
tl_team->a2a_state == TL_MLX5_TEAM_STATE_ALLTOALL_NOT_AVAILABLE) {
tl_warn(team->context->lib, "unable to initialize tl team as both ALLTOALL and MCAST are not available: %p", team);
return UCC_ERR_NO_RESOURCE;
}

return UCC_OK;
}

Expand All @@ -252,7 +249,7 @@ ucc_status_t ucc_tl_mlx5_team_create_test(ucc_base_team_t *team)
tl_team->local_status_array[UCC_TL_MLX5_A2A_STATUS_INDEX] =
tl_team->a2a_status.local;
tl_team->local_status_array[UCC_TL_MLX5_MCAST_STATUS_INDEX] =
(tl_team->local_mcast_ctx_ready) ? UCC_OK : UCC_ERR_NO_RESOURCE;
(tl_team->local_mcast_team_ready) ? UCC_OK : UCC_ERR_NO_RESOURCE;
goto initial_sync_post;
}

Expand Down

0 comments on commit 132a1c2

Please sign in to comment.