diff --git a/src/components/tl/ucp/alltoall/alltoall_onesided.c b/src/components/tl/ucp/alltoall/alltoall_onesided.c
index 5f1c2e8f60..0c4c8b7ae9 100644
--- a/src/components/tl/ucp/alltoall/alltoall_onesided.c
+++ b/src/components/tl/ucp/alltoall/alltoall_onesided.c
@@ -34,13 +34,12 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_start(ucc_coll_task_t *ctask)
     /* TODO: change when support for library-based work buffers is complete */
     nelems = (nelems / gsize) * ucc_dt_size(TASK_ARGS(task).src.info.datatype);
     dest   = dest + grank * nelems;
-    mtype  = (TASK_ARGS(task).src.info.mem_type == UCC_MEMORY_TYPE_DPU) ? UCC_MEMORY_TYPE_HOST : TASK_ARGS(task).src.info.mem_type;
+    mtype  = (TASK_ARGS(task).src.info.mem_type == UCC_MEMORY_TYPE_EXPORTED) ? UCC_MEMORY_TYPE_HOST : TASK_ARGS(task).src.info.mem_type;
     UCPCHECK_GOTO(ucc_tl_ucp_put_nb((void *)(src + start * nelems),
                                     (void *)dest, nelems, start, mtype, team, task),
                   task, out);
     UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, start, team), task, out);
-#if 1
     for (peer = (start + 1) % gsize; peer != start; peer = (peer + 1) % gsize) {
         UCPCHECK_GOTO(ucc_tl_ucp_put_nb((void *)(src + peer * nelems),
                                         (void *)dest, nelems, peer, mtype, team,
                                         task),
@@ -48,7 +47,6 @@ ucc_status_t ucc_tl_ucp_alltoall_onesided_start(ucc_coll_task_t *ctask)
         UCPCHECK_GOTO(ucc_tl_ucp_atomic_inc(pSync, peer, team), task, out);
     }
-#endif
     return ucc_progress_queue_enqueue(UCC_TL_CORE_CTX(team)->pq, &task->super);
 out:
     return task->super.status;
 }
diff --git a/src/components/tl/ucp/alltoallv/alltoallv_onesided.c b/src/components/tl/ucp/alltoallv/alltoallv_onesided.c
index 9bb5b2d309..9089e41657 100644
--- a/src/components/tl/ucp/alltoallv/alltoallv_onesided.c
+++ b/src/components/tl/ucp/alltoallv/alltoallv_onesided.c
@@ -30,7 +30,7 @@ ucc_status_t ucc_tl_ucp_alltoallv_onesided_start(ucc_coll_task_t *ctask)
 
     ucc_tl_ucp_task_reset(task, UCC_INPROGRESS);
 
-    mtype = (TASK_ARGS(task).src.info_v.mem_type == UCC_MEMORY_TYPE_DPU) ? UCC_MEMORY_TYPE_HOST : TASK_ARGS(task).src.info_v.mem_type;
+    mtype = (TASK_ARGS(task).src.info_v.mem_type == UCC_MEMORY_TYPE_EXPORTED) ? UCC_MEMORY_TYPE_HOST : TASK_ARGS(task).src.info_v.mem_type;
     /* perform a put to each member peer using the peer's index in the
      * destination displacement. */
     for (peer = (grank + 1) % gsize; task->onesided.put_posted < gsize;
diff --git a/src/components/tl/ucp/tl_ucp_coll.c b/src/components/tl/ucp/tl_ucp_coll.c
index 0da3b8a33f..d830209d71 100644
--- a/src/components/tl/ucp/tl_ucp_coll.c
+++ b/src/components/tl/ucp/tl_ucp_coll.c
@@ -198,7 +198,7 @@ ucc_status_t ucc_tl_ucp_memmap_append_segment(ucc_tl_ucp_task_t *task,
     ucp_mem_h mh;
 
     // map the memory
-    if (map->mem_type == UCC_MEMORY_TYPE_DPU) {
+    if (map->mem_type == UCC_MEMORY_TYPE_EXPORTED) {
         if (map->resource != NULL) {
             mmap_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER;
             mmap_params.exported_memh_buffer = map->resource;
diff --git a/src/ucc/api/ucc.h b/src/ucc/api/ucc.h
index 4859df1742..0ac297cbf1 100644
--- a/src/ucc/api/ucc.h
+++ b/src/ucc/api/ucc.h
@@ -166,9 +166,11 @@ typedef enum ucc_memory_type {
     UCC_MEMORY_TYPE_CUDA_MANAGED, /*!< NVIDIA CUDA managed memory */
     UCC_MEMORY_TYPE_ROCM,         /*!< AMD ROCM memory */
     UCC_MEMORY_TYPE_ROCM_MANAGED, /*!< AMD ROCM managed system memory */
-    UCC_MEMORY_TYPE_DPU,          /*!< DPU / SmartNIC memory */
     UCC_MEMORY_TYPE_LAST,
-    UCC_MEMORY_TYPE_UNKNOWN = UCC_MEMORY_TYPE_LAST
+    UCC_MEMORY_TYPE_UNKNOWN = UCC_MEMORY_TYPE_LAST,
+    UCC_MEMORY_TYPE_EXPORTED      /*!< Exported memory for use by
+                                       DPU / SmartNIC. Memory is not valid
+                                       for any other use. */
 } ucc_memory_type_t;
 
 /**
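
For context, a minimal sketch of how a caller might use the new enum value. The helper name and buffer setup are hypothetical and not part of this patch; the ucc_coll_args_t fields, ucc_collective_init(), and ucc_collective_post() are the public UCC API. On the TL/UCP path above, buffers tagged UCC_MEMORY_TYPE_EXPORTED are mapped via UCP_MEM_MAP_PARAM_FIELD_EXPORTED_MEMH_BUFFER and thereafter treated as host memory:

/* Hypothetical sketch (not part of this patch): post an alltoall over
 * buffers that were imported from an exported UCP memory handle, tagging
 * them with the new UCC_MEMORY_TYPE_EXPORTED value so the TL/UCP path
 * maps them and then handles them as host memory. */
#include <ucc/api/ucc.h>

ucc_status_t post_alltoall_exported(ucc_team_h team, void *src, void *dst,
                                    uint64_t count, ucc_coll_req_h *req)
{
    ucc_coll_args_t args = {0};
    ucc_status_t    st;

    args.coll_type         = UCC_COLL_TYPE_ALLTOALL;
    args.src.info.buffer   = src;
    args.src.info.count    = count;
    args.src.info.datatype = UCC_DT_UINT8;
    args.src.info.mem_type = UCC_MEMORY_TYPE_EXPORTED; /* new enum value */
    args.dst.info.buffer   = dst;
    args.dst.info.count    = count;
    args.dst.info.datatype = UCC_DT_UINT8;
    args.dst.info.mem_type = UCC_MEMORY_TYPE_EXPORTED;

    st = ucc_collective_init(&args, req, team);
    if (st != UCC_OK) {
        return st;
    }
    return ucc_collective_post(*req);
}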