Skip to content

Commit

Permalink
CL/DOCA_UROM: Move plugin to contrib
Browse files Browse the repository at this point in the history
  • Loading branch information
nsarka committed Jun 14, 2024
1 parent bd3f9de commit a15f0aa
Show file tree
Hide file tree
Showing 17 changed files with 5,652 additions and 98 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

if !DOCS_ONLY
SUBDIRS = \
contrib \
src \
tools/info \
cmake
Expand Down
82 changes: 0 additions & 82 deletions config/m4/doca_urom_ucc.m4

This file was deleted.

6 changes: 1 addition & 5 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_RDMACM],[false])
AM_CONDITIONAL([HAVE_MLX5DV],[false])
AM_CONDITIONAL([HAVE_DOCA_UROM], [false])
AM_CONDITIONAL([HAVE_DOCA_UROM_UCC], [false])
],
[
AM_CONDITIONAL([DOCS_ONLY], [false])
Expand All @@ -175,7 +174,6 @@ AS_IF([test "x$with_docs_only" = xyes],
m4_include([config/m4/nccl.m4])
m4_include([config/m4/rocm.m4])
m4_include([config/m4/doca_urom.m4])
m4_include([config/m4/doca_urom_ucc.m4])
m4_include([config/m4/rccl.m4])
m4_include([config/m4/sharp.m4])
m4_include([config/m4/mpi.m4])
Expand Down Expand Up @@ -212,9 +210,6 @@ AS_IF([test "x$with_docs_only" = xyes],
CHECK_DOCA_UROM
AC_MSG_RESULT([DOCA_UROM support: $doca_urom_happy])
CHECK_DOCA_UROM_UCC
AC_MSG_RESULT([DOCA_UROM_UCC support: $doca_urom_ucc_happy])
CHECK_GTEST
AC_MSG_RESULT([GTEST support: $gtest_happy])
Expand All @@ -234,6 +229,7 @@ LDFLAGS="$LDFLAGS $UCS_LDFLAGS $UCS_LIBADD"
CHECK_TL_COLL_PLUGINS
AC_CONFIG_FILES([
Makefile
contrib/Makefile
src/Makefile
src/ucc/api/ucc_version.h
src/core/ucc_version.c
Expand Down
24 changes: 24 additions & 0 deletions contrib/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#

# Build the DOCA UROM UCC plugin only when configure enabled DOCA UROM.
# Automake conditionals are written "if COND" / "endif"; with a leading '#'
# they are plain comments and everything in between is built unconditionally.
if HAVE_DOCA_UROM

# Header and source files that make up the plugin module.
sources = \
    doca_urom_ucc_plugin/common/urom_ucc.h \
    doca_urom_ucc_plugin/host/worker_ucc.h \
    doca_urom_ucc_plugin/host/worker_ucc.c \
    doca_urom_ucc_plugin/dpu/worker_ucc_p2p.c \
    doca_urom_ucc_plugin/dpu/worker_ucc.h \
    doca_urom_ucc_plugin/dpu/worker_ucc.c

module_LTLIBRARIES = libucc_doca_urom_plugin.la
libucc_doca_urom_plugin_la_SOURCES = $(sources)
libucc_doca_urom_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) $(BASE_CPPFLAGS) $(DOCA_UROM_CPPFLAGS)
libucc_doca_urom_plugin_la_CFLAGS = $(BASE_CFLAGS)
libucc_doca_urom_plugin_la_LDFLAGS = -version-info $(SOVERSION) --as-needed $(DOCA_UROM_LDFLAGS)
libucc_doca_urom_plugin_la_LIBADD = $(DOCA_UROM_LIBADD)

include $(top_srcdir)/config/module.am

endif
171 changes: 171 additions & 0 deletions contrib/doca_urom_ucc_plugin/common/urom_ucc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/*
* Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES, ALL RIGHTS RESERVED.
*
* This software product is a proprietary product of NVIDIA CORPORATION &
* AFFILIATES (the "Company") and all right, title, and interest in and to the
* software product, including all associated intellectual property rights, are
* and shall remain exclusively with the Company.
*
* This software product is governed by the End User License Agreement
* provided with the software product.
*
*/

#ifndef UROM_UCC_H_
#define UROM_UCC_H_

#include <ucp/api/ucp.h>
#include <ucc/api/ucc.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Return the current serialization position as a "_type *" and advance the
 * iterator past it.
 *
 * _iter   - address of the iterator pointer; it is dereferenced and updated
 *           in place
 * _type   - element type; the macro evaluates to a value of type "_type *"
 * _offset - offset handed to UCS_PTR_BYTE_OFFSET to advance the iterator
 *
 * The value of the expression is the position the iterator pointed to before
 * it was advanced. _iter is evaluated exactly once, so an argument with side
 * effects (e.g. &iters[i++]) is safe. Requires UCS_PTR_BYTE_OFFSET to be in
 * scope at the point of use and relies on the GNU statement-expression and
 * __typeof__ extensions.
 */
#define urom_ucc_serialize_next_raw(_iter, _type, _offset) \
({ \
    __typeof__(_iter) _urom_iter_p = (_iter); \
    _type *_urom_start = (_type *)(*_urom_iter_p); \
    *_urom_iter_p = UCS_PTR_BYTE_OFFSET(*_urom_iter_p, _offset); \
    _urom_start; \
})

/*
 * UCC command types.
 *
 * Enumerator values are implicit (declaration order, starting at 0). The
 * selected value is carried in the cmd_type field of
 * struct urom_worker_ucc_cmd.
 * NOTE(review): there is no command type for team destroy here, although
 * struct urom_worker_cmd_ucc_team_destroy is declared in this header -
 * confirm whether that is intentional.
 */
enum urom_worker_ucc_cmd_type {
UROM_WORKER_CMD_UCC_LIB_CREATE, /* UCC library create command */
UROM_WORKER_CMD_UCC_LIB_DESTROY, /* UCC library destroy command */
UROM_WORKER_CMD_UCC_CONTEXT_CREATE, /* UCC context create command */
UROM_WORKER_CMD_UCC_CONTEXT_DESTROY, /* UCC context destroy command */
UROM_WORKER_CMD_UCC_TEAM_CREATE, /* UCC team create command */
UROM_WORKER_CMD_UCC_COLL, /* UCC collective create command */
UROM_WORKER_CMD_UCC_CREATE_PASSIVE_DATA_CHANNEL, /* UCC passive data channel command */
};

/*
 * UCC library create command structure
 *
 * Input parameters for creating the library handle. The semantics of the
 * parameters are defined by ucc.h.
 * On successful completion of urom_worker_cmd_ucc_lib_create, the UROM worker
 * will generate a notification on the notification queue. This notification
 * has a reference to the local library handle on the worker. The
 * implementation can choose to create shadow handles or safely pack the
 * library handle on the BlueCC worker to the AEU.
 */
struct urom_worker_cmd_ucc_lib_create {
void *params; /* UCC library parameters */
};

/*
 * UCC context create command structure.
 *
 * The anonymous union holds either a start index or a pointer to an array of
 * indices; per the field comment below, stride <= 0 selects the array form.
 * NOTE(review): the descriptions of stride ("number of strides") and size
 * ("stride size") are kept from the original and look imprecise - confirm
 * their exact meaning against the worker implementation.
 */
struct urom_worker_cmd_ucc_context_create {
union {
int64_t start; /* The started index */
int64_t *array; /* Set stride to <= 0 if array is used */
};
int64_t stride; /* Set number of strides */
int64_t size; /* Set stride size */
void *base_va; /* Shared buffer address */
uint64_t len; /* Buffer length */
};

/*
 * UCC passive data channel command structure.
 *
 * Carries a UCP worker address together with its length.
 */
struct urom_worker_cmd_ucc_pass_dc {
void *ucp_addr; /* UCP worker address on host */
size_t addr_len; /* UCP worker address length */
};

/*
 * UCC context destroy command structure.
 *
 * Identifies the context to destroy by an opaque pointer.
 */
struct urom_worker_cmd_ucc_context_destroy {
void *context_h; /* UCC context pointer */
};

/*
 * UCC team create command structure.
 *
 * Describes the team members through start/stride/size and names the context
 * the team is created on.
 * NOTE(review): the stride/size descriptions are kept from the original -
 * confirm their exact meaning against the worker implementation.
 */
struct urom_worker_cmd_ucc_team_create {
int64_t start; /* Team start index */
int64_t stride; /* Number of strides */
int64_t size; /* Stride size */
void *context_h; /* UCC context */
};

/*
 * UCC team destroy command structure.
 *
 * Identifies the team to destroy by an opaque pointer.
 * NOTE(review): enum urom_worker_ucc_cmd_type in this header has no matching
 * team destroy command type - confirm how this structure is dispatched.
 */
struct urom_worker_cmd_ucc_team_destroy {
void *team; /* UCC team to destroy */
};

/*
 * UCC collective command structure.
 *
 * Bundles the collective arguments, the team the collective runs on, and a
 * work buffer with its size.
 */
struct urom_worker_cmd_ucc_coll {
void *coll_args; /* Collective arguments */
void *team; /* UCC team */
int use_xgvmi; /* If operation uses XGVMI */
void *work_buffer; /* Work buffer */
size_t work_buffer_size; /* Buffer size */
size_t team_size; /* Team size */
};

/*
 * UROM UCC worker command structure.
 *
 * A command consists of a type, the id of the DPU worker it is addressed to,
 * and an anonymous union with one member per command payload.
 * NOTE(review): presumably cmd_type selects which union member is valid -
 * confirm against the code that builds and parses commands.
 */
struct urom_worker_ucc_cmd {
uint64_t cmd_type; /* Type of command as defined by urom_worker_ucc_cmd_type */
uint64_t dpu_worker_id; /* DPU worker id as part of the team */
union {
struct urom_worker_cmd_ucc_lib_create lib_create_cmd; /* Lib create command */
struct urom_worker_cmd_ucc_context_create context_create_cmd; /* Context create command */
struct urom_worker_cmd_ucc_context_destroy context_destroy_cmd; /* Context destroy command */
struct urom_worker_cmd_ucc_team_create team_create_cmd; /* Team create command */
struct urom_worker_cmd_ucc_team_destroy team_destroy_cmd; /* Team destroy command */
struct urom_worker_cmd_ucc_coll coll_cmd; /* UCC collective command */
struct urom_worker_cmd_ucc_pass_dc pass_dc_create_cmd; /* Passive data channel command */
};
};

/*
 * UCC notification types.
 *
 * Enumerator values are implicit (declaration order, starting at 0). The
 * selected value is carried in the notify_type field of
 * struct urom_worker_notify_ucc.
 */
enum urom_worker_ucc_notify_type {
UROM_WORKER_NOTIFY_UCC_LIB_CREATE_COMPLETE, /* Create UCC library on DPU notification */
UROM_WORKER_NOTIFY_UCC_LIB_DESTROY_COMPLETE, /* Destroy UCC library on DPU notification */
UROM_WORKER_NOTIFY_UCC_CONTEXT_CREATE_COMPLETE, /* Create UCC context on DPU notification */
UROM_WORKER_NOTIFY_UCC_CONTEXT_DESTROY_COMPLETE, /* Destroy UCC context on DPU notification */
UROM_WORKER_NOTIFY_UCC_TEAM_CREATE_COMPLETE, /* Create UCC team on DPU notification */
UROM_WORKER_NOTIFY_UCC_COLLECTIVE_COMPLETE, /* UCC collective completion notification */
UROM_WORKER_NOTIFY_UCC_PASSIVE_DATA_CHANNEL_COMPLETE, /* UCC data channel completion notification */
};

/*
 * UCC context create notification structure.
 *
 * Reports the created context as an opaque pointer.
 */
struct urom_worker_ucc_notify_context_create {
void *context; /* Pointer to UCC context */
};

/*
 * UCC team create notification structure.
 *
 * Reports the created team as an opaque pointer.
 */
struct urom_worker_ucc_notify_team_create {
void *team; /* Pointer to UCC team */
};

/*
 * UCC collective notification structure.
 *
 * Reports the status of a collective as a ucc_status_t value.
 */
struct urom_worker_ucc_notify_collective {
ucc_status_t status; /* UCC collective status */
};

/*
 * UCC passive data channel notification structure.
 *
 * Reports the status of the data channel as a ucc_status_t value.
 */
struct urom_worker_ucc_notify_pass_dc {
ucc_status_t status; /* UCC data channel status */
};

/*
 * UROM UCC worker notification structure.
 *
 * A notification consists of a type, the id of the DPU worker, and an
 * anonymous union of type-specific payloads. Only context create, team
 * create, collective and passive data channel notifications have a payload
 * member; the remaining notification types have none in this structure.
 * NOTE(review): presumably notify_type selects which union member is valid -
 * confirm against the code that produces and consumes notifications.
 */
struct urom_worker_notify_ucc {
uint64_t notify_type; /* Notify type as defined by urom_worker_ucc_notify_type */
uint64_t dpu_worker_id; /* DPU worker id */
union {
struct urom_worker_ucc_notify_context_create context_create_nqe; /* Context create notification */
struct urom_worker_ucc_notify_team_create team_create_nqe; /* Team create notification */
struct urom_worker_ucc_notify_collective coll_nqe; /* Collective notification */
struct urom_worker_ucc_notify_pass_dc pass_dc_nqe; /* Passive data channel notification */
};
};

/*
 * Fixed-size key buffer: two lengths followed by 1024 bytes of storage.
 * NOTE(review): presumably src_len and dst_len are the lengths of a source
 * and a destination key stored in rkeys - confirm the layout and units
 * against the code that fills and reads this structure.
 */
typedef struct ucc_worker_key_buf {
size_t src_len; /* Length associated with the source key (TODO confirm) */
size_t dst_len; /* Length associated with the destination key (TODO confirm) */
char rkeys[1024]; /* Raw key storage */
} ucc_worker_key_buf;

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* UROM_UCC_H_ */
Loading

0 comments on commit a15f0aa

Please sign in to comment.