Skip to content

Commit

Permalink
plugin update to nccl-2.20
Browse files Browse the repository at this point in the history
  • Loading branch information
bureddy committed Feb 16, 2024
1 parent 3ff78de commit baed7db
Show file tree
Hide file tree
Showing 12 changed files with 933 additions and 443 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ AC_ARG_WITH([verbs],
AC_CHECK_HEADER( [infiniband/verbs.h], [],[AC_MSG_FAILURE([ibverbs header files not found])])
AC_CHECK_LIB([ibverbs], [ibv_get_device_list], [],[AC_MSG_FAILURE([libibverbs not found]);])

AC_CHECK_DECLS([IBV_ACCESS_RELAXED_ORDERING, IBV_QPF_GRH_REQUIRED, ibv_reg_dmabuf_mr], [], [],
AC_CHECK_DECLS([IBV_ACCESS_RELAXED_ORDERING, IBV_QPF_GRH_REQUIRED, ibv_reg_dmabuf_mr, ibv_query_ece, ibv_set_ece], [], [],
[[#include <infiniband/verbs.h>]])

# check for ucx
Expand Down
2 changes: 2 additions & 0 deletions include/ibvwrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ static inline ncclResult_t wrap_ibv_poll_cq(struct ibv_cq *cq, int num_entries,
ncclResult_t wrap_ibv_create_qp(struct ibv_qp **ret, struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr);
ncclResult_t wrap_ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
ncclResult_t wrap_ibv_destroy_qp(struct ibv_qp *qp);
ncclResult_t wrap_ibv_query_ece(struct ibv_qp *qp, struct ibv_ece *ece, int* supported);
ncclResult_t wrap_ibv_set_ece(struct ibv_qp *qp, struct ibv_ece *ece, int* supported);
ncclResult_t wrap_ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr);
ncclResult_t wrap_ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr);
ncclResult_t wrap_ibv_event_type_str(char **ret, enum ibv_event_type event);
Expand Down
47 changes: 42 additions & 5 deletions include/nccl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
#endif

#define NCCL_MAJOR 2
#define NCCL_MINOR 15
#define NCCL_PATCH 1
#define NCCL_MINOR 20
#define NCCL_PATCH 3
#define NCCL_SUFFIX ""

#define NCCL_VERSION_CODE 21510
#define NCCL_VERSION_CODE 22003
#define NCCL_VERSION(X,Y,Z) (((X) <= 2 && (Y) <= 8) ? (X) * 1000 + (Y) * 100 + (Z) : (X) * 10000 + (Y) * 100 + (Z))

#ifdef __cplusplus
Expand All @@ -42,15 +42,24 @@ typedef enum { ncclSuccess = 0,
ncclInProgress = 7,
ncclNumResults = 8 } ncclResult_t;

/* Initial ("undefined") value given to the int attributes in the config
 * initializer. Expands to INT_MIN, which is provided by <limits.h>. */
#define NCCL_CONFIG_UNDEF_INT INT_MIN
/* Initial ("undefined") value given to pointer attributes (netName) in the
 * config initializer. */
#define NCCL_CONFIG_UNDEF_PTR NULL
/* NOTE(review): presumably the "no color" value for a communicator split;
 * not used in the visible part of this header -- confirm against the NCCL API docs. */
#define NCCL_SPLIT_NOCOLOR -1

/* Communicator configuration. Users can assign values to attributes to specify the
* behavior of a communicator. */
typedef struct ncclConfig_v21400 {
typedef struct ncclConfig_v21700 {
/* attributes that users should never touch. */
size_t size;
unsigned int magic;
unsigned int version;
/* attributes that users are able to customize. */
int blocking;
int cgaClusterSize;
int minCTAs;
int maxCTAs;
const char *netName;
int splitShare;
} ncclConfig_t;

/* Config initializer must be assigned to initialize config structure when it is created.
Expand All @@ -59,9 +68,23 @@ typedef struct ncclConfig_v21400 {
sizeof(ncclConfig_t), /* size */ \
0xcafebeef, /* magic */ \
NCCL_VERSION(NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH), /* version */ \
1 /* blocking */ \
NCCL_CONFIG_UNDEF_INT, /* blocking */ \
NCCL_CONFIG_UNDEF_INT, /* cgaClusterSize */ \
NCCL_CONFIG_UNDEF_INT, /* minCTAs */ \
NCCL_CONFIG_UNDEF_INT, /* maxCTAs */ \
NCCL_CONFIG_UNDEF_PTR, /* netName */ \
NCCL_CONFIG_UNDEF_INT /* splitShare */ \
}

/* NCCL malloc and free function for all types of NCCL optimizations
* (e.g. user buffer registration). The actual allocated size might
* be larger than requested due to granularity requirement. */
ncclResult_t ncclMemAlloc(void** ptr, size_t size);
ncclResult_t pncclMemAlloc(void** ptr, size_t size);

ncclResult_t ncclMemFree(void *ptr);
ncclResult_t pncclMemFree(void *ptr);

/* Return the NCCL_VERSION_CODE of the NCCL library in the supplied integer.
* This integer is coded with the MAJOR, MINOR and PATCH level of the
* NCCL library
Expand Down Expand Up @@ -119,6 +142,10 @@ ncclResult_t pncclCommAbort(ncclComm_t comm);
const char* ncclGetErrorString(ncclResult_t result);
const char* pncclGetErrorString(ncclResult_t result);

/* Returns a human-readable message of the last error that occurred. */
const char* ncclGetLastError(ncclComm_t comm);
const char* pncclGetLastError(ncclComm_t comm);

/* Checks whether the comm has encountered any asynchronous errors */
ncclResult_t ncclCommGetAsyncError(ncclComm_t comm, ncclResult_t *asyncError);
ncclResult_t pncclCommGetAsyncError(ncclComm_t comm, ncclResult_t *asyncError);
Expand All @@ -135,6 +162,16 @@ ncclResult_t pncclCommCuDevice(const ncclComm_t comm, int* device);
ncclResult_t ncclCommUserRank(const ncclComm_t comm, int* rank);
ncclResult_t pncclCommUserRank(const ncclComm_t comm, int* rank);


/* Register CUDA buffer for zero-copy operation */
ncclResult_t ncclCommRegister(const ncclComm_t comm, void* buff, size_t size, void** handle);
ncclResult_t pncclCommRegister(const ncclComm_t comm, void* buff, size_t size, void** handle);

/* Deregister CUDA buffer */
ncclResult_t ncclCommDeregister(const ncclComm_t comm, void* handle);
ncclResult_t pncclCommDeregister(const ncclComm_t comm, void* handle);


/* Reduction operation selector */
typedef enum { ncclNumOps_dummy = 5 } ncclRedOp_dummy_t;
typedef enum { ncclSum = 0,
Expand Down
56 changes: 34 additions & 22 deletions include/p2p_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,27 @@ struct ncclIbMrCache {
int capacity, population;
};

/* Maximum number of ncclIbDevs entries that one ncclIbMergedDev can reference. */
#define NCCL_IB_MAX_DEVS_PER_NIC 2
/* Size of a merged device name buffer: MAXNAMESIZE characters per device plus
 * one extra character per device (room for the '+' separators).
 * The whole replacement list is parenthesized so the macro keeps its value when
 * used inside a larger expression (e.g. `2 * MAX_MERGED_DEV_NAME`). */
#define MAX_MERGED_DEV_NAME ((MAXNAMESIZE*NCCL_IB_MAX_DEVS_PER_NIC)+NCCL_IB_MAX_DEVS_PER_NIC)
/* Describes one merged device: a group of up to NCCL_IB_MAX_DEVS_PER_NIC
 * entries of ncclIbDevs that share a single combined name.
 * Instances are aligned to 64 bytes (see the attribute on the closing brace). */
struct ncclIbMergedDev {
int ndevs; // presumably the number of valid entries in devs[] -- TODO confirm
int devs[NCCL_IB_MAX_DEVS_PER_NIC]; // Points to an index in ncclIbDevs
int speed; // NOTE(review): units, and whether this is summed over devs[], are not visible here
char devName[MAX_MERGED_DEV_NAME]; // Up to NCCL_IB_MAX_DEVS_PER_NIC * name size, and a character for each '+'
} __attribute__((aligned(64)));

struct ncclIbRequest {
struct ncclIbVerbs* verbs;
struct ncclIbNetCommBase* base;
int type;
int events;
struct ncclSocket* sock;
struct ncclIbGidInfo* gidInfo;
int events[NCCL_IB_MAX_DEVS_PER_NIC];
struct ncclIbNetCommDevBase* devBases[NCCL_IB_MAX_DEVS_PER_NIC];
int nreqs;
union {
struct {
int size;
void* data;
uint32_t lkey;
uint32_t lkeys[NCCL_IB_MAX_DEVS_PER_NIC];
int offset;
} send;
struct {
Expand All @@ -64,56 +73,57 @@ struct ncclIbRequest {
};
};

struct ncclIbVerbs {
int dev;
struct ibv_pd* pd; // duplicate of ncclIbDevs[dev].pd
// Retain local RoCE address for error logging
struct ncclIbGidInfo {
uint8_t link_layer;
union ibv_gid localGid;
};

typedef struct ncclIbNetCommDevBase {
int ibDevN;
struct ibv_pd* pd;
struct ibv_cq* cq;
uint64_t pad[1];
struct ncclIbRequest reqs[MAX_REQUESTS];
};
struct ncclIbGidInfo gidInfo;
} ncclIbNetCommDevBase;

typedef struct ncclIbDev {
pthread_mutex_t lock;
int device;
uint64_t guid;
uint8_t port;
uint8_t portNum;
uint8_t link;
uint8_t isSharpDev;
int speed;
struct ibv_context* context;
int pdRefs;
struct ibv_pd* pd;
struct ncclIbVerbs verbs;
char devName[MAXNAMESIZE];
char *pciPath;
int realPort;
int maxQp;
struct ncclIbMrCache mrCache;
int ar; // ADAPTIVE_ROUTING
} __attribute__((aligned(64))) nccl_ib_dev_t;
struct ibv_port_attr portAttr;
} __attribute__((aligned(64))) ncclIbDev;

/* NOTE(review): presumably the highest port number tracked per device -- confirm at the use sites. */
#define MAX_IB_PORT 15
/* A device name paired with a 16-bit port field. */
struct userIbDev {
char devName[MAXNAMESIZE]; // device name, at most MAXNAMESIZE bytes
uint16_t port_en; // NOTE(review): looks like a per-port enable bitmask (16 bits vs. MAX_IB_PORT 15) -- confirm
};

/* Number of entries in each of the device tables declared below. */
#define MAX_IB_DEVS 32
/* NOTE(review): unlike the two declarations that follow, this one has no
 * `extern`, so every translation unit that includes this header gets its own
 * (tentative) definition of ncclIbMergedDevs. That can fail to link with
 * -fno-common (the default since GCC 10). Consider `extern` here plus a single
 * definition in one .c file -- confirm where the array is meant to be defined. */
struct ncclIbMergedDev ncclIbMergedDevs[MAX_IB_DEVS];
extern struct ncclIbDev ncclIbDevs[MAX_IB_DEVS];
/* NOTE(review): element type is ncclIbDev although struct userIbDev is defined
 * just above -- confirm this is intended. */
extern struct ncclIbDev userIbDevs[MAX_IB_DEVS];
/* Detect whether GDR can work on a given NIC with the current CUDA device
* Returns :
* ncclSuccess : GDR works
* ncclSystemError : no module or module loaded but not supported by GPU */
ncclResult_t nccl_p2p_gdr_support(int dev);
ncclResult_t nccl_p2p_gdr_support();

ncclResult_t nccl_p2p_dmabuf_support(int dev);

ncclResult_t nccl_p2p_ib_pci_path(nccl_ib_dev_t *devs, int num_devs, char* dev_name, char** path, int* real_port);
ncclResult_t nccl_p2p_ib_pci_path(ncclIbDev *devs, int num_devs, char* dev_name, char** path, int* real_port);

ncclResult_t nccl_p2p_ib_get_properties(nccl_ib_dev_t *devs, int dev, ncclNetProperties_t* props);
ncclResult_t nccl_p2p_ib_get_properties(ncclIbDev *devs, int dev, ncclNetProperties_t* props);

ncclResult_t nccl_p2p_ib_init(int *num_devs, nccl_ib_dev_t *ncclIbDevs, char *ncclIbIfName, union ncclSocketAddress *ncclIbIfAddr, pthread_t *ncclIbAsyncThread, ncclDebugLogger_t logFunction);
ncclResult_t nccl_p2p_ib_init(int *num_devs, ncclIbDev *ncclIbDevs, char *ncclIbIfName, union ncclSocketAddress *ncclIbIfAddr, pthread_t *ncclIbAsyncThread, ncclDebugLogger_t logFunction);

/* Convert value returned by ibv_query_port to actual link width */
int nccl_p2p_ib_width(int width);
Expand All @@ -125,6 +135,8 @@ int64_t ncclParamSharpMaxComms();

int64_t ncclParamIbMergeVfs();

int64_t ncclParamIbMergeNics();

int ncclIbRelaxedOrderingCapable(void);

nccl_p2p_plugin_t nccl_p2p_get_plugin_type();
Expand Down
1 change: 1 addition & 0 deletions include/param.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
const char* userHomeDir();
void setEnvFile(const char* fileName);
void initEnv();
const char *ncclGetEnv(const char *name);

void ncclLoadParam(char const* env, int64_t deftVal, int64_t uninitialized, int64_t* cache);

Expand Down
Loading

0 comments on commit baed7db

Please sign in to comment.