Skip to content

Commit

Permalink
rdma: rename bounce to rx_buff
Browse files Browse the repository at this point in the history
Bounce buffers are used to receive both eager messages and ctrl messages
-- in the latter case, the name "bounce buffer" doesn't make sense.
Thus, rename to the more generic "rx_buff".

Signed-off-by: Eric Raut <eraut@amazon.com>
  • Loading branch information
rauteric authored and aws-nslick committed Jan 10, 2025
1 parent 92b08c2 commit 86d6896
Show file tree
Hide file tree
Showing 4 changed files with 286 additions and 283 deletions.
4 changes: 2 additions & 2 deletions include/nccl_ofi_msgbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ typedef enum {
} nccl_ofi_msgbuff_result_t;

/* Type of element stored in msg buffer. This is used to distinguish between
reqs and bounce buffers (when we don't have req) stored in the message buffer */
reqs and rx buffers (when we don't have req) stored in the message buffer */
typedef enum {
/* Request */
NCCL_OFI_MSGBUFF_REQ,
/* Bounce buffer */
/* Rx buffer */
NCCL_OFI_MSGBUFF_BUFF
} nccl_ofi_msgbuff_elemtype_t;

Expand Down
10 changes: 6 additions & 4 deletions include/nccl_ofi_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,17 @@ OFI_NCCL_PARAM_INT(disable_dmabuf, "DISABLE_DMABUF", 1);
OFI_NCCL_PARAM_UINT(min_stripe_size, "MIN_STRIPE_SIZE", (128 * 1024));

/*
* Minimum bounce buffers posted per endpoint. The plugin will attempt to post
* more bounce buffers if we dip below this threshold, allocating new bounce
* Minimum rx buffers (ctrl/eager) posted per endpoint. The plugin will attempt
* to post more rx buffers if we dip below this threshold, allocating new rx
* buffers if needed.
*
* Note: the parameter is called "bounce buffer" for backward compatibility.
*/
OFI_NCCL_PARAM_INT(rdma_min_posted_bounce_buffers, "RDMA_MIN_POSTED_BOUNCE_BUFFERS", 64);

/*
* Maximum bounce buffers posted per endpoint. The plugin will not attempt to
* post more bounce buffers if we reach this threshold, returning available
* Maximum rx buffers posted per endpoint. The plugin will not attempt to
* post more rx buffers if we reach this threshold, returning available
buffers to the free list if needed.
*/
OFI_NCCL_PARAM_INT(rdma_max_posted_bounce_buffers, "RDMA_MAX_POSTED_BOUNCE_BUFFERS", 128);
Expand Down
57 changes: 29 additions & 28 deletions include/nccl_ofi_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ typedef enum nccl_net_ofi_rdma_req_type {
NCCL_OFI_RDMA_RECV_SEGMS,
/* Eager local copy request. Subrequest of NCCL_OFI_RDMA_RECV */
NCCL_OFI_RDMA_EAGER_COPY,
/* Bounce request */
NCCL_OFI_RDMA_BOUNCE,
/* Rx buff post request */
NCCL_OFI_RDMA_RX_BUFF,
/* Flush request */
NCCL_OFI_RDMA_FLUSH,
/* Connect message send request */
Expand Down Expand Up @@ -184,8 +184,9 @@ typedef struct nccl_net_ofi_rdma_close_msg {
uint32_t send_comm_id;
} nccl_net_ofi_rdma_close_msg_t;

/* For LL/LL128 protocols, bounce buffers (source of RDMA read operations) need to be 128B aligned */
#define BOUNCE_BUFFER_ALIGNMENT 128
/* For LL/LL128 protocols, eager rx buffers (source of RDMA read operations)
need to be 128B aligned */
#define EAGER_RX_BUFFER_ALIGNMENT 128

struct nccl_net_ofi_rdma_req;
struct nccl_net_ofi_rdma_ep;
Expand All @@ -195,24 +196,24 @@ typedef struct nccl_net_ofi_rdma_ep nccl_net_ofi_rdma_ep_t;
typedef struct nccl_net_ofi_ep_rail nccl_net_ofi_ep_rail_t;

typedef struct {
/* Bounce buffer freelist item */
nccl_ofi_freelist_elem_t *bounce_fl_elem;
/* Length of bounce buffer */
/* Rx buffer freelist item */
nccl_ofi_freelist_elem_t *rx_buff_fl_elem;
/* Length of rx buffer */
size_t buff_len;
/* Length of received data */
size_t recv_len;

/*
* Keeps track of the rail ID which is used to post the bounce buffer.
* This is useful for re-posting the bounce buffer on the same rail
* Keeps track of the rail ID which is used to post the rx buffer.
* This is useful for re-posting the buffer on the same rail
* when it gets completed.
*/
nccl_net_ofi_ep_rail_t *rail;
/*
* Back-pointer to associated endpoint
*/
nccl_net_ofi_rdma_ep_t *ep;
} rdma_req_bounce_data_t;
} rdma_req_rx_buff_data_t;

typedef struct {
/* Remote destination buffer address */
Expand Down Expand Up @@ -297,8 +298,8 @@ typedef struct {
} rdma_req_send_close_data_t;

typedef struct {
/* Pointer to bounce buffer containing eager data */
nccl_net_ofi_rdma_req_t *eager_bounce_req;
/* Pointer to rx buffer containing eager data */
nccl_net_ofi_rdma_req_t *eager_rx_buff_req;
/* Pointer to recv parent request */
nccl_net_ofi_rdma_req_t *recv_req;
} rdma_req_eager_copy_data_t;
Expand Down Expand Up @@ -385,7 +386,7 @@ typedef struct nccl_net_ofi_rdma_req {
rdma_req_eager_copy_data_t eager_copy_data;
rdma_req_recv_segms_data_t recv_segms_data;
rdma_req_flush_data_t flush_data;
rdma_req_bounce_data_t bounce_data;
rdma_req_rx_buff_data_t rx_buff_data;
};

/* Size of completed request */
Expand Down Expand Up @@ -678,17 +679,17 @@ struct nccl_net_ofi_ep_rail {
struct fid_domain *domain;

/*
* Bounce buffer management
* Rx buffer management
*/

/* Number of bounce buffers posted */
size_t num_bounce_posted;
/* Minimum posted bounce buffers (see RDMA_MIN_POSTED_BOUNCE_BUFFERS) */
size_t min_bounce_posted;
/* Maximum posted bounce buffers (see RDMA_MAX_POSTED_BOUNCE_BUFFERS) */
size_t max_bounce_posted;
/* Mutex for bounce buffer operations */
pthread_mutex_t bounce_mutex;
/* Number of rx buffers posted */
size_t num_rx_buff_posted;
/* Minimum posted rx buffers (see RDMA_MIN_POSTED_BOUNCE_BUFFERS) */
size_t min_rx_buff_posted;
/* Maximum posted rx buffers (see RDMA_MAX_POSTED_BOUNCE_BUFFERS) */
size_t max_rx_buff_posted;
/* Mutex for rx buffer operations */
pthread_mutex_t rx_buff_mutex;
};

/*
Expand Down Expand Up @@ -727,12 +728,12 @@ struct nccl_net_ofi_rdma_ep {
/* Pending requests queue */
nccl_ofi_deque_t *pending_reqs_queue;

/* Free list of bounce buffers */
nccl_ofi_freelist_t *bounce_buff_fl;
/* Free list of bounce buffer requests */
nccl_ofi_freelist_t *bounce_buff_reqs_fl;
/* Size of bounce buffers */
size_t bounce_buff_size;
/* Free list of rx buffers */
nccl_ofi_freelist_t *rx_buff_fl;
/* Free list of rx buffer requests */
nccl_ofi_freelist_t *rx_buff_reqs_fl;
/* Size of rx buffers */
size_t rx_buff_size;

/* true if the current endpoint is an endpoint_per_communicator
receive communicator */
Expand Down
Loading

0 comments on commit 86d6896

Please sign in to comment.