diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f717c84509..b569198343f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ ## v20.04: (Upcoming Release) +### rpc + +Add optional 'no_wr_batching' parameter to 'nvmf_create_transport' RPC method. + +### nvmf + +Add 'no_wr_batching' parameter in 'spdk_nvmf_transport_opts' for the ability to disable WR batching in the RDMA transport. + +Add NoWRBatching option in [Transport] section; it can be used to disable RDMA WR batching. + ### vmd A new function, `spdk_vmd_fini`, has been added. It releases all resources acquired by the VMD library through the `spdk_vmd_init` call. diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 7aff53c501a..14b00883b71 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -3844,6 +3844,7 @@ no_srq | Optional | boolean | Disable shared receive queue c2h_success | Optional | boolean | Disable C2H success optimization (TCP only) dif_insert_or_strip | Optional | boolean | Enable DIF insert for write I/O and DIF strip for read I/O DIF (TCP only) sock_priority | Optional | number | The socket priority of the connection owned by this transport (TCP only) +no_wr_batching | Optional | boolean | Disable work requests batching (RDMA only) ### Example: diff --git a/etc/spdk/nvmf.conf.in b/etc/spdk/nvmf.conf.in index 5799f65cfbd..3619f53523f 100644 --- a/etc/spdk/nvmf.conf.in +++ b/etc/spdk/nvmf.conf.in @@ -111,6 +111,9 @@ # Set the maximum number outstanding I/O per shared receive queue. Relevant only for RDMA transport #MaxSRQDepth 4096 + # Disable batching for RDMA requests + #NoWRBatching False + [Transport] # Set TCP transport type.
Type TCP diff --git a/include/spdk/nvmf.h b/include/spdk/nvmf.h index 4b9133da452..f59ddad3abd 100644 --- a/include/spdk/nvmf.h +++ b/include/spdk/nvmf.h @@ -83,6 +83,7 @@ struct spdk_nvmf_transport_opts { bool no_srq; bool c2h_success; bool dif_insert_or_strip; + bool no_wr_batching; uint32_t sock_priority; }; diff --git a/lib/nvmf/nvmf_rpc.c b/lib/nvmf/nvmf_rpc.c index 9855c942a0a..56913a51078 100644 --- a/lib/nvmf/nvmf_rpc.c +++ b/lib/nvmf/nvmf_rpc.c @@ -1602,6 +1602,10 @@ static const struct spdk_json_object_decoder nvmf_rpc_create_transport_decoder[] "tgt_name", offsetof(struct nvmf_rpc_create_transport_ctx, tgt_name), spdk_json_decode_string, true }, + { + "no_wr_batching", offsetof(struct nvmf_rpc_create_transport_ctx, opts.no_wr_batching), + spdk_json_decode_bool, true + }, }; static void @@ -1745,6 +1749,7 @@ dump_nvmf_transport(struct spdk_json_write_ctx *w, struct spdk_nvmf_transport *t if (type == SPDK_NVME_TRANSPORT_RDMA) { spdk_json_write_named_uint32(w, "max_srq_depth", opts->max_srq_depth); spdk_json_write_named_bool(w, "no_srq", opts->no_srq); + spdk_json_write_named_bool(w, "no_wr_batching", opts->no_wr_batching); } else if (type == SPDK_NVME_TRANSPORT_TCP) { spdk_json_write_named_bool(w, "c2h_success", opts->c2h_success); spdk_json_write_named_uint32(w, "sock_priority", opts->sock_priority); diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index ad3b4a631c2..b32ba13a7db 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -522,6 +522,14 @@ struct spdk_nvmf_rdma_transport { static inline void spdk_nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair); +static void +_poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller); + +static void +_poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport, + struct spdk_nvmf_rdma_poller *rpoller); + static inline int spdk_nvmf_rdma_check_ibv_state(enum ibv_qp_state state) { @@ -1105,6 +1113,7 @@ static void
nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *first) { struct ibv_recv_wr *last; + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, struct spdk_nvmf_rdma_transport, transport); last = first; while (last->next != NULL) { @@ -1121,6 +1130,10 @@ nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_r rqpair->resources->recvs_to_post.last->next = first; rqpair->resources->recvs_to_post.last = last; } + + if (rtransport->transport.opts.no_wr_batching) { + _poller_submit_recvs(rtransport, rqpair->poller); + } } /* Append the given send wr structure to the qpair's outstanding sends list. */ @@ -1129,6 +1142,7 @@ static void nvmf_rdma_qpair_queue_send_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *first) { struct ibv_send_wr *last; + struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, struct spdk_nvmf_rdma_transport, transport); last = first; while (last->next != NULL) { @@ -1143,6 +1157,10 @@ nvmf_rdma_qpair_queue_send_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_s rqpair->sends_to_post.last->next = first; rqpair->sends_to_post.last = last; } + + if (rtransport->transport.opts.no_wr_batching) { + _poller_submit_sends(rtransport, rqpair->poller); + } } static int @@ -2295,6 +2313,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, #define SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE 32 #define SPDK_NVMF_RDMA_DEFAULT_NO_SRQ false #define SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP false +#define SPDK_NVMF_RDMA_DEFAULT_NO_WR_BATCHING false static void spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts) @@ -2310,6 +2329,7 @@ spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts) opts->max_srq_depth = SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH; opts->no_srq = SPDK_NVMF_RDMA_DEFAULT_NO_SRQ; opts->dif_insert_or_strip = SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP; + opts->no_wr_batching = 
SPDK_NVMF_RDMA_DEFAULT_NO_WR_BATCHING; } const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = { @@ -2370,7 +2390,8 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) " Transport opts: max_ioq_depth=%d, max_io_size=%d,\n" " max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n" " in_capsule_data_size=%d, max_aq_depth=%d,\n" - " num_shared_buffers=%d, max_srq_depth=%d, no_srq=%d\n", + " num_shared_buffers=%d, max_srq_depth=%d, no_srq=%d,\n" + " no_wr_batching=%d\n", opts->max_queue_depth, opts->max_io_size, opts->max_qpairs_per_ctrlr, @@ -2379,7 +2400,8 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts) opts->max_aq_depth, opts->num_shared_buffers, opts->max_srq_depth, - opts->no_srq); + opts->no_srq, + opts->no_wr_batching); /* I/O unit size cannot be larger than max I/O size */ if (opts->io_unit_size > opts->max_io_size) { @@ -3725,7 +3747,7 @@ _qp_reset_failed_recvs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr * spdk_nvmf_rdma_start_disconnect(rqpair); } -static void +static void _poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport, struct spdk_nvmf_rdma_poller *rpoller) { @@ -3818,7 +3840,7 @@ _qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport, } -static void +static void _poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport, struct spdk_nvmf_rdma_poller *rpoller) { diff --git a/module/event/subsystems/nvmf/conf.c b/module/event/subsystems/nvmf/conf.c index 278ede8efc4..df8b8379c10 100644 --- a/module/event/subsystems/nvmf/conf.c +++ b/module/event/subsystems/nvmf/conf.c @@ -642,6 +642,8 @@ spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx) } bval = spdk_conf_section_get_boolval(ctx->sp, "NoSRQ", false); opts.no_srq = bval; + bval = spdk_conf_section_get_boolval(ctx->sp, "NoWRBatching", false); + opts.no_wr_batching = bval; } if (trtype == SPDK_NVME_TRANSPORT_TCP) { diff --git a/scripts/rpc.py b/scripts/rpc.py index 2660d3a72a1..c8a53e88106 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@
-1684,7 +1684,8 @@ def nvmf_create_transport(args): no_srq=args.no_srq, c2h_success=args.c2h_success, dif_insert_or_strip=args.dif_insert_or_strip, - sock_priority=args.sock_priority) + sock_priority=args.sock_priority, + no_wr_batching=args.no_wr_batching) p = subparsers.add_parser('nvmf_create_transport', help='Create NVMf transport') p.add_argument('-t', '--trtype', help='Transport type (ex. RDMA)', type=str, required=True) @@ -1702,6 +1703,7 @@ def nvmf_create_transport(args): p.add_argument('-o', '--c2h-success', action='store_false', help='Disable C2H success optimization. Relevant only for TCP transport') p.add_argument('-f', '--dif-insert-or-strip', action='store_true', help='Enable DIF insert/strip. Relevant only for TCP transport') p.add_argument('-y', '--sock-priority', help='The sock priority of the tcp connection. Relevant only for TCP transport', type=int) + p.add_argument('-b', '--no-wr-batching', action='store_true', help='Disable work requests batching. Relevant only for RDMA transport', default=False) p.set_defaults(func=nvmf_create_transport) def nvmf_get_transports(args): diff --git a/scripts/rpc/nvmf.py b/scripts/rpc/nvmf.py index c471f63373f..e30133718cb 100644 --- a/scripts/rpc/nvmf.py +++ b/scripts/rpc/nvmf.py @@ -106,7 +106,8 @@ def nvmf_create_transport(client, no_srq=False, c2h_success=True, dif_insert_or_strip=None, - sock_priority=None): + sock_priority=None, + no_wr_batching=None): """NVMf Transport Create options.
Args: @@ -123,7 +124,7 @@ def nvmf_create_transport(client, no_srq: Boolean flag to disable SRQ even for devices that support it - RDMA specific (optional) c2h_success: Boolean flag to disable the C2H success optimization - TCP specific (optional) dif_insert_or_strip: Boolean flag to enable DIF insert/strip for I/O - TCP specific (optional) - + no_wr_batching: Boolean flag to disable work requests batching - RDMA specific (optional) Returns: True or False """ @@ -158,6 +159,8 @@ def nvmf_create_transport(client, params['dif_insert_or_strip'] = dif_insert_or_strip if sock_priority: params['sock_priority'] = sock_priority + if no_wr_batching is not None: + params['no_wr_batching'] = no_wr_batching return client.call('nvmf_create_transport', params)