| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (C) 2017, Microsoft Corporation. |
| * Copyright (c) 2025, Stefan Metzmacher |
| */ |
| |
| #include "internal.h" |
| #include <linux/folio_queue.h> |
| |
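| /* |
| * State for smbdirect_map_sges_from_iter(), which maps an iov_iter into a |
| * bounded array of ib_sge entries: 'sge' and 'max_sge' describe the |
| * destination array, 'num_sge' is the number of entries filled so far, and |
| * 'device', 'local_dma_lkey' and 'direction' are used to DMA-map each |
| * extracted segment. |
| */ |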
| struct smbdirect_map_sges { |
| struct ib_sge *sge; |
| size_t num_sge; |
| size_t max_sge; |
| struct ib_device *device; |
| u32 local_dma_lkey; |
| enum dma_data_direction direction; |
| }; |
| |
| static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, |
| struct smbdirect_map_sges *state); |
| |
| static void smbdirect_connection_recv_io_refill_work(struct work_struct *work); |
| static void smbdirect_connection_send_immediate_work(struct work_struct *work); |
| |
| static void smbdirect_connection_qp_event_handler(struct ib_event *event, void *context) |
| { |
| struct smbdirect_socket *sc = context; |
| |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "%s on device %.*s socket %p (cm_id=%p) status %s first_error %1pe\n", |
| ib_event_msg(event->event), |
| IB_DEVICE_NAME_MAX, |
| event->device->name, |
| sc, sc->rdma.cm_id, |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); |
| |
| switch (event->event) { |
| case IB_EVENT_CQ_ERR: |
| case IB_EVENT_QP_FATAL: |
| smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); |
| break; |
| |
| default: |
| break; |
| } |
| } |
| |
| static int smbdirect_connection_rdma_event_handler(struct rdma_cm_id *id, |
| struct rdma_cm_event *event) |
| { |
| struct smbdirect_socket *sc = id->context; |
| int ret = -ECONNRESET; |
| |
| if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) |
| ret = -ENETDOWN; |
| if (IS_ERR(SMBDIRECT_DEBUG_ERR_PTR(event->status))) |
| ret = event->status; |
| |
| /* |
| * cma_cm_event_handler() has |
| * lockdep_assert_held(&id_priv->handler_mutex); |
| * |
| * Mutexes are not allowed in interrupts, |
| * and we rely on not being in an interrupt here. |
| */ |
| WARN_ON_ONCE(in_interrupt()); |
| |
| if (event->event != sc->rdma.expected_event) { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "%s (first_error=%1pe, expected=%s) => event=%s status=%d => ret=%1pe\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| rdma_event_msg(sc->rdma.expected_event), |
| rdma_event_msg(event->event), |
| event->status, |
| SMBDIRECT_DEBUG_ERR_PTR(ret)); |
| |
| /* |
| * If we get RDMA_CM_EVENT_DEVICE_REMOVAL, |
| * we should change to SMBDIRECT_SOCKET_DISCONNECTED, |
| * so that rdma_disconnect() is avoided later via |
| * smbdirect_socket_schedule_cleanup[_status]() => |
| * smbdirect_socket_cleanup_work(). |
| * |
| * Otherwise we'd set SMBDIRECT_SOCKET_DISCONNECTING, |
| * but would never get RDMA_CM_EVENT_DISCONNECTED and |
| * thus never reach SMBDIRECT_SOCKET_DISCONNECTED. |
| */ |
| if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) |
| smbdirect_socket_schedule_cleanup_status(sc, |
| SMBDIRECT_LOG_ERR, |
| ret, |
| SMBDIRECT_SOCKET_DISCONNECTED); |
| else |
| smbdirect_socket_schedule_cleanup(sc, ret); |
| if (sc->ib.qp) |
| ib_drain_qp(sc->ib.qp); |
| return 0; |
| } |
| |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, |
| "%s (first_error=%1pe) event=%s\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| rdma_event_msg(event->event)); |
| |
| switch (event->event) { |
| case RDMA_CM_EVENT_DISCONNECTED: |
| /* |
| * We need to change to SMBDIRECT_SOCKET_DISCONNECTED, |
| * so that rdma_disconnect() is avoided later via |
| * smbdirect_socket_schedule_cleanup_status() => |
| * smbdirect_socket_cleanup_work(). |
| * |
| * Otherwise we'd set SMBDIRECT_SOCKET_DISCONNECTING, |
| * but would never get RDMA_CM_EVENT_DISCONNECTED and |
| * thus never reach SMBDIRECT_SOCKET_DISCONNECTED. |
| * |
| * This is also a normal disconnect so |
| * SMBDIRECT_LOG_INFO should be good enough |
| * and avoids spamming the default logs. |
| */ |
| smbdirect_socket_schedule_cleanup_status(sc, |
| SMBDIRECT_LOG_INFO, |
| ret, |
| SMBDIRECT_SOCKET_DISCONNECTED); |
| if (sc->ib.qp) |
| ib_drain_qp(sc->ib.qp); |
| return 0; |
| |
| default: |
| break; |
| } |
| |
| /* |
| * This is an internal error, should be handled above via |
| * event->event != sc->rdma.expected_event already. |
| */ |
| WARN_ON_ONCE(sc->rdma.expected_event != RDMA_CM_EVENT_DISCONNECTED); |
| smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); |
| return 0; |
| } |
| |
| void smbdirect_connection_rdma_established(struct smbdirect_socket *sc) |
| { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, |
| "rdma established: device: %.*s local: %pISpsfc remote: %pISpsfc\n", |
| IB_DEVICE_NAME_MAX, |
| sc->ib.dev->name, |
| &sc->rdma.cm_id->route.addr.src_addr, |
| &sc->rdma.cm_id->route.addr.dst_addr); |
| |
| sc->rdma.cm_id->event_handler = smbdirect_connection_rdma_event_handler; |
| sc->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED; |
| } |
| |
| void smbdirect_connection_negotiation_done(struct smbdirect_socket *sc) |
| { |
| if (unlikely(sc->first_error)) |
| return; |
| |
| if (sc->status == SMBDIRECT_SOCKET_CONNECTED) |
| /* |
| * This is the accept case where |
| * smbdirect_socket_accept() already sets |
| * SMBDIRECT_SOCKET_CONNECTED |
| */ |
| goto done; |
| |
| if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING) { |
| /* |
| * Something went wrong... |
| */ |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "status=%s first_error=%1pe local: %pISpsfc remote: %pISpsfc\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| &sc->rdma.cm_id->route.addr.src_addr, |
| &sc->rdma.cm_id->route.addr.dst_addr); |
| return; |
| } |
| |
| /* |
| * We are done, so we can wake up the waiter. |
| */ |
| WARN_ONCE(sc->status == SMBDIRECT_SOCKET_CONNECTED, |
| "status=%s first_error=%1pe", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error)); |
| sc->status = SMBDIRECT_SOCKET_CONNECTED; |
| |
| /* |
| * We need to setup the refill and send immediate work |
| * in order to get a working connection. |
| */ |
| done: |
| INIT_WORK(&sc->recv_io.posted.refill_work, smbdirect_connection_recv_io_refill_work); |
| INIT_WORK(&sc->idle.immediate_work, smbdirect_connection_send_immediate_work); |
| |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, |
| "negotiated: local: %pISpsfc remote: %pISpsfc\n", |
| &sc->rdma.cm_id->route.addr.src_addr, |
| &sc->rdma.cm_id->route.addr.dst_addr); |
| |
| wake_up(&sc->status_wait); |
| } |
| |
| static u32 smbdirect_rdma_rw_send_wrs(struct ib_device *dev, |
| const struct ib_qp_init_attr *attr) |
| { |
| /* |
| * This could be split out of rdma_rw_init_qp() |
| * and be a helper function next to rdma_rw_mr_factor() |
| * |
| * We can't check unlikely(rdma_rw_force_mr) here, |
| * but that is most likely 0 anyway. |
| */ |
| u32 factor; |
| |
| WARN_ON_ONCE(attr->port_num == 0); |
| |
| /* |
| * Each context needs at least one RDMA READ or WRITE WR. |
| * |
| * For some hardware we might need more, eventually we should ask the |
| * HCA driver for a multiplier here. |
| */ |
| factor = 1; |
| |
| /* |
| * If the device needs MRs to perform RDMA READ or WRITE operations, |
| * each context needs two additional WRs: one for the MR registration |
| * and one for its invalidation. |
| */ |
| if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) |
| factor += 2; /* inv + reg */ |
| |
| return factor * attr->cap.max_rdma_ctxs; |
| } |
| |
| int smbdirect_connection_create_qp(struct smbdirect_socket *sc) |
| { |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| struct ib_qp_init_attr qp_attr; |
| struct ib_qp_cap qp_cap; |
| u32 rdma_send_wr; |
| u32 max_send_wr; |
| int ret; |
| |
| /* |
| * Note that {rdma,ib}_create_qp() will call |
| * rdma_rw_init_qp() if max_rdma_ctxs is not 0. |
| * It will adjust max_send_wr to the required |
| * number of additional WRs for the RDMA RW operations. |
| * It will cap max_send_wr to the device limit. |
| * |
| * We allocate sp->responder_resources * 2 MRs |
| * and each MR needs WRs for REG and INV, so |
| * we use '* 4'. |
| * |
| * +1 for ib_drain_qp() |
| */ |
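| /* |
| * For illustration only (hypothetical values, not defaults): with |
| * send_credit_target = 255 and responder_resources = 32 this asks for |
| * 255 + 32 * 4 + 1 = 384 send WRs, before rdma_rw_init_qp() adds the |
| * WRs needed for max_rdma_ctxs. |
| */ |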
| memset(&qp_cap, 0, sizeof(qp_cap)); |
| qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; |
| qp_cap.max_recv_wr = sp->recv_credit_max + 1; |
| qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; |
| qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; |
| qp_cap.max_inline_data = 0; |
| qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; |
| |
| /* |
| * Find out the number of max_send_wr |
| * after rdma_rw_init_qp() adjusted it. |
| * |
| * We only do it on a temporary variable, |
| * as rdma_create_qp() will trigger |
| * rdma_rw_init_qp() again. |
| */ |
| memset(&qp_attr, 0, sizeof(qp_attr)); |
| qp_attr.cap = qp_cap; |
| qp_attr.port_num = sc->rdma.cm_id->port_num; |
| rdma_send_wr = smbdirect_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); |
| max_send_wr = qp_cap.max_send_wr + rdma_send_wr; |
| |
| if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || |
| qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { |
| pr_err("Possible CQE overrun: max_send_wr %d\n", |
| qp_cap.max_send_wr); |
| pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", |
| IB_DEVICE_NAME_MAX, |
| sc->ib.dev->name, |
| sc->ib.dev->attrs.max_cqe, |
| sc->ib.dev->attrs.max_qp_wr); |
| pr_err("consider lowering send_credit_target = %d\n", |
| sp->send_credit_target); |
| return -EINVAL; |
| } |
| |
| if (qp_cap.max_rdma_ctxs && |
| (max_send_wr >= sc->ib.dev->attrs.max_cqe || |
| max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { |
| pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", |
| rdma_send_wr, qp_cap.max_send_wr, max_send_wr); |
| pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", |
| IB_DEVICE_NAME_MAX, |
| sc->ib.dev->name, |
| sc->ib.dev->attrs.max_cqe, |
| sc->ib.dev->attrs.max_qp_wr); |
| pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", |
| sp->send_credit_target, qp_cap.max_rdma_ctxs); |
| return -EINVAL; |
| } |
| |
| if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || |
| qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { |
| pr_err("Possible CQE overrun: max_recv_wr %d\n", |
| qp_cap.max_recv_wr); |
| pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", |
| IB_DEVICE_NAME_MAX, |
| sc->ib.dev->name, |
| sc->ib.dev->attrs.max_cqe, |
| sc->ib.dev->attrs.max_qp_wr); |
| pr_err("consider lowering receive_credit_max = %d\n", |
| sp->recv_credit_max); |
| return -EINVAL; |
| } |
| |
| if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || |
| qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { |
| pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", |
| IB_DEVICE_NAME_MAX, |
| sc->ib.dev->name, |
| sc->ib.dev->attrs.max_send_sge, |
| sc->ib.dev->attrs.max_recv_sge); |
| return -EINVAL; |
| } |
| |
| sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); |
| if (IS_ERR(sc->ib.pd)) { |
| pr_err("Can't create RDMA PD: %1pe\n", sc->ib.pd); |
| ret = PTR_ERR(sc->ib.pd); |
| sc->ib.pd = NULL; |
| return ret; |
| } |
| |
| sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, |
| max_send_wr, |
| sc->ib.poll_ctx); |
| if (IS_ERR(sc->ib.send_cq)) { |
| pr_err("Can't create RDMA send CQ: %1pe\n", sc->ib.send_cq); |
| ret = PTR_ERR(sc->ib.send_cq); |
| sc->ib.send_cq = NULL; |
| goto err; |
| } |
| |
| sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, |
| qp_cap.max_recv_wr, |
| sc->ib.poll_ctx); |
| if (IS_ERR(sc->ib.recv_cq)) { |
| pr_err("Can't create RDMA recv CQ: %1pe\n", sc->ib.recv_cq); |
| ret = PTR_ERR(sc->ib.recv_cq); |
| sc->ib.recv_cq = NULL; |
| goto err; |
| } |
| |
| /* |
| * We reset completely here! |
| * As the above use was just temporary |
| * to calc max_send_wr and rdma_send_wr. |
| * |
| * rdma_create_qp() will trigger rdma_rw_init_qp() |
| * again if max_rdma_ctxs is not 0. |
| */ |
| memset(&qp_attr, 0, sizeof(qp_attr)); |
| qp_attr.event_handler = smbdirect_connection_qp_event_handler; |
| qp_attr.qp_context = sc; |
| qp_attr.cap = qp_cap; |
| qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
| qp_attr.qp_type = IB_QPT_RC; |
| qp_attr.send_cq = sc->ib.send_cq; |
| qp_attr.recv_cq = sc->ib.recv_cq; |
| qp_attr.port_num = ~0; |
| |
| ret = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); |
| if (ret) { |
| pr_err("Can't create RDMA QP: %1pe\n", |
| SMBDIRECT_DEBUG_ERR_PTR(ret)); |
| goto err; |
| } |
| sc->ib.qp = sc->rdma.cm_id->qp; |
| |
| return 0; |
| err: |
| smbdirect_connection_destroy_qp(sc); |
| return ret; |
| } |
| |
| void smbdirect_connection_destroy_qp(struct smbdirect_socket *sc) |
| { |
| if (sc->ib.qp) { |
| ib_drain_qp(sc->ib.qp); |
| sc->ib.qp = NULL; |
| rdma_destroy_qp(sc->rdma.cm_id); |
| } |
| if (sc->ib.recv_cq) { |
| ib_destroy_cq(sc->ib.recv_cq); |
| sc->ib.recv_cq = NULL; |
| } |
| if (sc->ib.send_cq) { |
| ib_destroy_cq(sc->ib.send_cq); |
| sc->ib.send_cq = NULL; |
| } |
| if (sc->ib.pd) { |
| ib_dealloc_pd(sc->ib.pd); |
| sc->ib.pd = NULL; |
| } |
| } |
| |
| int smbdirect_connection_create_mem_pools(struct smbdirect_socket *sc) |
| { |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| char name[80]; |
| size_t i; |
| |
| /* |
| * We use sizeof(struct smbdirect_negotiate_resp) for the |
| * payload size as it is larger than |
| * sizeof(struct smbdirect_data_transfer). |
| * |
| * This will fit client and server usage for now. |
| */ |
| snprintf(name, sizeof(name), "smbdirect_send_io_cache_%p", sc); |
| struct kmem_cache_args send_io_args = { |
| .align = __alignof__(struct smbdirect_send_io), |
| }; |
| sc->send_io.mem.cache = kmem_cache_create(name, |
| sizeof(struct smbdirect_send_io) + |
| sizeof(struct smbdirect_negotiate_resp), |
| &send_io_args, |
| SLAB_HWCACHE_ALIGN); |
| if (!sc->send_io.mem.cache) |
| goto err; |
| |
| sc->send_io.mem.pool = mempool_create_slab_pool(sp->send_credit_target, |
| sc->send_io.mem.cache); |
| if (!sc->send_io.mem.pool) |
| goto err; |
| |
| /* |
| * A payload size of sp->max_recv_size should fit |
| * any message. |
| * |
| * For smbdirect_data_transfer messages the whole |
| * buffer might be exposed to userspace |
| * (currently on the client side...) |
| * The documentation says data_offset = 0 would be |
| * strange but valid. |
| */ |
| snprintf(name, sizeof(name), "smbdirect_recv_io_cache_%p", sc); |
| struct kmem_cache_args recv_io_args = { |
| .align = __alignof__(struct smbdirect_recv_io), |
| .useroffset = sizeof(struct smbdirect_recv_io), |
| .usersize = sp->max_recv_size, |
| }; |
| sc->recv_io.mem.cache = kmem_cache_create(name, |
| sizeof(struct smbdirect_recv_io) + |
| sp->max_recv_size, |
| &recv_io_args, |
| SLAB_HWCACHE_ALIGN); |
| if (!sc->recv_io.mem.cache) |
| goto err; |
| |
| sc->recv_io.mem.pool = mempool_create_slab_pool(sp->recv_credit_max, |
| sc->recv_io.mem.cache); |
| if (!sc->recv_io.mem.pool) |
| goto err; |
| |
| for (i = 0; i < sp->recv_credit_max; i++) { |
| struct smbdirect_recv_io *recv_io; |
| |
| recv_io = mempool_alloc(sc->recv_io.mem.pool, |
| sc->recv_io.mem.gfp_mask); |
| if (!recv_io) |
| goto err; |
| recv_io->socket = sc; |
| recv_io->sge.length = 0; |
| list_add_tail(&recv_io->list, &sc->recv_io.free.list); |
| } |
| |
| return 0; |
| err: |
| smbdirect_connection_destroy_mem_pools(sc); |
| return -ENOMEM; |
| } |
| |
| void smbdirect_connection_destroy_mem_pools(struct smbdirect_socket *sc) |
| { |
| struct smbdirect_recv_io *recv_io, *next_io; |
| |
| list_for_each_entry_safe(recv_io, next_io, &sc->recv_io.free.list, list) { |
| list_del(&recv_io->list); |
| mempool_free(recv_io, sc->recv_io.mem.pool); |
| } |
| |
| /* |
| * Note mempool_destroy() and kmem_cache_destroy() |
| * work fine with a NULL pointer |
| */ |
| |
| mempool_destroy(sc->recv_io.mem.pool); |
| sc->recv_io.mem.pool = NULL; |
| |
| kmem_cache_destroy(sc->recv_io.mem.cache); |
| sc->recv_io.mem.cache = NULL; |
| |
| mempool_destroy(sc->send_io.mem.pool); |
| sc->send_io.mem.pool = NULL; |
| |
| kmem_cache_destroy(sc->send_io.mem.cache); |
| sc->send_io.mem.cache = NULL; |
| } |
| |
| struct smbdirect_send_io *smbdirect_connection_alloc_send_io(struct smbdirect_socket *sc) |
| { |
| struct smbdirect_send_io *msg; |
| |
| msg = mempool_alloc(sc->send_io.mem.pool, sc->send_io.mem.gfp_mask); |
| if (!msg) |
| return ERR_PTR(-ENOMEM); |
| msg->socket = sc; |
| INIT_LIST_HEAD(&msg->sibling_list); |
| msg->num_sge = 0; |
| |
| return msg; |
| } |
| |
| void smbdirect_connection_free_send_io(struct smbdirect_send_io *msg) |
| { |
| struct smbdirect_socket *sc = msg->socket; |
| size_t i; |
| |
| /* |
| * The list needs to be empty! |
| * The caller should take care of it. |
| */ |
| WARN_ON_ONCE(!list_empty(&msg->sibling_list)); |
| |
| /* |
| * Note we call ib_dma_unmap_page(), even if some sges are mapped using |
| * ib_dma_map_single(). |
| * |
| * The difference between _single() and _page() only matters for the |
| * ib_dma_map_*() case. |
| * |
| * For the ib_dma_unmap_*() case it does not matter as both take the |
| * dma_addr_t and dma_unmap_single_attrs() is just an alias to |
| * dma_unmap_page_attrs(). |
| */ |
| for (i = 0; i < msg->num_sge; i++) |
| ib_dma_unmap_page(sc->ib.dev, |
| msg->sge[i].addr, |
| msg->sge[i].length, |
| DMA_TO_DEVICE); |
| |
| mempool_free(msg, sc->send_io.mem.pool); |
| } |
| |
| struct smbdirect_recv_io *smbdirect_connection_get_recv_io(struct smbdirect_socket *sc) |
| { |
| struct smbdirect_recv_io *msg = NULL; |
| unsigned long flags; |
| |
| spin_lock_irqsave(&sc->recv_io.free.lock, flags); |
| if (likely(!sc->first_error)) |
| msg = list_first_entry_or_null(&sc->recv_io.free.list, |
| struct smbdirect_recv_io, |
| list); |
| if (likely(msg)) { |
| list_del(&msg->list); |
| sc->statistics.get_receive_buffer++; |
| } |
| spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); |
| |
| return msg; |
| } |
| |
| void smbdirect_connection_put_recv_io(struct smbdirect_recv_io *msg) |
| { |
| struct smbdirect_socket *sc = msg->socket; |
| unsigned long flags; |
| |
| if (likely(msg->sge.length != 0)) { |
| ib_dma_unmap_single(sc->ib.dev, |
| msg->sge.addr, |
| msg->sge.length, |
| DMA_FROM_DEVICE); |
| msg->sge.length = 0; |
| } |
| |
| spin_lock_irqsave(&sc->recv_io.free.lock, flags); |
| list_add_tail(&msg->list, &sc->recv_io.free.list); |
| sc->statistics.put_receive_buffer++; |
| spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); |
| |
| queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); |
| } |
| |
| void smbdirect_connection_reassembly_append_recv_io(struct smbdirect_socket *sc, |
| struct smbdirect_recv_io *msg, |
| u32 data_length) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); |
| list_add_tail(&msg->list, &sc->recv_io.reassembly.list); |
| sc->recv_io.reassembly.queue_length++; |
| /* |
| * Make sure reassembly_data_length is updated after the list and |
| * reassembly_queue_length are updated. On the dequeue side |
| * reassembly_data_length is checked without a lock to determine |
| * whether reassembly_queue_length and the list are up to date. |
| * The virt_wmb() pairs with the virt_rmb() in |
| * smbdirect_connection_recvmsg(). |
| */ |
| virt_wmb(); |
| sc->recv_io.reassembly.data_length += data_length; |
| spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); |
| sc->statistics.enqueue_reassembly_queue++; |
| } |
| |
| struct smbdirect_recv_io * |
| smbdirect_connection_reassembly_first_recv_io(struct smbdirect_socket *sc) |
| { |
| struct smbdirect_recv_io *msg; |
| |
| msg = list_first_entry_or_null(&sc->recv_io.reassembly.list, |
| struct smbdirect_recv_io, |
| list); |
| |
| return msg; |
| } |
| |
| void smbdirect_connection_negotiate_rdma_resources(struct smbdirect_socket *sc, |
| u8 peer_initiator_depth, |
| u8 peer_responder_resources, |
| const struct rdma_conn_param *param) |
| { |
| struct smbdirect_socket_parameters *sp = &sc->parameters; |
| |
| if (rdma_protocol_iwarp(sc->ib.dev, sc->rdma.cm_id->port_num) && |
| param->private_data_len == 8) { |
| /* |
| * Legacy clients with only iWarp MPA v1 support |
| * need a private blob in order to negotiate |
| * the IRD/ORD values. |
| */ |
| const __be32 *ird_ord_hdr = param->private_data; |
| u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); |
| u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); |
| |
| /* |
| * cifs.ko sends the legacy IRD/ORD negotiation blob |
| * even if iWarp MPA v2 was used. |
| * |
| * Here we check that the values match and only |
| * mark the client as legacy if they don't match. |
| */ |
| if ((u32)param->initiator_depth != ird32 || |
| (u32)param->responder_resources != ord32) { |
| /* |
| * There are broken clients (old cifs.ko) |
| * using little endian and also |
| * struct rdma_conn_param only uses u8 |
| * for initiator_depth and responder_resources, |
| * so we truncate the value to U8_MAX. |
| * |
| * smb_direct_accept_client() will then |
| * do the real negotiation in order to |
| * select the minimum between client and |
| * server. |
| */ |
| ird32 = min_t(u32, ird32, U8_MAX); |
| ord32 = min_t(u32, ord32, U8_MAX); |
| |
| sc->rdma.legacy_iwarp = true; |
| peer_initiator_depth = (u8)ird32; |
| peer_responder_resources = (u8)ord32; |
| } |
| } |
| |
| /* |
| * Negotiate the values by using the minimum |
| * between client and server if the client provided |
| * non-zero values. |
| */ |
| if (peer_initiator_depth != 0) |
| sp->initiator_depth = min_t(u8, sp->initiator_depth, |
| peer_initiator_depth); |
| if (peer_responder_resources != 0) |
| sp->responder_resources = min_t(u8, sp->responder_resources, |
| peer_responder_resources); |
| } |
| |
| bool smbdirect_connection_is_connected(struct smbdirect_socket *sc) |
| { |
| if (unlikely(!sc || sc->first_error || sc->status != SMBDIRECT_SOCKET_CONNECTED)) |
| return false; |
| return true; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_is_connected); |
| |
| int smbdirect_connection_wait_for_connected(struct smbdirect_socket *sc) |
| { |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| union { |
| struct sockaddr sa; |
| struct sockaddr_storage ss; |
| } src_addr, dst_addr; |
| const struct sockaddr *src = NULL; |
| const struct sockaddr *dst = NULL; |
| char _devname[IB_DEVICE_NAME_MAX] = { 0, }; |
| const char *devname = NULL; |
| int ret; |
| |
| if (sc->rdma.cm_id) { |
| src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; |
| if (src_addr.sa.sa_family != AF_UNSPEC) |
| src = &src_addr.sa; |
| dst_addr.ss = sc->rdma.cm_id->route.addr.dst_addr; |
| if (dst_addr.sa.sa_family != AF_UNSPEC) |
| dst = &dst_addr.sa; |
| |
| if (sc->ib.dev) { |
| memcpy(_devname, sc->ib.dev->name, IB_DEVICE_NAME_MAX); |
| devname = _devname; |
| } |
| } |
| |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_INFO, |
| "waiting for connection: device: %.*s local: %pISpsfc remote: %pISpsfc\n", |
| IB_DEVICE_NAME_MAX, devname, src, dst); |
| |
| ret = wait_event_interruptible_timeout(sc->status_wait, |
| sc->status == SMBDIRECT_SOCKET_CONNECTED || |
| sc->first_error, |
| msecs_to_jiffies(sp->negotiate_timeout_msec)); |
| if (sc->rdma.cm_id) { |
| /* |
| * src, dst and devname may have been updated in the meantime. |
| */ |
| src_addr.ss = sc->rdma.cm_id->route.addr.src_addr; |
| if (src_addr.sa.sa_family != AF_UNSPEC) |
| src = &src_addr.sa; |
| dst_addr.ss = sc->rdma.cm_id->route.addr.dst_addr; |
| if (dst_addr.sa.sa_family != AF_UNSPEC) |
| dst = &dst_addr.sa; |
| |
| if (sc->ib.dev) { |
| memcpy(_devname, sc->ib.dev->name, IB_DEVICE_NAME_MAX); |
| devname = _devname; |
| } |
| } |
| if (ret == 0) |
| ret = -ETIMEDOUT; |
| if (ret < 0) |
| smbdirect_socket_schedule_cleanup(sc, ret); |
| if (sc->first_error) { |
| int lvl = SMBDIRECT_LOG_ERR; |
| |
| ret = sc->first_error; |
| if (ret == -ENODEV) |
| lvl = SMBDIRECT_LOG_INFO; |
| |
| smbdirect_log_rdma_event(sc, lvl, |
| "connection failed %1pe device: %.*s local: %pISpsfc remote: %pISpsfc\n", |
| SMBDIRECT_DEBUG_ERR_PTR(ret), |
| IB_DEVICE_NAME_MAX, devname, src, dst); |
| return ret; |
| } |
| |
| return 0; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_wait_for_connected); |
| |
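| /* |
| * Keepalive cycle as implemented in this file: the timer is (re)armed with |
| * keepalive_interval_msec whenever a message is received (see |
| * smbdirect_connection_recv_io_done(), which also resets idle.keepalive to |
| * SMBDIRECT_KEEPALIVE_NONE). When it fires in the NONE state we switch to |
| * PENDING, re-arm it with keepalive_timeout_msec and schedule |
| * idle.immediate_work to send an empty message; |
| * smbdirect_connection_request_keep_alive() then moves PENDING to SENT and |
| * requests SMBDIRECT_FLAG_RESPONSE_REQUESTED. If the timer fires again |
| * while still PENDING or SENT, the connection is torn down with -ETIMEDOUT. |
| */ |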
| void smbdirect_connection_idle_timer_work(struct work_struct *work) |
| { |
| struct smbdirect_socket *sc = |
| container_of(work, struct smbdirect_socket, idle.timer_work.work); |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| |
| if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { |
| smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_ERR, |
| "%s => timeout sc->idle.keepalive=%s\n", |
| smbdirect_socket_status_string(sc->status), |
| sc->idle.keepalive == SMBDIRECT_KEEPALIVE_SENT ? |
| "SENT" : "PENDING"); |
| smbdirect_socket_schedule_cleanup(sc, -ETIMEDOUT); |
| return; |
| } |
| |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) |
| return; |
| |
| /* |
| * Now use the keepalive timeout (instead of keepalive interval) |
| * in order to wait for a response |
| */ |
| sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; |
| mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, |
| msecs_to_jiffies(sp->keepalive_timeout_msec)); |
| smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, |
| "schedule send of empty idle message\n"); |
| queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); |
| } |
| |
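| /* |
| * Grant new receive credits to the peer: take up to 'missing' |
| * (target minus currently granted credits) from 'available' (receive |
| * buffers posted but not yet granted) and return the amount to advertise |
| * in credits_granted. Worked example (hypothetical numbers): with |
| * target = 10, count = 4 and available = 8 we grant 6 and put the |
| * remaining 2 back into 'available'. |
| */ |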
| u16 smbdirect_connection_grant_recv_credits(struct smbdirect_socket *sc) |
| { |
| int missing; |
| int available; |
| int new_credits; |
| |
| if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) |
| return 0; |
| |
| missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.credits.count); |
| available = atomic_xchg(&sc->recv_io.credits.available, 0); |
| new_credits = min3((int)U16_MAX, missing, available); |
| if (new_credits <= 0) { |
| /* |
| * If credits are available but were not granted, |
| * we need to add them back. |
| */ |
| if (available) |
| atomic_add(available, &sc->recv_io.credits.available); |
| return 0; |
| } |
| |
| if (new_credits < available) { |
| /* |
| * Re-add the remaining available credits. |
| */ |
| available -= new_credits; |
| atomic_add(available, &sc->recv_io.credits.available); |
| } |
| |
| /* |
| * Remember we granted the credits |
| */ |
| atomic_add(new_credits, &sc->recv_io.credits.count); |
| return new_credits; |
| } |
| |
| static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc) |
| { |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| |
| if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { |
| sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; |
| /* |
| * Now use the keepalive timeout (instead of keepalive interval) |
| * in order to wait for a response |
| */ |
| mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, |
| msecs_to_jiffies(sp->keepalive_timeout_msec)); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc, |
| struct ib_send_wr *wr) |
| { |
| int ret; |
| |
| if (unlikely(sc->first_error)) |
| return sc->first_error; |
| |
| atomic_inc(&sc->send_io.pending.count); |
| ret = ib_post_send(sc->ib.qp, wr, NULL); |
| if (ret) { |
| atomic_dec(&sc->send_io.pending.count); |
| smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, |
| "ib_post_send() failed %1pe\n", |
| SMBDIRECT_DEBUG_ERR_PTR(ret)); |
| smbdirect_socket_schedule_cleanup(sc, ret); |
| } |
| |
| return ret; |
| } |
| |
| static void smbdirect_connection_send_batch_init(struct smbdirect_send_batch *batch, |
| bool need_invalidate_rkey, |
| unsigned int remote_key) |
| { |
| INIT_LIST_HEAD(&batch->msg_list); |
| batch->wr_cnt = 0; |
| batch->need_invalidate_rkey = need_invalidate_rkey; |
| batch->remote_key = remote_key; |
| batch->credit = 0; |
| } |
| |
| int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc, |
| struct smbdirect_send_batch *batch, |
| bool is_last) |
| { |
| struct smbdirect_send_io *first, *last; |
| int ret = 0; |
| |
| if (list_empty(&batch->msg_list)) |
| goto release_credit; |
| |
| first = list_first_entry(&batch->msg_list, |
| struct smbdirect_send_io, |
| sibling_list); |
| last = list_last_entry(&batch->msg_list, |
| struct smbdirect_send_io, |
| sibling_list); |
| |
| if (batch->need_invalidate_rkey) { |
| first->wr.opcode = IB_WR_SEND_WITH_INV; |
| first->wr.ex.invalidate_rkey = batch->remote_key; |
| batch->need_invalidate_rkey = false; |
| batch->remote_key = 0; |
| } |
| |
| last->wr.send_flags = IB_SEND_SIGNALED; |
| last->wr.wr_cqe = &last->cqe; |
| |
| /* |
| * Remove last from batch->msg_list |
| * and splice the rest of batch->msg_list |
| * onto last->sibling_list. |
| * |
| * batch->msg_list is a valid empty list |
| * at the end. |
| */ |
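| /* |
| * Illustration (hypothetical three-message batch A, B, C): before the |
| * flush A->wr.next == &B->wr and B->wr.next == &C->wr (chained by |
| * smbdirect_connection_post_send_io()) and only C->wr is marked |
| * IB_SEND_SIGNALED here. After the splice C->sibling_list holds A and B, |
| * batch->msg_list is empty, and the whole chain is posted with a single |
| * ib_post_send() starting at A->wr. |
| */ |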
| list_del_init(&last->sibling_list); |
| list_splice_tail_init(&batch->msg_list, &last->sibling_list); |
| batch->wr_cnt = 0; |
| |
| ret = smbdirect_connection_post_send_wr(sc, &first->wr); |
| if (ret) { |
| struct smbdirect_send_io *sibling, *next; |
| |
| list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { |
| list_del_init(&sibling->sibling_list); |
| smbdirect_connection_free_send_io(sibling); |
| } |
| smbdirect_connection_free_send_io(last); |
| } |
| |
| release_credit: |
| if (is_last && !ret && batch->credit) { |
| atomic_add(batch->credit, &sc->send_io.bcredits.count); |
| batch->credit = 0; |
| wake_up(&sc->send_io.bcredits.wait_queue); |
| } |
| |
| return ret; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_batch_flush); |
| |
| struct smbdirect_send_batch * |
| smbdirect_init_send_batch_storage(struct smbdirect_send_batch_storage *storage, |
| bool need_invalidate_rkey, |
| unsigned int remote_key) |
| { |
| struct smbdirect_send_batch *batch = (struct smbdirect_send_batch *)storage; |
| |
| memset(storage, 0, sizeof(*storage)); |
| BUILD_BUG_ON(sizeof(*batch) > sizeof(*storage)); |
| |
| smbdirect_connection_send_batch_init(batch, |
| need_invalidate_rkey, |
| remote_key); |
| |
| return batch; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_init_send_batch_storage); |
| |
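| /* |
| * A sketch of the three send-side credit types used below: 'bcredits' |
| * serialize whole batches (taken once per batch and returned by the final |
| * smbdirect_connection_send_batch_flush()), 'lcredits' account for local |
| * send queue slots and are returned from |
| * smbdirect_connection_send_io_done() as work requests complete, and |
| * 'credits' are the SMB Direct send credits granted by the peer, |
| * replenished from the credits_granted field of received messages. |
| */ |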
| static int smbdirect_connection_wait_for_send_bcredit(struct smbdirect_socket *sc, |
| struct smbdirect_send_batch *batch) |
| { |
| int ret; |
| |
| if (batch->credit) |
| return 0; |
| |
| ret = smbdirect_socket_wait_for_credits(sc, |
| SMBDIRECT_SOCKET_CONNECTED, |
| -ENOTCONN, |
| &sc->send_io.bcredits.wait_queue, |
| &sc->send_io.bcredits.count, |
| 1); |
| if (ret) |
| return ret; |
| |
| batch->credit = 1; |
| return 0; |
| } |
| |
| static int smbdirect_connection_wait_for_send_lcredit(struct smbdirect_socket *sc, |
| struct smbdirect_send_batch *batch) |
| { |
| if (batch && atomic_read(&sc->send_io.lcredits.count) <= 1) { |
| int ret; |
| |
| ret = smbdirect_connection_send_batch_flush(sc, batch, false); |
| if (ret) |
| return ret; |
| } |
| |
| return smbdirect_socket_wait_for_credits(sc, |
| SMBDIRECT_SOCKET_CONNECTED, |
| -ENOTCONN, |
| &sc->send_io.lcredits.wait_queue, |
| &sc->send_io.lcredits.count, |
| 1); |
| } |
| |
| static int smbdirect_connection_wait_for_send_credits(struct smbdirect_socket *sc, |
| struct smbdirect_send_batch *batch) |
| { |
| if (batch && (batch->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) { |
| int ret; |
| |
| ret = smbdirect_connection_send_batch_flush(sc, batch, false); |
| if (ret) |
| return ret; |
| } |
| |
| return smbdirect_socket_wait_for_credits(sc, |
| SMBDIRECT_SOCKET_CONNECTED, |
| -ENOTCONN, |
| &sc->send_io.credits.wait_queue, |
| &sc->send_io.credits.count, |
| 1); |
| } |
| |
| static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc); |
| |
| static int smbdirect_connection_post_send_io(struct smbdirect_socket *sc, |
| struct smbdirect_send_batch *batch, |
| struct smbdirect_send_io *msg) |
| { |
| int i; |
| |
| for (i = 0; i < msg->num_sge; i++) |
| ib_dma_sync_single_for_device(sc->ib.dev, |
| msg->sge[i].addr, msg->sge[i].length, |
| DMA_TO_DEVICE); |
| |
| msg->cqe.done = smbdirect_connection_send_io_done; |
| msg->wr.wr_cqe = &msg->cqe; |
| msg->wr.opcode = IB_WR_SEND; |
| msg->wr.sg_list = &msg->sge[0]; |
| msg->wr.num_sge = msg->num_sge; |
| msg->wr.next = NULL; |
| |
| if (batch) { |
| msg->wr.send_flags = 0; |
| if (!list_empty(&batch->msg_list)) { |
| struct smbdirect_send_io *last; |
| |
| last = list_last_entry(&batch->msg_list, |
| struct smbdirect_send_io, |
| sibling_list); |
| last->wr.next = &msg->wr; |
| } |
| list_add_tail(&msg->sibling_list, &batch->msg_list); |
| batch->wr_cnt++; |
| return 0; |
| } |
| |
| msg->wr.send_flags = IB_SEND_SIGNALED; |
| return smbdirect_connection_post_send_wr(sc, &msg->wr); |
| } |
| |
| int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc, |
| struct smbdirect_send_batch *batch, |
| struct iov_iter *iter, |
| unsigned int flags, |
| u32 remaining_data_length) |
| { |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| struct smbdirect_send_batch _batch; |
| struct smbdirect_send_io *msg; |
| struct smbdirect_data_transfer *packet; |
| size_t header_length; |
| u16 new_credits = 0; |
| u32 data_length = 0; |
| int ret; |
| |
| if (WARN_ON_ONCE(flags)) |
| return -EINVAL; /* no flags support for now */ |
| |
| if (iter) { |
| if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) |
| return -EINVAL; /* It's a bug in upper layer to get there */ |
| |
| header_length = sizeof(struct smbdirect_data_transfer); |
| if (WARN_ON_ONCE(remaining_data_length == 0 || |
| iov_iter_count(iter) > remaining_data_length)) |
| return -EINVAL; |
| } else { |
| /* If this is a packet without payload, don't send padding */ |
| header_length = offsetof(struct smbdirect_data_transfer, padding); |
| if (WARN_ON_ONCE(remaining_data_length)) |
| return -EINVAL; |
| } |
| |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { |
| smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, |
| "status=%s first_error=%1pe => %1pe\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); |
| return -ENOTCONN; |
| } |
| |
| if (!batch) { |
| smbdirect_connection_send_batch_init(&_batch, false, 0); |
| batch = &_batch; |
| } |
| |
| ret = smbdirect_connection_wait_for_send_bcredit(sc, batch); |
| if (ret) |
| goto bcredit_failed; |
| |
| ret = smbdirect_connection_wait_for_send_lcredit(sc, batch); |
| if (ret) |
| goto lcredit_failed; |
| |
| ret = smbdirect_connection_wait_for_send_credits(sc, batch); |
| if (ret) |
| goto credit_failed; |
| |
| new_credits = smbdirect_connection_grant_recv_credits(sc); |
| if (new_credits == 0 && |
| atomic_read(&sc->send_io.credits.count) == 0 && |
| atomic_read(&sc->recv_io.credits.count) == 0) { |
| /* |
| * queue the refill work in order to |
| * get some new recv credits we can grant to |
| * the peer. |
| */ |
| queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); |
| |
| /* |
| * wait until either the refill work or the peer |
| * granted new credits |
| */ |
| ret = wait_event_interruptible(sc->send_io.credits.wait_queue, |
| atomic_read(&sc->send_io.credits.count) >= 1 || |
| atomic_read(&sc->recv_io.credits.available) >= 1 || |
| sc->status != SMBDIRECT_SOCKET_CONNECTED); |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) |
| ret = -ENOTCONN; |
| if (ret < 0) |
| goto credit_failed; |
| |
| new_credits = smbdirect_connection_grant_recv_credits(sc); |
| } |
| |
| msg = smbdirect_connection_alloc_send_io(sc); |
| if (IS_ERR(msg)) { |
| ret = PTR_ERR(msg); |
| goto alloc_failed; |
| } |
| |
| /* Map the packet to DMA */ |
| msg->sge[0].addr = ib_dma_map_single(sc->ib.dev, |
| msg->packet, |
| header_length, |
| DMA_TO_DEVICE); |
| ret = ib_dma_mapping_error(sc->ib.dev, msg->sge[0].addr); |
| if (ret) |
| goto err; |
| |
| msg->sge[0].length = header_length; |
| msg->sge[0].lkey = sc->ib.pd->local_dma_lkey; |
| msg->num_sge = 1; |
| |
| if (iter) { |
| struct smbdirect_map_sges extract = { |
| .num_sge = msg->num_sge, |
| .max_sge = ARRAY_SIZE(msg->sge), |
| .sge = msg->sge, |
| .device = sc->ib.dev, |
| .local_dma_lkey = sc->ib.pd->local_dma_lkey, |
| .direction = DMA_TO_DEVICE, |
| }; |
| size_t payload_len = umin(iov_iter_count(iter), |
| sp->max_send_size - sizeof(*packet)); |
| |
| ret = smbdirect_map_sges_from_iter(iter, payload_len, &extract); |
| if (ret < 0) |
| goto err; |
| data_length = ret; |
| remaining_data_length -= data_length; |
| msg->num_sge = extract.num_sge; |
| } |
| |
| /* Fill in the packet header */ |
| packet = (struct smbdirect_data_transfer *)msg->packet; |
| packet->credits_requested = cpu_to_le16(sp->send_credit_target); |
| packet->credits_granted = cpu_to_le16(new_credits); |
| |
| packet->flags = 0; |
| if (smbdirect_connection_request_keep_alive(sc)) |
| packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); |
| |
| packet->reserved = 0; |
| if (!data_length) |
| packet->data_offset = 0; |
| else |
| packet->data_offset = cpu_to_le32(24); |
| packet->data_length = cpu_to_le32(data_length); |
| packet->remaining_data_length = cpu_to_le32(remaining_data_length); |
| packet->padding = 0; |
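| /* |
| * For example, an empty keepalive message (iter == NULL) ends up with |
| * data_offset = 0, data_length = 0 and remaining_data_length = 0, and |
| * only the truncated header (up to, but not including, 'padding') is |
| * posted. |
| */ |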
| |
| smbdirect_log_outgoing(sc, SMBDIRECT_LOG_INFO, |
| "DataOut: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n", |
| "CreditsRequested", |
| le16_to_cpu(packet->credits_requested), |
| "CreditsGranted", |
| le16_to_cpu(packet->credits_granted), |
| "Flags", |
| le16_to_cpu(packet->flags), |
| "RemainingDataLength", |
| le32_to_cpu(packet->remaining_data_length), |
| "DataOffset", |
| le32_to_cpu(packet->data_offset), |
| "DataLength", |
| le32_to_cpu(packet->data_length)); |
| |
| ret = smbdirect_connection_post_send_io(sc, batch, msg); |
| if (ret) |
| goto err; |
| |
| /* |
| * From here msg is owned by the batch |
| * and we should not free it explicitly. |
| */ |
| |
| if (batch == &_batch) { |
| ret = smbdirect_connection_send_batch_flush(sc, batch, true); |
| if (ret) |
| goto flush_failed; |
| } |
| |
| return data_length; |
| err: |
| smbdirect_connection_free_send_io(msg); |
| flush_failed: |
| alloc_failed: |
| atomic_inc(&sc->send_io.credits.count); |
| credit_failed: |
| atomic_inc(&sc->send_io.lcredits.count); |
| lcredit_failed: |
| atomic_add(batch->credit, &sc->send_io.bcredits.count); |
| batch->credit = 0; |
| bcredit_failed: |
| return ret; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_single_iter); |
| |
| int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc) |
| { |
| /* |
| * As an optimization, we don't wait for individual I/O to finish |
| * before sending the next one. |
| * Send them all and wait for the pending send count to reach 0, |
| * which means all the I/Os have gone out and we are good to return. |
| */ |
| |
| wait_event(sc->send_io.pending.zero_wait_queue, |
| atomic_read(&sc->send_io.pending.count) == 0 || |
| sc->status != SMBDIRECT_SOCKET_CONNECTED); |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { |
| smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, |
| "status=%s first_error=%1pe => %1pe\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); |
| return -ENOTCONN; |
| } |
| |
| return 0; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_wait_zero_pending); |
| |
| int smbdirect_connection_send_iter(struct smbdirect_socket *sc, |
| struct iov_iter *iter, |
| unsigned int flags, |
| bool need_invalidate, |
| unsigned int remote_key) |
| { |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| struct smbdirect_send_batch batch; |
| int total_count = iov_iter_count(iter); |
| int ret; |
| int error = 0; |
| __be32 hdr; |
| |
| if (WARN_ONCE(flags, "unexpected flags=0x%x\n", flags)) |
| return -EINVAL; /* no flags support for now */ |
| |
| if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) |
| return -EINVAL; /* It's a bug in upper layer to get there */ |
| |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { |
| smbdirect_log_write(sc, SMBDIRECT_LOG_INFO, |
| "status=%s first_error=%1pe => %1pe\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); |
| return -ENOTCONN; |
| } |
| |
| /* |
| * For now we expect the iter to have the full |
| * message, including a 4 byte length header. |
| */ |
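| /* |
| * For example, for an upper-layer message of N bytes the caller passes |
| * 4 + N bytes, where the first 4 bytes are the big-endian length N; the |
| * header is consumed here and only the N payload bytes are sent in |
| * fragments below. |
| */ |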
| if (iov_iter_count(iter) <= 4) |
| return -EINVAL; |
| if (!copy_from_iter_full(&hdr, sizeof(hdr), iter)) |
| return -EFAULT; |
| if (iov_iter_count(iter) != be32_to_cpu(hdr)) |
| return -EINVAL; |
| |
| /* |
| * The size must fit into the negotiated |
| * fragmented send size. |
| */ |
| if (iov_iter_count(iter) > sp->max_fragmented_send_size) |
| return -EMSGSIZE; |
| |
| smbdirect_log_write(sc, SMBDIRECT_LOG_INFO, |
| "Sending (RDMA): length=%zu\n", |
| iov_iter_count(iter)); |
| |
| smbdirect_connection_send_batch_init(&batch, need_invalidate, remote_key); |
| while (iov_iter_count(iter)) { |
| ret = smbdirect_connection_send_single_iter(sc, |
| &batch, |
| iter, |
| flags, |
| iov_iter_count(iter)); |
| if (unlikely(ret < 0)) { |
| error = ret; |
| break; |
| } |
| } |
| |
| ret = smbdirect_connection_send_batch_flush(sc, &batch, true); |
| if (unlikely(ret && !error)) |
| error = ret; |
| |
| /* |
| * As an optimization, we don't wait for individual I/O to finish |
| * before sending the next one. |
| * Send them all and wait for the pending send count to reach 0, |
| * which means all the I/Os have gone out and we are good to return. |
| */ |
| |
| ret = smbdirect_connection_send_wait_zero_pending(sc); |
| if (unlikely(ret && !error)) |
| error = ret; |
| |
| if (unlikely(error)) |
| return error; |
| |
| return total_count; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_send_iter); |
| |
| static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc) |
| { |
| struct smbdirect_send_io *msg = |
| container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); |
| struct smbdirect_socket *sc = msg->socket; |
| struct smbdirect_send_io *sibling, *next; |
| int lcredits = 0; |
| |
| smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_INFO, |
| "smbdirect_send_io completed. status='%s (%d)', opcode=%d\n", |
| ib_wc_status_msg(wc->status), wc->status, wc->opcode); |
| |
| if (unlikely(!(msg->wr.send_flags & IB_SEND_SIGNALED))) { |
| /* |
| * This happens when the smbdirect_send_io is a sibling |
| * posted before the final message; it only generates a |
| * completion on error. We need to skip |
| * smbdirect_connection_free_send_io() here, |
| * otherwise it would also destroy the memory |
| * of the siblings, which would cause |
| * use-after-free problems for the other completions |
| * triggered by ib_drain_qp(). |
| */ |
| if (wc->status != IB_WC_SUCCESS) |
| goto skip_free; |
| |
| /* |
| * This should not happen! |
| * But we better just close the |
| * connection... |
| */ |
| smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, |
| "unexpected send completion wc->status=%s (%d) wc->opcode=%d\n", |
| ib_wc_status_msg(wc->status), wc->status, wc->opcode); |
| smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); |
| return; |
| } |
| |
| /* |
| * Free possible siblings and then the main send_io |
| */ |
| list_for_each_entry_safe(sibling, next, &msg->sibling_list, sibling_list) { |
| list_del_init(&sibling->sibling_list); |
| smbdirect_connection_free_send_io(sibling); |
| lcredits += 1; |
| } |
| /* Note this frees wc->wr_cqe, but not wc */ |
| smbdirect_connection_free_send_io(msg); |
| lcredits += 1; |
| |
| if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_SEND))) { |
| skip_free: |
| if (wc->status != IB_WC_WR_FLUSH_ERR) |
| smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR, |
| "wc->status=%s (%d) wc->opcode=%d\n", |
| ib_wc_status_msg(wc->status), wc->status, wc->opcode); |
| smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); |
| return; |
| } |
| |
| atomic_add(lcredits, &sc->send_io.lcredits.count); |
| wake_up(&sc->send_io.lcredits.wait_queue); |
| |
| if (atomic_dec_and_test(&sc->send_io.pending.count)) |
| wake_up(&sc->send_io.pending.zero_wait_queue); |
| } |
| |
| static void smbdirect_connection_send_immediate_work(struct work_struct *work) |
| { |
| struct smbdirect_socket *sc = |
| container_of(work, struct smbdirect_socket, idle.immediate_work); |
| int ret; |
| |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) |
| return; |
| |
| smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, |
| "send an empty message\n"); |
| sc->statistics.send_empty++; |
| ret = smbdirect_connection_send_single_iter(sc, NULL, NULL, 0, 0); |
| if (ret < 0) { |
| smbdirect_log_write(sc, SMBDIRECT_LOG_ERR, |
| "smbdirect_connection_send_single_iter ret=%1pe\n", |
| SMBDIRECT_DEBUG_ERR_PTR(ret)); |
| smbdirect_socket_schedule_cleanup(sc, ret); |
| } |
| } |
| |
| int smbdirect_connection_post_recv_io(struct smbdirect_recv_io *msg) |
| { |
| struct smbdirect_socket *sc = msg->socket; |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| struct ib_recv_wr recv_wr = { |
| .wr_cqe = &msg->cqe, |
| .sg_list = &msg->sge, |
| .num_sge = 1, |
| }; |
| int ret; |
| |
| if (unlikely(sc->first_error)) |
| return sc->first_error; |
| |
| msg->sge.addr = ib_dma_map_single(sc->ib.dev, |
| msg->packet, |
| sp->max_recv_size, |
| DMA_FROM_DEVICE); |
| ret = ib_dma_mapping_error(sc->ib.dev, msg->sge.addr); |
| if (ret) |
| return ret; |
| |
| msg->sge.length = sp->max_recv_size; |
| msg->sge.lkey = sc->ib.pd->local_dma_lkey; |
| |
| ret = ib_post_recv(sc->ib.qp, &recv_wr, NULL); |
| if (ret) { |
| smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, |
| "ib_post_recv failed ret=%d (%1pe)\n", |
| ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); |
| ib_dma_unmap_single(sc->ib.dev, |
| msg->sge.addr, |
| msg->sge.length, |
| DMA_FROM_DEVICE); |
| msg->sge.length = 0; |
| smbdirect_socket_schedule_cleanup(sc, ret); |
| } |
| |
| return ret; |
| } |
| |
| void smbdirect_connection_recv_io_done(struct ib_cq *cq, struct ib_wc *wc) |
| { |
| struct smbdirect_recv_io *recv_io = |
| container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); |
| struct smbdirect_socket *sc = recv_io->socket; |
| const struct smbdirect_socket_parameters *sp = &sc->parameters; |
| struct smbdirect_data_transfer *data_transfer; |
| int current_recv_credits; |
| u16 old_recv_credit_target; |
| u16 credits_requested; |
| u16 credits_granted; |
| u16 flags; |
| u32 data_offset; |
| u32 data_length; |
| u32 remaining_data_length; |
| |
| if (unlikely(wc->status != IB_WC_SUCCESS || WARN_ON_ONCE(wc->opcode != IB_WC_RECV))) { |
| if (wc->status != IB_WC_WR_FLUSH_ERR) |
| smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, |
| "wc->status=%s (%d) wc->opcode=%d\n", |
| ib_wc_status_msg(wc->status), wc->status, wc->opcode); |
| goto error; |
| } |
| |
| smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_INFO, |
| "recv_io=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", |
| recv_io, sc->recv_io.expected, |
| ib_wc_status_msg(wc->status), wc->opcode, |
| wc->byte_len, wc->pkey_index); |
| |
| /* |
| * Reset timer to the keepalive interval in |
| * order to trigger our next keepalive message. |
| */ |
| sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; |
| mod_delayed_work(sc->workqueues.idle, &sc->idle.timer_work, |
| msecs_to_jiffies(sp->keepalive_interval_msec)); |
| |
| ib_dma_sync_single_for_cpu(sc->ib.dev, |
| recv_io->sge.addr, |
| recv_io->sge.length, |
| DMA_FROM_DEVICE); |
| |
| if (unlikely(wc->byte_len < |
| offsetof(struct smbdirect_data_transfer, padding))) { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "wc->byte_len=%u < %zu\n", |
| wc->byte_len, |
| offsetof(struct smbdirect_data_transfer, padding)); |
| goto error; |
| } |
| |
| data_transfer = (struct smbdirect_data_transfer *)recv_io->packet; |
| credits_requested = le16_to_cpu(data_transfer->credits_requested); |
| credits_granted = le16_to_cpu(data_transfer->credits_granted); |
| flags = le16_to_cpu(data_transfer->flags); |
| remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length); |
| data_offset = le32_to_cpu(data_transfer->data_offset); |
| data_length = le32_to_cpu(data_transfer->data_length); |
| |
| smbdirect_log_incoming(sc, SMBDIRECT_LOG_INFO, |
| "DataIn: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n", |
| "CreditsRequested", |
| credits_requested, |
| "CreditsGranted", |
| credits_granted, |
| "Flags", |
| flags, |
| "RemainingDataLength", |
| remaining_data_length, |
| "DataOffset", |
| data_offset, |
| "DataLength", |
| data_length); |
| |
| if (unlikely(credits_requested == 0)) { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "invalid: credits_requested == 0\n"); |
| goto error; |
| } |
| |
| if (unlikely(data_offset % 8 != 0)) { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "invalid: data_offset=%u (0x%x) not aligned to 8\n", |
| data_offset, data_offset); |
| goto error; |
| } |
| |
| if (unlikely(wc->byte_len < data_offset || |
| (u64)wc->byte_len < (u64)data_offset + data_length)) { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "wc->byte_len=%u < date_offset=%u + data_length=%u\n", |
| wc->byte_len, data_offset, data_length); |
| goto error; |
| } |
| |
| if (unlikely(remaining_data_length > sp->max_fragmented_recv_size || |
| data_length > sp->max_fragmented_recv_size || |
| (u64)remaining_data_length + (u64)data_length > (u64)sp->max_fragmented_recv_size)) { |
| smbdirect_log_rdma_event(sc, SMBDIRECT_LOG_ERR, |
| "remaining_data_length=%u + data_length=%u > max_fragmented=%u\n", |
| remaining_data_length, data_length, sp->max_fragmented_recv_size); |
| goto error; |
| } |
| |
| if (data_length) { |
| if (sc->recv_io.reassembly.full_packet_received) |
| recv_io->first_segment = true; |
| |
| if (remaining_data_length) |
| sc->recv_io.reassembly.full_packet_received = false; |
| else |
| sc->recv_io.reassembly.full_packet_received = true; |
| } |
| |
| atomic_dec(&sc->recv_io.posted.count); |
| current_recv_credits = atomic_dec_return(&sc->recv_io.credits.count); |
| |
| /* |
| * We take the value from the peer, which is checked to be greater than 0, |
| * but we limit it to the maximum value we support in order to keep |
| * the main logic simple. |
| */ |
| old_recv_credit_target = sc->recv_io.credits.target; |
| sc->recv_io.credits.target = credits_requested; |
| sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, |
| sp->recv_credit_max); |
| if (credits_granted) { |
| atomic_add(credits_granted, &sc->send_io.credits.count); |
| /* |
| * We have new send credits granted from remote peer |
| * If any sender is waiting for credits, unblock it |
| */ |
| wake_up(&sc->send_io.credits.wait_queue); |
| } |
| |
| /* Send an immediate response right away if requested */ |
| if (flags & SMBDIRECT_FLAG_RESPONSE_REQUESTED) { |
| smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, |
| "schedule send of immediate response\n"); |
| queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); |
| } |
| |
| /* |
| * If this is a packet with a data payload, place the data in the |
| * reassembly queue and wake up the reading thread. |
| */ |
| if (data_length) { |
| if (current_recv_credits <= (sc->recv_io.credits.target / 4) || |
| sc->recv_io.credits.target > old_recv_credit_target) |
| queue_work(sc->workqueues.refill, &sc->recv_io.posted.refill_work); |
| |
| smbdirect_connection_reassembly_append_recv_io(sc, recv_io, data_length); |
| wake_up(&sc->recv_io.reassembly.wait_queue); |
| } else |
| smbdirect_connection_put_recv_io(recv_io); |
| |
| return; |
| |
| error: |
| /* |
| * Make sure smbdirect_connection_put_recv_io() does not |
| * start recv_io.posted.refill_work. |
| */ |
| disable_work(&sc->recv_io.posted.refill_work); |
| smbdirect_connection_put_recv_io(recv_io); |
| smbdirect_socket_schedule_cleanup(sc, -ECONNABORTED); |
| } |
| |
| int smbdirect_connection_recv_io_refill(struct smbdirect_socket *sc) |
| { |
| int missing; |
| int posted = 0; |
| |
| if (unlikely(sc->first_error)) |
| return sc->first_error; |
| |
| /* |
| * Find out how many smbdirect_recv_io buffers we should post. |
| * |
| * Note that sc->recv_io.credits.target is the value |
| * from the peer and it can in theory change over time, |
| * but it is forced to be at least 1 and at max |
| * sp->recv_credit_max. |
| * |
| * So it can happen that missing will be lower than 0, |
| * which means the peer has recently lowered its desired |
| * target, while we already granted a higher number of credits. |
| * |
| * Note 'posted' is the number of smbdirect_recv_io buffers |
| * posted within this function, while sc->recv_io.posted.count |
| * is the overall number of posted smbdirect_recv_io buffers. |
| * |
| * We try to post as many buffers as are missing, but |
| * this is limited if a lot of smbdirect_recv_io buffers |
| * are still in the sc->recv_io.reassembly.list instead of |
| * the sc->recv_io.free.list. |
| */ |
| missing = (int)sc->recv_io.credits.target - atomic_read(&sc->recv_io.posted.count); |
| while (posted < missing) { |
| struct smbdirect_recv_io *recv_io; |
| int ret; |
| |
| /* |
| * It's ok if smbdirect_connection_get_recv_io() |
| * returns NULL, it means smbdirect_recv_io structures |
| * are still in the reassembly.list. |
| */ |
| recv_io = smbdirect_connection_get_recv_io(sc); |
| if (!recv_io) |
| break; |
| |
| recv_io->first_segment = false; |
| |
| ret = smbdirect_connection_post_recv_io(recv_io); |
| if (ret) { |
| smbdirect_log_rdma_recv(sc, SMBDIRECT_LOG_ERR, |
| "smbdirect_connection_post_recv_io failed rc=%d (%1pe)\n", |
| ret, SMBDIRECT_DEBUG_ERR_PTR(ret)); |
| smbdirect_connection_put_recv_io(recv_io); |
| return ret; |
| } |
| |
| atomic_inc(&sc->recv_io.posted.count); |
| posted += 1; |
| } |
| |
| /* If nothing was posted we're done */ |
| if (posted == 0) |
| return 0; |
| |
| atomic_add(posted, &sc->recv_io.credits.available); |
| |
| /* |
| * If a sender holding the last credit is waiting for |
| * receive credits it can grant to the peer, we need to wake it up. |
| */ |
| if (atomic_read(&sc->send_io.bcredits.count) == 0 && |
| atomic_read(&sc->send_io.credits.count) == 0) |
| wake_up(&sc->send_io.credits.wait_queue); |
| |
| /* |
| * If we posted at least one smbdirect_recv_io buffer, |
| * we need to inform the peer about it and grant |
| * additional credits. |
| * |
| * However there is one case where we don't want to |
| * do that. |
| * |
| * If only a single credit was missing before |
| * reaching the requested target, we should not |
| * post an immediate send, as that would cause |
| * endless ping pong once a keep alive exchange |
| * is started. |
| * |
| * However if sc->recv_io.credits.target is only 1, |
| * the peer has no credit left and we need to |
| * grant the credit anyway. |
| */ |
| if (missing == 1 && sc->recv_io.credits.target != 1) |
| return 0; |
| |
| return posted; |
| } |
| |
| static void smbdirect_connection_recv_io_refill_work(struct work_struct *work) |
| { |
| struct smbdirect_socket *sc = |
| container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); |
| int posted; |
| |
| posted = smbdirect_connection_recv_io_refill(sc); |
| if (unlikely(posted < 0)) { |
| smbdirect_socket_schedule_cleanup(sc, posted); |
| return; |
| } |
| if (posted > 0) { |
| smbdirect_log_keep_alive(sc, SMBDIRECT_LOG_INFO, |
| "schedule send of an empty message\n"); |
| queue_work(sc->workqueues.immediate, &sc->idle.immediate_work); |
| } |
| } |
| |
| int smbdirect_connection_recvmsg(struct smbdirect_socket *sc, |
| struct msghdr *msg, |
| unsigned int flags) |
| { |
| struct smbdirect_recv_io *response; |
| struct smbdirect_data_transfer *data_transfer; |
| size_t size = iov_iter_count(&msg->msg_iter); |
| int to_copy, to_read, data_read, offset; |
| u32 data_length, remaining_data_length, data_offset; |
| int ret; |
| |
| if (WARN_ONCE(flags, "unexpected flags=0x%x\n", flags)) |
| return -EINVAL; /* no flags support for now */ |
| |
| if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) != ITER_DEST)) |
| return -EINVAL; /* It's a bug in upper layer to get there */ |
| |
| again: |
| if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "status=%s first_error=%1pe => %1pe\n", |
| smbdirect_socket_status_string(sc->status), |
| SMBDIRECT_DEBUG_ERR_PTR(sc->first_error), |
| SMBDIRECT_DEBUG_ERR_PTR(-ENOTCONN)); |
| return -ENOTCONN; |
| } |
| |
| /* |
| * No need to hold the reassembly queue lock all the time as we are |
| * the only one reading from the front of the queue. The transport |
| * may add more entries to the back of the queue at the same time |
| */ |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "size=%zd sc->recv_io.reassembly.data_length=%d\n", |
| size, sc->recv_io.reassembly.data_length); |
| if (sc->recv_io.reassembly.data_length >= size) { |
| int queue_length; |
| int queue_removed = 0; |
| unsigned long flags; |
| |
| /* |
| * Need to make sure reassembly_data_length is read before |
| * reading reassembly_queue_length and calling |
| * smbdirect_connection_reassembly_first_recv_io(). This call is lock free |
| * as we never read the end of the queue, which is being |
| * updated in SOFTIRQ context as more data is received. |
| * The virt_rmb() pairs with the virt_wmb() in |
| * smbdirect_connection_reassembly_append_recv_io(). |
| */ |
| virt_rmb(); |
| queue_length = sc->recv_io.reassembly.queue_length; |
| data_read = 0; |
| to_read = size; |
| offset = sc->recv_io.reassembly.first_entry_offset; |
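| /* |
| * Copy from the front of the reassembly queue until the |
| * requested size has been read. Fully consumed buffers are |
| * unlinked and recycled via smbdirect_connection_put_recv_io(). |
| */ |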
| while (data_read < size) { |
| response = smbdirect_connection_reassembly_first_recv_io(sc); |
| data_transfer = (void *)response->packet; |
| data_length = le32_to_cpu(data_transfer->data_length); |
| remaining_data_length = |
| le32_to_cpu( |
| data_transfer->remaining_data_length); |
| data_offset = le32_to_cpu(data_transfer->data_offset); |
| |
| /* |
| * The upper layer expects the RFC1002 length at the |
| * beginning of the payload. Return it to indicate |
| * the total length of the packet. This minimizes the |
| * change to the upper layer packet processing logic. This |
| * will eventually be removed when an intermediate |
| * transport layer is added. |
| */ |
| if (response->first_segment && size == 4) { |
| unsigned int rfc1002_len = |
| data_length + remaining_data_length; |
| __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); |
| |
| if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), |
| &msg->msg_iter) != sizeof(rfc1002_hdr)) |
| return -EFAULT; |
| data_read = 4; |
| response->first_segment = false; |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "returning rfc1002 length %d\n", |
| rfc1002_len); |
| goto read_rfc1002_done; |
| } |
| |
| to_copy = min_t(int, data_length - offset, to_read); |
| if (copy_to_iter((u8 *)data_transfer + data_offset + offset, |
| to_copy, &msg->msg_iter) != to_copy) |
| return -EFAULT; |
| |
| /* move on to the next buffer? */ |
| if (to_copy == data_length - offset) { |
| queue_length--; |
| /* |
| * No need to lock if we are not at the |
| * end of the queue |
| */ |
| if (queue_length) { |
| list_del(&response->list); |
| } else { |
| spin_lock_irqsave( |
| &sc->recv_io.reassembly.lock, flags); |
| list_del(&response->list); |
| spin_unlock_irqrestore( |
| &sc->recv_io.reassembly.lock, flags); |
| } |
| queue_removed++; |
| sc->statistics.dequeue_reassembly_queue++; |
| smbdirect_connection_put_recv_io(response); |
| offset = 0; |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "smbdirect_connection_put_recv_io offset=0\n"); |
| } else { |
| offset += to_copy; |
| } |
| |
| to_read -= to_copy; |
| data_read += to_copy; |
| |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "memcpy %d bytes len-ofs=%u => todo=%u done=%u ofs=%u\n", |
| to_copy, data_length - offset, |
| to_read, data_read, offset); |
| } |
| |
| spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); |
| sc->recv_io.reassembly.data_length -= data_read; |
| sc->recv_io.reassembly.queue_length -= queue_removed; |
| spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); |
| |
| sc->recv_io.reassembly.first_entry_offset = offset; |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "returning data_read=%d reassembly_length=%d first_ofs=%u\n", |
| data_read, sc->recv_io.reassembly.data_length, |
| sc->recv_io.reassembly.first_entry_offset); |
| read_rfc1002_done: |
| return data_read; |
| } |
| |
| smbdirect_log_read(sc, SMBDIRECT_LOG_INFO, |
| "wait_event on more data\n"); |
| ret = wait_event_interruptible(sc->recv_io.reassembly.wait_queue, |
| sc->recv_io.reassembly.data_length >= size || |
| sc->status != SMBDIRECT_SOCKET_CONNECTED); |
| /* Don't return any data if interrupted */ |
| if (ret) |
| return ret; |
| |
| goto again; |
| } |
| __SMBDIRECT_EXPORT_SYMBOL__(smbdirect_connection_recvmsg); |
| |
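| /* |
| * DMA-map a single page fragment and append it to state->sge[]. |
| * Returns false if the sge array is already full or the DMA mapping |
| * failed, true otherwise. |
| */ |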
| static bool smbdirect_map_sges_single_page(struct smbdirect_map_sges *state, |
| struct page *page, size_t off, size_t len) |
| { |
| struct ib_sge *sge; |
| u64 addr; |
| |
| if (state->num_sge >= state->max_sge) |
| return false; |
| |
| addr = ib_dma_map_page(state->device, page, |
| off, len, state->direction); |
| if (ib_dma_mapping_error(state->device, addr)) |
| return false; |
| |
| sge = &state->sge[state->num_sge++]; |
| sge->addr = addr; |
| sge->length = len; |
| sge->lkey = state->local_dma_lkey; |
| |
| return true; |
| } |
| |
| /* |
| * Extract page fragments from a BVEC-class iterator and add them to an ib_sge |
| * list. The pages are not pinned. |
| */ |
| static ssize_t smbdirect_map_sges_from_bvec(struct iov_iter *iter, |
| struct smbdirect_map_sges *state, |
| ssize_t maxsize) |
| { |
| const struct bio_vec *bv = iter->bvec; |
| unsigned long start = iter->iov_offset; |
| unsigned int i; |
| ssize_t ret = 0; |
| |
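| /* Skip whole segments covered by iter->iov_offset, then map the rest. */ |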
| for (i = 0; i < iter->nr_segs; i++) { |
| size_t off, len; |
| bool ok; |
| |
| len = bv[i].bv_len; |
| if (start >= len) { |
| start -= len; |
| continue; |
| } |
| |
| len = min_t(size_t, maxsize, len - start); |
| off = bv[i].bv_offset + start; |
| |
| ok = smbdirect_map_sges_single_page(state, |
| bv[i].bv_page, |
| off, |
| len); |
| if (!ok) |
| return -EIO; |
| |
| ret += len; |
| maxsize -= len; |
| if (state->num_sge >= state->max_sge || maxsize <= 0) |
| break; |
| start = 0; |
| } |
| |
| if (ret > 0) |
| iov_iter_advance(iter, ret); |
| return ret; |
| } |
| |
| /* |
| * Extract fragments from a KVEC-class iterator and add them to an ib_sge list. |
| * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers. |
| * The pages are not pinned. |
| */ |
| static ssize_t smbdirect_map_sges_from_kvec(struct iov_iter *iter, |
| struct smbdirect_map_sges *state, |
| ssize_t maxsize) |
| { |
| const struct kvec *kv = iter->kvec; |
| unsigned long start = iter->iov_offset; |
| unsigned int i; |
| ssize_t ret = 0; |
| |
| for (i = 0; i < iter->nr_segs; i++) { |
| struct page *page; |
| unsigned long kaddr; |
| size_t off, len, seg; |
| |
| len = kv[i].iov_len; |
| if (start >= len) { |
| start -= len; |
| continue; |
| } |
| |
| kaddr = (unsigned long)kv[i].iov_base + start; |
| off = kaddr & ~PAGE_MASK; |
| len = min_t(size_t, maxsize, len - start); |
| kaddr &= PAGE_MASK; |
| |
| maxsize -= len; |
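| /* Split the [kaddr, kaddr + len) range into per-page fragments. */ |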
| do { |
| bool ok; |
| |
| seg = min_t(size_t, len, PAGE_SIZE - off); |
| |
| if (is_vmalloc_or_module_addr((void *)kaddr)) |
| page = vmalloc_to_page((void *)kaddr); |
| else |
| page = virt_to_page((void *)kaddr); |
| |
| ok = smbdirect_map_sges_single_page(state, page, off, seg); |
| if (!ok) |
| return -EIO; |
| |
| ret += seg; |
| len -= seg; |
| kaddr += PAGE_SIZE; |
| off = 0; |
| } while (len > 0 && state->num_sge < state->max_sge); |
| |
| if (state->num_sge >= state->max_sge || maxsize <= 0) |
| break; |
| start = 0; |
| } |
| |
| if (ret > 0) |
| iov_iter_advance(iter, ret); |
| return ret; |
| } |
| |
| /* |
| * Extract folio fragments from a FOLIOQ-class iterator and add them to an |
| * ib_sge list. The folios are not pinned. |
| */ |
| static ssize_t smbdirect_map_sges_from_folioq(struct iov_iter *iter, |
| struct smbdirect_map_sges *state, |
| ssize_t maxsize) |
| { |
| const struct folio_queue *folioq = iter->folioq; |
| unsigned int slot = iter->folioq_slot; |
| ssize_t ret = 0; |
| size_t offset = iter->iov_offset; |
| |
| if (WARN_ON_ONCE(!folioq)) |
| return -EIO; |
| |
| if (slot >= folioq_nr_slots(folioq)) { |
| folioq = folioq->next; |
| if (WARN_ON_ONCE(!folioq)) |
| return -EIO; |
| slot = 0; |
| } |
| |
| do { |
| struct folio *folio = folioq_folio(folioq, slot); |
| size_t fsize = folioq_folio_size(folioq, slot); |
| |
| if (offset < fsize) { |
| size_t part = umin(maxsize, fsize - offset); |
| bool ok; |
| |
| ok = smbdirect_map_sges_single_page(state, |
| folio_page(folio, 0), |
| offset, |
| part); |
| if (!ok) |
| return -EIO; |
| |
| offset += part; |
| ret += part; |
| maxsize -= part; |
| } |
| |
| if (offset >= fsize) { |
| offset = 0; |
| slot++; |
| if (slot >= folioq_nr_slots(folioq)) { |
| if (!folioq->next) { |
| WARN_ON_ONCE(ret < iter->count); |
| break; |
| } |
| folioq = folioq->next; |
| slot = 0; |
| } |
| } |
| } while (state->num_sge < state->max_sge && maxsize > 0); |
| |
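| /* Advance the iterator by hand as we walked the folio queue directly. */ |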
| iter->folioq = folioq; |
| iter->folioq_slot = slot; |
| iter->iov_offset = offset; |
| iter->count -= ret; |
| return ret; |
| } |
| |
| /* |
| * Extract page fragments from up to the given amount of the source iterator |
| * and build up an ib_sge list that refers to all of those bits. The ib_sge list |
| * is appended to, up to the maximum number of elements set in the parameter |
| * block. |
| * |
| * The extracted page fragments are not pinned or ref'd in any way; if an |
| * IOVEC/UBUF-type iterator is to be used, it should be converted to a |
| * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some |
| * way. |
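| * |
| * Returns the number of bytes mapped or a negative error code; on |
| * error, any sges added by this call are unmapped again. |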
| */ |
| static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len, |
| struct smbdirect_map_sges *state) |
| { |
| ssize_t ret; |
| size_t before = state->num_sge; |
| |
| if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE)) |
| return -EIO; |
| |
| switch (iov_iter_type(iter)) { |
| case ITER_BVEC: |
| ret = smbdirect_map_sges_from_bvec(iter, state, len); |
| break; |
| case ITER_KVEC: |
| ret = smbdirect_map_sges_from_kvec(iter, state, len); |
| break; |
| case ITER_FOLIOQ: |
| ret = smbdirect_map_sges_from_folioq(iter, state, len); |
| break; |
| default: |
| WARN_ONCE(1, "unexpected iov_iter_type[%u]\n", iov_iter_type(iter)); |
| return -EIO; |
| } |
| |
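| /* |
| * On failure, unmap and drop any sges this call added so the |
| * caller does not leak DMA mappings. |
| */ |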
| if (ret < 0) { |
| while (state->num_sge > before) { |
| struct ib_sge *sge = &state->sge[--state->num_sge]; |
| |
| ib_dma_unmap_page(state->device, |
| sge->addr, |
| sge->length, |
| state->direction); |
| } |
| } |
| |
| return ret; |
| } |