io_uring: fix nvme's 32b cqes on mixed cq

The nvme uring_cmd only posts 32b CQEs. If the ring uses a mixed CQ, we
need to make sure the completion is flagged as a 32b CQE
(IORING_CQE_F_32).
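With the helpers added by this patch, the driver-facing completion API
roughly becomes (sketch only):

	/* posts a normal 16b CQE */
	io_uring_cmd_done(ioucmd, ret, res2, issue_flags);

	/* posts a 32b CQE carrying res2; on an IORING_SETUP_CQE_MIXED
	 * ring the CQE is additionally flagged with IORING_CQE_F_32
	 */
	io_uring_cmd_done32(ioucmd, ret, res2, issue_flags);

nvme's uring_cmd completion now uses the latter.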

On the other hand, if the ring was set up with dedicated 32b CQEs, the
posting was missing the extra memcpy of the big CQE data, because that
copy was only applied to 32b-flagged CQEs on a mixed CQ.
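The posting condition in io_fill_cqe_req() is extended accordingly; in
sketch form:

	/* copy the big CQE data for dedicated CQE32 rings as well as for
	 * completions flagged as 32b on a mixed CQ
	 */
	if (ctx->flags & IORING_SETUP_CQE32 || is_cqe32) {
		memcpy(cqe->big_cqe, &req->big_cqe, sizeof(*cqe));
		memset(&req->big_cqe, 0, sizeof(req->big_cqe));
	}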

Fixes: e26dca67fde1943 ("io_uring: add support for IORING_SETUP_CQE_MIXED")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 6b3ac8a..e28bb91 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -410,7 +410,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
 
 	if (pdu->bio)
 		blk_rq_unmap_user(pdu->bio);
-	io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
+	io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, issue_flags);
 }
 
 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index c8185f5..02d50f0 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -56,8 +56,8 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
  * Note: the caller should never hard code @issue_flags and is only allowed
  * to pass the mask provided by the core io_uring code.
  */
-void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2,
-			unsigned issue_flags);
+void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2,
+			 unsigned issue_flags, bool is_cqe32);
 
 void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
 			    io_uring_cmd_tw_t task_work_cb,
@@ -104,8 +104,8 @@ static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
 {
 	return -EOPNOTSUPP;
 }
-static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret,
-		u64 ret2, unsigned issue_flags)
+static inline void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret,
+		u64 ret2, unsigned issue_flags, bool is_cqe32)
 {
 }
 static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
@@ -159,6 +159,18 @@ static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd)
 	return cmd_to_io_kiocb(cmd)->ctx;
 }
 
+static inline void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret,
+				     u64 res2, unsigned issue_flags)
+{
+	return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, false);
+}
+
+static inline void io_uring_cmd_done32(struct io_uring_cmd *ioucmd, s32 ret,
+				       u64 res2, unsigned issue_flags)
+{
+	return __io_uring_cmd_done(ioucmd, ret, res2, issue_flags, true);
+}
+
 int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
 			    void (*release)(void *), unsigned int index,
 			    unsigned int issue_flags);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index e92099d..af4c113 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -275,7 +275,7 @@ static __always_inline bool io_fill_cqe_req(struct io_ring_ctx *ctx,
 		return false;
 
 	memcpy(cqe, &req->cqe, sizeof(*cqe));
-	if (is_cqe32) {
+	if (ctx->flags & IORING_SETUP_CQE32 || is_cqe32) {
 		memcpy(cqe->big_cqe, &req->big_cqe, sizeof(*cqe));
 		memset(&req->big_cqe, 0, sizeof(req->big_cqe));
 	}
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 5562e84..a688c9f 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -151,8 +151,8 @@ static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
  * Called by consumers of io_uring_cmd, if they originally returned
  * -EIOCBQUEUED upon receiving the command.
  */
-void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2,
-		       unsigned issue_flags)
+void __io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2,
+		       unsigned issue_flags, bool is_cqe32)
 {
 	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
 
@@ -165,8 +165,11 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2,
 		req_set_fail(req);
 
 	io_req_set_res(req, ret, 0);
-	if (req->ctx->flags & IORING_SETUP_CQE32)
+	if (is_cqe32) {
+		if (req->ctx->flags & IORING_SETUP_CQE_MIXED)
+			req->cqe.flags |= IORING_CQE_F_32;
 		io_req_set_cqe32_extra(req, res2, 0);
+	}
 	io_req_uring_cleanup(req, issue_flags);
 	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
 		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
@@ -180,7 +183,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, s32 ret, u64 res2,
 		io_req_task_work_add(req);
 	}
 }
-EXPORT_SYMBOL_GPL(io_uring_cmd_done);
+EXPORT_SYMBOL_GPL(__io_uring_cmd_done);
 
 int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {