netfs: Support encryption on Unbuffered/DIO write Support unbuffered and direct I/O writes to an encrypted file. This may require making an RMW cycle if the write is not appropriately aligned with respect to the crypto blocks. Signed-off-by: David Howells <dhowells@redhat.com> cc: Paulo Alcantara <pc@manguebit.org> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org

diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 3d108f5..4866304 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c

@@ -10,26 +10,251 @@
 #include "internal.h"
 
 /*
+ * Perform a read to a buffer from the server, slicing up the region to be read
+ * according to the network rsize.
+ */
+static bool netfs_rmw_read_one(struct netfs_io_request *rreq, struct bvecq *bq)
+{
+	struct netfs_io_stream *stream = &rreq->io_streams[0];
+	size_t len = 0;
+	int ret = 0;
+
+	for (int i = 0; i < bq->nr_slots; i++)
+		len += bq->bv[i].bv_len;
+
+	rreq->start		= bq->fpos;
+	rreq->len		= len;
+	stream->issue_from	= bq->fpos;
+	stream->buffered	= len;
+
+	do {
+		struct netfs_io_subrequest *subreq;
+
+		subreq = netfs_alloc_subrequest(rreq, NETFS_DOWNLOAD_FROM_SERVER);
+		if (!subreq) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		subreq->start	= stream->issue_from;
+		subreq->len	= stream->buffered;
+
+		spin_lock(&rreq->lock);
+		list_add_tail(&subreq->rreq_link, &stream->subrequests);
+		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
+		spin_unlock(&rreq->lock);
+
+		netfs_stat(&netfs_n_rh_download);
+		rreq->netfs_ops->issue_read(subreq);
+
+		cond_resched();
+	} while (stream->buffered > 0);
+
+	return ret;
+}
+
+/*
+ * Perform the read side of an RMW write.  We're supplied with a chain of one
+ * or two buffers into which we should read directly.
+ */
+static ssize_t netfs_rmw_read(struct netfs_io_request *wreq, struct bvecq *bq)
+{
+	struct netfs_io_request *rreq;
+	struct netfs_io_stream *stream;
+	ssize_t ret;
+
+	_enter("RMW:R=%x %llx", wreq->debug_id, bq->fpos);
+
+	rreq = netfs_alloc_request(wreq->mapping, NULL, bq->fpos, 0, NETFS_RMW_READ);
+	if (IS_ERR(rreq))
+		return PTR_ERR(rreq);
+	stream = &rreq->io_streams[0];
+
+	stream->dispatch_cursor.bvecq = bvecq_get(bq);
+	stream->dispatch_cursor.slot = 0;
+	stream->dispatch_cursor.offset = 0;
+
+	bvecq_pos_set(&rreq->encrypt_cursor, &stream->dispatch_cursor);
+	bvecq_pos_set(&rreq->bounce_copy, &stream->dispatch_cursor);
+	bvecq_pos_set(&rreq->bounce_collect, &stream->dispatch_cursor);
+
+	__set_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags);
+
+	netfs_rmw_read_one(rreq, bq);
+	if (bq->next)
+		netfs_rmw_read_one(rreq, bq->next);
+
+	ret = netfs_wait_for_read(rreq);
+	netfs_put_request(rreq, netfs_rreq_trace_put_return);
+	return ret;
+}
+
+/*
+ * Read gaps at either end of the bounce buffer that need to be filled for an
+ * RMW cycle.
+ */
+static ssize_t netfs_unbuffered_rmw(struct netfs_io_request *wreq,
+				    struct netfs_io_subrequest *subreq,
+				    unsigned long long to,
+				    unsigned long long end)
+{
+	struct bvecq *before = NULL, *after = NULL;
+	size_t bsize = wreq->crypto_bsize;
+	int ret;
+
+	_enter("%llx,%llx", to, end);
+
+	/* Build a buffer chain to cover the gaps.  If we have two gaps, they
+	 * must be discontiguous and so we will need two separate bvecqs for
+	 * that; however, if the entire write spans at most two pages, just do
+	 * one read for both gaps plus the middle.
+	 */
+	if (subreq->start < wreq->start) {
+		before = bvecq_alloc_one(2, GFP_KERNEL);
+		if (!before)
+			return -ENOMEM;
+		before->fpos = subreq->start;
+		before->bv[0] = wreq->encrypt_cursor.bvecq->bv[wreq->encrypt_cursor.slot];
+		before->bv[0].bv_offset += wreq->encrypt_cursor.offset;
+		before->bv[0].bv_len = bsize;
+		bvecq_filled_to(before, 1);
+	}
+
+	if (to == end && subreq->start + subreq->len < to) {
+		size_t part = end - subreq->start;
+
+		if (before && part <= 2 * PAGE_SIZE) {
+			struct bvecq *bq;
+			size_t page0 = PAGE_SIZE - before->bv[0].bv_offset;
+			int slot;
+
+			if (part <= page0) {
+				before->bv[0].bv_len = part;
+				bvecq_filled_to(before, 1);
+				goto do_it;
+			}
+
+			bq = wreq->encrypt_cursor.bvecq;
+			slot = wreq->encrypt_cursor.slot + 1;
+			if (slot > bq->nr_slots) {
+				bq = bq->next;
+				slot = 0;
+			}
+
+			before->bv[0].bv_len = page0;
+			before->bv[1] = bq->bv[slot];
+			before->bv[1].bv_len = part - page0;
+			bvecq_filled_to(before, 2);
+			goto do_it;
+		}
+
+		after = bvecq_alloc_one(1, GFP_KERNEL);
+		if (!after) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		after->fpos = to - bsize;
+		after->bv[0] = wreq->bounce_alloc.bvecq->bv[wreq->bounce_alloc.slot];
+		after->bv[0].bv_offset = to & (PAGE_SIZE - 1);
+		after->bv[0].bv_len = bsize;
+		bvecq_filled_to(after, 1);
+	}
+
+	if (before && after) {
+		before->next = after;
+		after->prev = before;
+		after->discontig = true;
+	}
+
+do_it:
+	ret = netfs_rmw_read(wreq, before ?: after);
+
+out:
+	bvecq_put(before ?: after);
+	return ret;
+}
+
+/*
+ * Load data into the bounce buffer and encrypt it.
+ */
+static int netfs_unbuffered_load_bounce(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
+	unsigned long long to, end;
+	ssize_t got;
+	size_t amount = subreq->len;
+	int ret;
+
+	/* Expand the bounce buffer as needed. */
+	to = round_up(subreq->start + subreq->len, wreq->crypto_bsize);
+	end = round_up(wreq->start + wreq->len, wreq->crypto_bsize);
+
+	if (wreq->bounce_alloc_to < to) {
+		ret = bvecq_buffer_add_space(&wreq->bounce_alloc,
+					     &wreq->bounce_alloc_to,
+					     to, end, false, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Perform RMW if there are gaps to be filled. */
+	if (stream->issue_from < wreq->start ||
+	    (to == end && subreq->start + subreq->len < to)) {
+		ret = netfs_unbuffered_rmw(wreq, subreq, to, end);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Copy in the data.  We need to work around any RMW gaps. */
+	if (subreq->start < wreq->start + wreq->submitted)
+		amount -= wreq->submitted;
+	if (amount > wreq->len - wreq->submitted)
+		amount = wreq->len - wreq->submitted;
+
+	got = bvecq_copy_to_bvecq(&wreq->copy_cursor, &wreq->bounce_copy, amount);
+	if (got != amount)
+		return -EFAULT;
+
+	/* And then encrypt the data in-place. */
+	return netfs_encrypt(wreq, to, GFP_KERNEL);
+}
+
+/*
  * Prepare the buffer for an unbuffered/DIO write.
  */
 int netfs_prepare_unbuffered_write_buffer(struct netfs_io_subrequest *subreq,
 					  unsigned int max_segs, bool copy)
 {
-	struct netfs_io_stream *stream = &subreq->rreq->io_streams[subreq->stream_nr];
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
 	ssize_t got;
 	size_t len;
+	int ret;
+
+	len = subreq->len;
+	if (test_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &wreq->flags) &&
+	    len >= wreq->crypto_bsize)
+		len = round_down(len, wreq->crypto_bsize);
+
+	if (test_bit(NETFS_RREQ_USE_BOUNCE_BUFFER, &wreq->flags)) {
+		ret = netfs_unbuffered_load_bounce(subreq);
+		if (ret < 0)
+			return ret;
+	}
 
 	bvecq_pos_set(&subreq->dispatch_pos, &stream->dispatch_cursor);
+
 	if (copy) {
-		got = bvecq_extract(&stream->dispatch_cursor, subreq->len, max_segs,
+		got = bvecq_extract(&stream->dispatch_cursor, len, max_segs,
 				    &subreq->content.bvecq);
 		if (got < 0)
 			return -ENOMEM;
 		len = got;
 	} else {
 		bvecq_pos_set(&subreq->content, &stream->dispatch_cursor);
-		len = bvecq_slice(&stream->dispatch_cursor, subreq->len, max_segs,
-				  &subreq->nr_segs);
+
+		len = bvecq_slice(&stream->dispatch_cursor, len, max_segs, &subreq->nr_segs);
 	}
 
 	if (len < subreq->len) {
@@ -143,12 +368,11 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
 
 	stream->issue_from = wreq->start;
 	stream->buffered = wreq->len;
-	bvecq_pos_set(&stream->dispatch_cursor, &wreq->load_cursor);
-	bvecq_pos_set(&wreq->collect_cursor, &stream->dispatch_cursor);
 
 	if (wreq->origin == NETFS_DIO_WRITE)
 		inode_dio_begin(wreq->inode);
 
+
 	for (;;) {
 		bool retry = false;
 
@@ -243,20 +467,15 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
 					   struct netfs_group *netfs_group)
 {
 	struct netfs_io_request *wreq;
+	struct netfs_io_stream *stream;
 	unsigned long long start = iocb->ki_pos;
 	unsigned long long end = start + iov_iter_count(iter);
-	ssize_t ret;
+	ssize_t ret, n;
 	size_t len = iov_iter_count(iter);
 	bool async = !is_sync_kiocb(iocb);
 
 	_enter("");
 
-	/* We're going to need a bounce buffer if what we transmit is going to
-	 * be different in some way to the source buffer, e.g. because it gets
-	 * encrypted/compressed or because it needs expanding to a block size.
-	 */
-	// TODO
-
 	_debug("uw %llx-%llx", start, end);
 
 	wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
@@ -266,33 +485,81 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
 		return PTR_ERR(wreq);
 
 	wreq->len = iov_iter_count(iter);
-	wreq->io_streams[0].avail = true;
+	wreq->submitted = 0;
+	stream = &wreq->io_streams[0];
+	stream->avail = true;
 	trace_netfs_write(wreq, (iocb->ki_flags & IOCB_DIRECT ?
 				 netfs_write_trace_dio_write :
 				 netfs_write_trace_unbuffered_write));
 
-	{
-		/* If this is an async op and we're not using a bounce buffer,
-		 * we have to save the source buffer as the iterator is only
-		 * good until we return.  In such a case, extract an iterator
-		 * to represent as much of the the output buffer as we can
-		 * manage.  Note that the extraction may shorten the request.
-		 */
-		ssize_t n = netfs_extract_iter(iter, len, INT_MAX, iocb->ki_pos,
-					       &wreq->load_cursor.bvecq, 0);
-
-		if (n < 0) {
-			ret = n;
-			goto error_put;
-		}
-		wreq->len = n;
-		_debug("dio-write %zx/%zx %u/%u",
-		       n, len, wreq->load_cursor.bvecq->nr_slots,
-		       wreq->load_cursor.bvecq->max_slots);
+	/* If we're going to do encryption or compression, we're going to need
+	 * a bounce buffer.
+	 */
+	if (test_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &wreq->flags)) {
+		__set_bit(NETFS_RREQ_USE_BOUNCE_BUFFER, &wreq->flags);
+		__set_bit(NETFS_RREQ_CRYPT_IN_PLACE, &wreq->flags);
 	}
 
-	/* Copy the data into the bounce buffer and encrypt it. */
-	// TODO
+	/* Transcribe the source buffer into a bvecq chain.  We need this for
+	 * async writes because the source iterator but we also use it for
+	 * unencrypted sync writes as it gets passed to the filesystem in this
+	 * form.
+	 *
+	 * We extract as much of the buffer as we can manage, but this may
+	 * shorten the request.
+	 */
+	n = netfs_extract_iter(iter, len, INT_MAX, iocb->ki_pos,
+			       &wreq->load_cursor.bvecq, 0);
+	if (n < 0) {
+		ret = n;
+		goto error_put;
+	}
+	wreq->len = n;
+	_debug("dio-write %zx/%zx %u/%u",
+	       n, len, wreq->load_cursor.bvecq->nr_slots,
+	       wreq->load_cursor.bvecq->max_slots);
+
+	/* Set up the bounce buffer if we need it.  Allow for padding the
+	 * request out to the crypo block size and allocate at least one bvecq
+	 * into it.
+	 */
+	if (test_bit(NETFS_RREQ_USE_BOUNCE_BUFFER, &wreq->flags)) {
+		size_t bsize = wreq->crypto_bsize;
+		size_t gap;
+
+		bvecq_pos_set(&wreq->copy_cursor, &wreq->load_cursor);
+
+		wreq->bounce_alloc_to = round_down(wreq->start, bsize);
+		atomic64_set(&wreq->encrypted_to, wreq->bounce_alloc_to);
+		gap = wreq->start - wreq->bounce_alloc_to;
+
+		stream->issue_from = wreq->bounce_alloc_to;
+		stream->buffered = round_up(wreq->len + gap, bsize);
+
+		ret = bvecq_buffer_init(&wreq->bounce_alloc, wreq->debug_id);
+		if (ret < 0)
+			goto error_put;
+
+		/*   0--->
+		 *  ~--+-------+-------+-------+-------+---~
+		 *     :       |       |       |       |
+		 *     :spent  |encrypt|copied |alloced|
+		 *     :       |-ed    |       |       |
+		 *  ~--+-------+-------+-------+-------+---~
+		 *                                     ^bounce_alloc
+		 *                             ^bounce_copy
+		 *                     ^encrypt_cursor
+		 *             ^dispatch_cursor
+		 */
+		bvecq_pos_set(&wreq->bounce_copy, &wreq->bounce_alloc);
+		bvecq_pos_set(&wreq->encrypt_cursor, &wreq->bounce_alloc);
+		bvecq_pos_set(&stream->dispatch_cursor, &wreq->bounce_alloc);
+
+	} else {
+		stream->buffered = ret;
+		stream->issue_from = wreq->start;
+		bvecq_pos_set(&stream->dispatch_cursor, &wreq->load_cursor);
+	}
 
 	/* Dispatch the write. */
 	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);

diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index a64bd28..73216a0 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c

@@ -44,6 +44,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
 	[NETFS_WRITEBACK]		= "WB",
 	[NETFS_WRITEBACK_SINGLE]	= "W1",
 	[NETFS_WRITETHROUGH]		= "WT",
+	[NETFS_RMW_READ]		= "RM",
 	[NETFS_UNBUFFERED_WRITE]	= "UW",
 	[NETFS_DIO_WRITE]		= "DW",
 	[NETFS_PGPRIV2_COPY_TO_CACHE]	= "2C",

diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index f618581..d295580 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c

@@ -138,6 +138,7 @@ static void netfs_deinit_request(struct netfs_io_request *rreq)
 	if (rreq->cache_resources.ops)
 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
 	bvecq_pos_unset(&rreq->load_cursor);
+	bvecq_pos_unset(&rreq->copy_cursor);
 	bvecq_pos_unset(&rreq->collect_cursor);
 	bvecq_pos_unset(&rreq->bounce_alloc);
 	bvecq_pos_unset(&rreq->encrypt_cursor);

diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 739713c..7bda8cf 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c

@@ -437,6 +437,7 @@ bool netfs_read_collection(struct netfs_io_request *rreq)
 	case NETFS_UNBUFFERED_READ:
 	case NETFS_DIO_READ:
 	case NETFS_READ_GAPS:
+	case NETFS_RMW_READ:
 		netfs_rreq_assess_dio(rreq);
 		break;
 	case NETFS_READ_SINGLE:

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 06e2265..394cf3e 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h

@@ -231,6 +231,7 @@ enum netfs_io_origin {
 	NETFS_WRITEBACK,		/* This write was triggered by writepages */
 	NETFS_WRITEBACK_SINGLE,		/* This monolithic write was triggered by writepages */
 	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
+	NETFS_RMW_READ,			/* This is an unbuffered read for RMW */
 	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
 	NETFS_DIO_WRITE,		/* This is a direct I/O write */
 	NETFS_PGPRIV2_COPY_TO_CACHE,	/* [DEPRECATED] This is writing read data to the cache */
@@ -260,6 +261,7 @@ struct netfs_io_request {
 	struct netfs_group	*group;		/* Writeback group being written back */
 	struct bvecq		*spare;		/* Advance allocation of bvecq */
 	struct bvecq_pos	load_cursor;	/* Point at which new folios are loaded in */
+	struct bvecq_pos	copy_cursor;	/* Copy-out point from main buffer list */
 	struct bvecq_pos	collect_cursor;	/* Clear-up point of I/O buffer */
 	struct bvecq_pos	bounce_alloc;	/* Bounce buffer allocation point */
 	struct bvecq_pos	encrypt_cursor;	/* Encrypt dispatch point */

diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index b143a0d..2415631 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h

@@ -44,6 +44,7 @@
 	EM(NETFS_WRITEBACK,			"WB")		\
 	EM(NETFS_WRITEBACK_SINGLE,		"W1")		\
 	EM(NETFS_WRITETHROUGH,			"WT")		\
+	EM(NETFS_RMW_READ,			"RM")		\
 	EM(NETFS_UNBUFFERED_WRITE,		"UW")		\
 	EM(NETFS_DIO_WRITE,			"DW")		\
 	E_(NETFS_PGPRIV2_COPY_TO_CACHE,		"2C")