// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem write subrequest result collection, assessment
* and retrying.
*
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include "internal.h"
/* Notes made in the collector */
#define HIT_PENDING 0x01 /* A front op was still pending */
#define NEED_REASSESS 0x02 /* Need to loop round and reassess */
#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
#define NEED_UNLOCK 0x08 /* The pagecache needs unlocking */
#define NEED_RETRY 0x10 /* A front op requests retrying */
#define SAW_FAILURE	0x20	/* One of the streams hit a permanent failure */
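
/*
 * Dump the state of a request and its streams to the kernel log for
 * debugging.
 */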
static void netfs_dump_request(const struct netfs_io_request *rreq)
{
pr_err("Request R=%08x r=%d fl=%lx or=%x e=%ld\n",
rreq->debug_id, refcount_read(&rreq->ref), rreq->flags,
rreq->origin, rreq->error);
pr_err(" st=%llx tsl=%zx/%llx/%llx\n",
rreq->start, rreq->transferred, rreq->submitted, rreq->len);
pr_err(" cci=%llx/%llx\n",
rreq->cleaned_to, rreq->collected_to);
pr_err(" iw=%pSR\n", rreq->netfs_ops->issue_write);
for (int i = 0; i < NR_IO_STREAMS; i++) {
const struct netfs_io_subrequest *sreq;
const struct netfs_io_stream *s = &rreq->io_streams[i];
pr_err(" str[%x] s=%x e=%d acnf=%u,%u,%u,%u\n",
s->stream_nr, s->source, s->error,
s->avail, s->active, s->need_retry, s->failed);
pr_err(" str[%x] it=%llx ct=%llx t=%zx\n",
s->stream_nr, atomic64_read(&s->issued_to),
s->collected_to, s->transferred);
list_for_each_entry(sreq, &s->subrequests, rreq_link) {
pr_err(" sreq[%x:%x] sc=%u s=%llx t=%zx/%zx r=%d f=%lx\n",
sreq->stream_nr, sreq->debug_index, sreq->source,
sreq->start, sreq->transferred, sreq->len,
refcount_read(&sreq->ref), sreq->flags);
}
}
}

/*
* Successful completion of write of a folio to the server and/or cache. Note
* that we are not allowed to lock the folio here on pain of deadlocking with
* truncate.
*/
int netfs_folio_written_back(struct folio *folio)
{
enum netfs_folio_trace why = netfs_folio_trace_endwb;
struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
struct netfs_folio *finfo;
struct netfs_group *group = NULL;
	int gcount = 0;

if ((finfo = netfs_folio_info(folio))) {
/* Streaming writes cannot be redirtied whilst under writeback,
* so discard the streaming record.
*/
unsigned long long fend;
fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
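		/* The file now contains data out to fend, so the zero point
		 * cannot lie before that.
		 */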
if (fend > ictx->zero_point)
ictx->zero_point = fend;
folio_detach_private(folio);
group = finfo->netfs_group;
gcount++;
kfree(finfo);
why = netfs_folio_trace_endwb_s;
goto end_wb;
}
if ((group = netfs_folio_group(folio))) {
if (group == NETFS_FOLIO_COPY_TO_CACHE) {
why = netfs_folio_trace_endwb_cc;
folio_detach_private(folio);
goto end_wb;
}
/* Need to detach the group pointer if the page didn't get
* redirtied. If it has been redirtied, then it must be within
* the same group.
*/
why = netfs_folio_trace_redirtied;
if (!folio_test_dirty(folio)) {
folio_detach_private(folio);
gcount++;
why = netfs_folio_trace_endwb_g;
}
	}

end_wb:
trace_netfs_folio(folio, why);
folio_end_writeback(folio);
return gcount;
}

/*
* Unlock any folios we've finished with.
*/
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
unsigned int *notes)
{
struct bvecq *bvecq = wreq->collect_cursor.bvecq;
unsigned long long collected_to = wreq->collected_to;
	unsigned int slot = wreq->collect_cursor.slot;

if (WARN_ON_ONCE(!bvecq)) {
pr_err("[!] Writeback unlock found empty buffer!\n");
netfs_dump_request(wreq);
return;
}
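
	/* Folios copied to the cache by the PGPRIV2 mechanism are tracked and
	 * unlocked by a separate routine.
	 */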
if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
if (netfs_pgpriv2_unlock_copied_folios(wreq))
*notes |= MADE_PROGRESS;
return;
}
if (slot >= bvecq->nr_segs) {
/* We need to be very careful - the cleanup can catch the
* dispatcher, which could lead to us having nothing left in
* the queue, causing the front and back pointers to end up on
* different tracks. To avoid this, we must always keep at
* least one segment in the queue.
*/
bvecq = bvecq_buffer_delete_spent(&wreq->collect_cursor);
if (!bvecq)
return;
slot = 0;
}
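
	/* Walk the buffer, ending writeback on each folio that the collection
	 * point has passed and advancing the cleaned-to position.
	 */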
for (;;) {
struct folio *folio;
struct netfs_folio *finfo;
unsigned long long fpos, fend;
		size_t fsize, flen;

folio = page_folio(bvecq->bv[slot].bv_page);
if (WARN_ONCE(!folio_test_writeback(folio),
"R=%08x: folio %lx is not under writeback\n",
wreq->debug_id, folio->index))
trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
fpos = folio_pos(folio);
fsize = folio_size(folio);
finfo = netfs_folio_info(folio);
flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;
fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
trace_netfs_collect_folio(wreq, folio, fend, collected_to);
/* Unlock any folio we've transferred all of. */
if (collected_to < fend)
break;
wreq->nr_group_rel += netfs_folio_written_back(folio);
wreq->cleaned_to = fpos + fsize;
*notes |= MADE_PROGRESS;
/* Clean up the head bvecq. If we clear an entire bvecq, then
* we can get rid of it provided it's not also the tail bvecq
* being filled by the issuer.
*/
bvecq->bv[slot].bv_page = NULL;
slot++;
if (slot >= bvecq->nr_segs) {
bvecq = bvecq_buffer_delete_spent(&wreq->collect_cursor);
if (!bvecq)
goto done;
slot = 0;
}
if (fpos + fsize >= collected_to)
break;
	}

done:
wreq->collect_cursor.slot = slot;
}

/*
* Collect and assess the results of various write subrequests. We may need to
* retry some of the results - or even do an RMW cycle for content crypto.
*
* Note that we have a number of parallel, overlapping lists of subrequests,
* one to the server and one to the local cache for example, which may not be
* the same size or starting position and may not even correspond in boundary
* alignment.
*/
static void netfs_collect_write_results(struct netfs_io_request *wreq)
{
struct netfs_io_subrequest *front, *remove;
struct netfs_io_stream *stream;
unsigned long long collected_to, issued_to;
unsigned int notes;
	int s;

_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
reassess_streams:
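	/* Start the pass with the points maximal; they get wound down to the
	 * lowest position reached by any active stream.
	 */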
issued_to = ULLONG_MAX;
collected_to = ULLONG_MAX;
if (wreq->origin == NETFS_WRITEBACK ||
wreq->origin == NETFS_WRITETHROUGH ||
wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
notes = NEED_UNLOCK;
else
notes = 0;
/* Remove completed subrequests from the front of the streams and
* advance the completion point on each stream. We stop when we hit
* something that's in progress. The issuer thread may be adding stuff
* to the tail whilst we're doing this.
*/
for (s = 0; s < NR_IO_STREAMS; s++) {
unsigned long long s_issued_to;
stream = &wreq->io_streams[s];
/* Read active flag before issued_to */
if (!smp_load_acquire(&stream->active))
continue;
for (;;) {
/* Order reading the issued_to point before reading the
* queue it refers to.
*/
s_issued_to = atomic64_read_acquire(&stream->issued_to);
if (s_issued_to < issued_to)
issued_to = s_issued_to;
front = stream->front;
if (!front)
break;
trace_netfs_collect_sreq(wreq, front);
//_debug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
if (stream->collected_to < front->start) {
trace_netfs_collect_gap(wreq, stream, issued_to, 'F');
stream->collected_to = front->start;
}
/* Stall if the front is still undergoing I/O. */
if (netfs_check_subreq_in_progress(front)) {
notes |= HIT_PENDING;
break;
}
smp_rmb(); /* Read counters after I-P flag. */
if (stream->failed) {
stream->collected_to = front->start + front->len;
notes |= MADE_PROGRESS | SAW_FAILURE;
goto cancel;
}
if (front->start + front->transferred > stream->collected_to) {
stream->collected_to = front->start + front->transferred;
stream->transferred = stream->collected_to - wreq->start;
stream->transferred_valid = true;
notes |= MADE_PROGRESS;
}
if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
stream->failed = true;
stream->error = front->error;
if (stream->source == NETFS_UPLOAD_TO_SERVER)
mapping_set_error(wreq->mapping, front->error);
notes |= NEED_REASSESS | SAW_FAILURE;
break;
}
if (front->transferred < front->len) {
stream->need_retry = true;
notes |= NEED_RETRY | MADE_PROGRESS;
break;
}
cancel:
/* Remove if completely consumed. */
spin_lock(&wreq->lock);
remove = front;
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&wreq->lock);
netfs_put_subrequest(remove,
notes & SAW_FAILURE ?
netfs_sreq_trace_put_cancel :
netfs_sreq_trace_put_done);
}
		/* If we have an empty stream, we need to jump it forward,
		 * otherwise the collection point will never advance.
		 */
if (!front && issued_to > stream->collected_to) {
trace_netfs_collect_gap(wreq, stream, issued_to, 'E');
stream->collected_to = issued_to;
}
if (stream->collected_to < collected_to)
collected_to = stream->collected_to;
}
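
	/* Advance the request's collection point to the lowest point reached
	 * by any active stream.
	 */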
if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
wreq->collected_to = collected_to;
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
if (stream->active)
trace_netfs_collect_stream(wreq, stream);
}
trace_netfs_collect_state(wreq, wreq->collected_to, notes);
/* Unlock any folios that we have now finished with. */
if (notes & NEED_UNLOCK) {
if (wreq->cleaned_to < wreq->collected_to)
netfs_writeback_unlock_folios(wreq, &notes);
} else {
wreq->cleaned_to = wreq->collected_to;
}
// TODO: Discard encryption buffers
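
	/* If any stream flagged a retry, reissue its incomplete subrequests;
	 * the results will be gathered by a later collection pass.
	 */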
if (notes & NEED_RETRY)
goto need_retry;
if (notes & MADE_PROGRESS) {
netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess_streams;
}
if (notes & NEED_REASSESS) {
//cond_resched();
goto reassess_streams;
	}

out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
_leave(" = %x", notes);
	return;

need_retry:
/* Okay... We're going to have to retry one or both streams. Note
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
_debug("retry");
netfs_retry_writes(wreq);
goto out;
}

/*
* Perform the collection of subrequests, folios and encryption buffers.
*/
bool netfs_write_collection(struct netfs_io_request *wreq)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
bool transferred_valid = false;
	int s;

_enter("R=%x", wreq->debug_id);
netfs_collect_write_results(wreq);
/* We're done when the app thread has finished posting subreqs and all
* the queues in all the streams are empty.
*/
if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags))
return false;
smp_rmb(); /* Read ALL_QUEUED before lists. */
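
	/* Report the smallest amount transferred by any active stream. */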
transferred = LONG_MAX;
for (s = 0; s < NR_IO_STREAMS; s++) {
struct netfs_io_stream *stream = &wreq->io_streams[s];
if (!stream->active)
continue;
if (!list_empty(&stream->subrequests))
return false;
if (stream->transferred_valid &&
stream->transferred < transferred) {
transferred = stream->transferred;
transferred_valid = true;
}
}
/* Okay, declare that all I/O is complete. */
if (transferred_valid)
wreq->transferred = transferred;
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
if (wreq->io_streams[1].active &&
wreq->io_streams[1].failed &&
ictx->ops->invalidate_cache) {
/* Cache write failure doesn't prevent writeback completion
* unless we're in disconnected mode.
*/
ictx->ops->invalidate_cache(wreq);
}
_debug("finished");
netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
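
	/* For an async write, complete the caller's iocb, advancing the file
	 * position by the amount actually written.
	 */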
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
if (wreq->iocb->ki_complete) {
trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
wreq->iocb->ki_complete(
wreq->iocb, wreq->error ? wreq->error : written);
}
wreq->iocb = VFS_PTR_POISON;
}
netfs_clear_subrequests(wreq);
return true;
}

void netfs_write_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

netfs_see_request(rreq, netfs_rreq_trace_see_work);
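
	/* Nothing to do unless the request is still marked in progress. */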
if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_write_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
else
netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}

/**
* netfs_write_subrequest_terminated - Note the termination of a write operation.
* @_op: The I/O request that has terminated.
* @transferred_or_error: The amount of data transferred or an error code.
*
* This tells the library that a contributory write I/O operation has
* terminated, one way or another, and that it should collect the results.
*
* The caller indicates in @transferred_or_error the outcome of the operation,
* supplying a positive value to indicate the number of bytes transferred or a
* negative error code. The library will look after reissuing I/O operations
 * as appropriate.
*
* When this is called, ownership of the subrequest is transferred back to the
* library, along with a ref.
*
* Note that %_op is a void* so that the function can be passed to
* kiocb::term_func without the need for a casting wrapper.
*/
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;

_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
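
	/* Account the completion against the appropriate statistics counter. */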
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
netfs_stat(&netfs_n_wh_upload_done);
break;
case NETFS_WRITE_TO_CACHE:
netfs_stat(&netfs_n_wh_write_done);
break;
default:
BUG();
}
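
	/* On failure, record the error, mark the subrequest failed (unless a
	 * retry has been requested) and pause the request so that collection
	 * can catch up.
	 */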
if (IS_ERR_VALUE(transferred_or_error)) {
subreq->error = transferred_or_error;
		/* If a retry has already been requested, the error should not matter. */
if (!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
set_bit(NETFS_SREQ_FAILED, &subreq->flags);
trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);
}
switch (subreq->source) {
case NETFS_WRITE_TO_CACHE:
netfs_stat(&netfs_n_wh_write_failed);
break;
case NETFS_UPLOAD_TO_SERVER:
netfs_stat(&netfs_n_wh_upload_failed);
break;
default:
break;
}
trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
} else {
if (WARN(transferred_or_error > subreq->len - subreq->transferred,
"Subreq excess write: R=%x[%x] %zd > %zu - %zu",
wreq->debug_id, subreq->debug_index,
transferred_or_error, subreq->len, subreq->transferred))
transferred_or_error = subreq->len - subreq->transferred;
subreq->error = 0;
subreq->transferred += transferred_or_error;
if (subreq->transferred < subreq->len)
set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
netfs_subreq_clear_in_progress(subreq);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);