// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem write retrying.
*
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include "internal.h"

/*
 * Perform retries on the subrequests of a stream that need it.
*/
static void netfs_retry_write_stream(struct netfs_io_request *wreq,
struct netfs_io_stream *stream)
{
struct list_head *next;
_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
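
	/* Ask the filesystem to adjust an upload stream before we retry it;
	 * it may, for instance, mark the whole stream as having failed.
	 */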
if (stream->source == NETFS_UPLOAD_TO_SERVER &&
wreq->netfs_ops->retry_request)
wreq->netfs_ops->retry_request(wreq, stream);
if (unlikely(stream->failed))
return;
/* If there's no renegotiation to do, just resend each failed subreq. */
if (!stream->prepare_write) {
struct netfs_io_subrequest *subreq;
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
struct iov_iter source = subreq->io_iter;
iov_iter_revert(&source, subreq->len - source.count);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_write(stream, subreq, &source);
}
}
return;
}
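
	/* Otherwise the I/O size needs renegotiating, so work through the
	 * stream's subrequest list from the head, reslicing it span by span.
	 */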
next = stream->subrequests.next;
do {
struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
struct iov_iter source;
unsigned long long start, len;
size_t part;
bool boundary = false;
/* Go through the stream and find the next span of contiguous
* data that we then rejig (cifs, for example, needs the wsize
* renegotiating) and reissue.
*/
from = list_entry(next, struct netfs_io_subrequest, rreq_link);
to = from;
start = from->start + from->transferred;
len = from->len - from->transferred;
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
return;
list_for_each_continue(next, &stream->subrequests) {
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
if (subreq->start + subreq->transferred != start + len ||
test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
break;
to = subreq;
len += to->len;
}
/* Determine the set of buffers we're going to use. Each
* subreq gets a subset of a single overall contiguous buffer.
*/
netfs_reset_iter(from);
source = from->io_iter;
source.count = len;
/* Work through the sublist. */
subreq = from;
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
if (!len)
break;
subreq->start = start;
subreq->len = len;
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
subreq->retry_count++;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
/* Renegotiate max_len (wsize) */
stream->sreq_max_len = len;
stream->prepare_write(subreq);
part = umin(len, stream->sreq_max_len);
if (unlikely(stream->sreq_max_segs))
part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
subreq->len = part;
subreq->transferred = 0;
len -= part;
start += part;
if (len && subreq == to &&
__test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
boundary = true;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_write(stream, subreq, &source);
if (subreq == to)
break;
}
/* If we managed to use fewer subreqs, we can discard the
* excess; if we used the same number, then we're done.
*/
if (!len) {
if (subreq == to)
continue;
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
list_del(&subreq->rreq_link);
netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
continue;
}
/* We ran out of subrequests, so we need to allocate some more
		 * and insert them after the last one.
*/
do {
subreq = netfs_alloc_subrequest(wreq, stream->source);
subreq->start = start;
subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
subreq->stream_nr = stream->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
stream->sreq_max_len = len;
stream->sreq_max_segs = INT_MAX;
switch (stream->source) {
case NETFS_UPLOAD_TO_SERVER:
netfs_stat(&netfs_n_wh_upload);
stream->sreq_max_len = umin(len, wreq->wsize);
break;
case NETFS_WRITE_TO_CACHE:
netfs_stat(&netfs_n_wh_write);
break;
default:
WARN_ON_ONCE(1);
}
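
			/* Renegotiate the maximum write size (wsize) for the
			 * newly allocated subrequest.
			 */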
stream->prepare_write(subreq);
part = umin(len, stream->sreq_max_len);
subreq->len = subreq->transferred + part;
len -= part;
start += part;
if (!len && boundary) {
__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
boundary = false;
}
netfs_reissue_write(stream, subreq, &source);
if (!len)
break;
} while (len);
} while (!list_is_head(next, &stream->subrequests));
}

/*
* Perform retries on the streams that need it. If we're doing content
* encryption and the server copy changed due to a third-party write, we may
* need to do an RMW cycle and also rewrite the data to the cache.
*/
void netfs_retry_writes(struct netfs_io_request *wreq)
{
struct netfs_io_stream *stream;
int s;
netfs_stat(&netfs_n_wh_retry_write_req);
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
set_bit(NETFS_RREQ_RETRYING, &wreq->flags);
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
if (stream->active)
netfs_wait_for_in_progress_stream(wreq, stream);
}
clear_bit(NETFS_RREQ_RETRYING, &wreq->flags);
// TODO: Enc: Fetch changed partial pages
// TODO: Enc: Reencrypt content if needed.
// TODO: Enc: Wind back transferred point.
// TODO: Enc: Mark cache pages for retry.
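
	/* Now reissue any stream that was flagged as needing to be retried. */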
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
if (stream->need_retry) {
stream->need_retry = false;
netfs_retry_write_stream(wreq, stream);
}
}
}

/*
* Perform a read to a buffer from the server, slicing up the region to be read
* according to the network rsize.
*/
static bool netfs_rmw_read_one(struct netfs_io_request *rreq,
unsigned long long start, size_t len)
{
int ret = 0;
rreq->start = start;
rreq->len = len;
atomic64_set(&rreq->issued_to, 0);
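
	/* Slice up the span and issue a read subrequest for each slice until
	 * the whole of it has been issued.
	 */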
do {
struct netfs_io_subrequest *subreq;
ssize_t slice;
subreq = netfs_alloc_subrequest(rreq, NETFS_DOWNLOAD_FROM_SERVER);
if (!subreq) {
ret = -ENOMEM;
break;
}
subreq->start = start;
subreq->len = len;
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &rreq->enc_subrequests);
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
spin_unlock(&rreq->lock);
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
}
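
		/* Set up the subrequest's iterator over the read buffer; this
		 * may reduce the subrequest length to the permitted read size.
		 */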
netfs_prepare_dio_read_iterator(subreq, rreq->crypto_bsize);
slice = subreq->len;
rreq->netfs_ops->issue_read(subreq);
len -= slice;
start += slice;
atomic64_add(slice, &rreq->issued_to);
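
		/* Stop issuing subrequests if a nonblocking request would have
		 * had to block.
		 */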
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
cond_resched();
} while (len > 0);
return ret == 0 && atomic64_read(&rreq->issued_to) == rreq->len;
}

/*
 * Begin the process of reading in one or two chunks of data for use by an
 * unbuffered write to perform an RMW cycle. We don't read directly into the
* write buffer as this may get called to redo the read in the case that a
* conditional write fails due to conflicting 3rd-party modifications.
*/
ssize_t netfs_rmw_read(struct netfs_io_request *wreq, struct file *file,
unsigned long long start1, size_t len1,
unsigned long long start2, size_t len2)
{
struct netfs_io_request *rreq;
unsigned long long block1, block2, b1start, b2start, start;
ssize_t ret;
size_t bsize = wreq->crypto_bsize, len, bufsize = bsize;
unsigned int bshift = ilog2(bsize);
_enter("RMW:R=%x %llx-%llx %llx-%llx",
wreq->debug_id, start1, start1 + len1 - 1, start2, start2 + len2 - 1);
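
	/* Round each end of the write out to the start of the crypto block
	 * that contains it.
	 */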
block1 = start1 >> bshift;
block2 = start2 >> bshift;
b1start = block1 << bshift;
b2start = block2 << bshift;
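
	/* Work out the span to read and the size of bounce buffer we need:
	 * a single block if only one end is needed or both ends fall in the
	 * same block, otherwise enough for two blocks.
	 */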
if (len1 && !len2) {
start = b1start;
len = bsize;
} else if (!len1 && len2) {
start = b2start;
len = bsize;
} else if (block1 == block2) {
start = b1start;
len = bsize;
} else {
start = b1start;
len = b2start - b1start + bsize;
bufsize = bsize * 2;
}
rreq = netfs_alloc_request(wreq->mapping, file, start, len, NETFS_RMW_READ);
if (IS_ERR(rreq))
return PTR_ERR(rreq);
__set_bit(NETFS_RREQ_USE_BOUNCE_BUFFER, &rreq->flags);
__set_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags);
__set_bit(NETFS_RREQ_CRYPT_IN_PLACE, &rreq->flags);
/* Allocate a bounce buffer to hold both ends. If the block size is
* less than PAGE_SIZE, we'll pack them into the same folio.
*/
ret = rolling_buffer_init(&rreq->bounce, rreq->debug_id, ITER_DEST);
if (ret < 0)
goto error;
ret = netfs_alloc_bounce(rreq, bufsize, GFP_KERNEL);
if (ret < 0)
goto error;
/* Chop the reads into slices according to what the netfs wants and
* submit each one. We can merge the reads if they are adjacent.
*/
if (len1 && len2 && b2start == b1start) {
netfs_rmw_read_one(rreq, b1start, bsize);
} else if (len1 && len2 && b2start == b1start + bsize) {
netfs_rmw_read_one(rreq, b1start, bsize * 2);
} else {
if (len1 && !netfs_rmw_read_one(rreq, b1start, bsize))
goto wait;
if (len2)
netfs_rmw_read_one(rreq, b2start, bsize);
}
wait:
ret = netfs_wait_for_read(rreq);
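
	/* If the reads succeeded, copy the data for each end out of the read
	 * bounce buffer and into the buffers holding the ends of the write.
	 */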
if (ret >= 0) {
struct iov_iter *bounce = &rreq->bounce.iter;
struct folio *folio1 = folioq_folio(wreq->bounce.tail, 0);
struct folio *folio2 = wreq->rmw_tail;
bounce->data_source = ITER_SOURCE;
iov_iter_revert(bounce, bufsize - bounce->count);
		ret = -EIO;
		if (len1) {
			if (copy_folio_from_iter(folio1, offset_in_folio(folio1, start1),
						 len1, bounce) != len1)
				goto error;
		}
if (len2) {
iov_iter_advance(bounce, bufsize - len1 - len2);
if (copy_folio_from_iter(folio2, offset_in_folio(folio2, start2),
len2, bounce) != len2)
goto error;
}
ret = 0;
}
error:
netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret;
}