// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem write retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include "internal.h"

/*
 * Perform retries on the subrequests of a single write stream that need it.
 */
static void netfs_retry_write_stream(struct netfs_io_request *wreq,
				     struct netfs_io_stream *stream)
{
	struct list_head *next;

	_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);

	if (list_empty(&stream->subrequests))
		return;

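	/* If the filesystem provides a retry hook for uploads, let it adjust
	 * things before we reissue anything (it may, for instance, need to
	 * renegotiate with the server, or it may mark the stream as failed).
	 */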
	if (stream->source == NETFS_UPLOAD_TO_SERVER &&
	    wreq->netfs_ops->retry_request)
		wreq->netfs_ops->retry_request(wreq, stream);

	if (unlikely(stream->failed))
		return;

	/* If there's no renegotiation to do, just resend each failed subreq. */
	if (!stream->prepare_write) {
		struct netfs_io_subrequest *subreq;

		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
			if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
				break;
			if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
				struct iov_iter source = subreq->io_iter;

				iov_iter_revert(&source, subreq->len - source.count);
				netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
				netfs_reissue_write(stream, subreq, &source);
			}
		}
		return;
	}

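	/* 'next' tracks our position in the subrequest list across iterations
	 * of the outer loop; each pass below consumes one contiguous span of
	 * retryable data starting at that position.
	 */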
	next = stream->subrequests.next;

	do {
		struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
		struct iov_iter source;
		unsigned long long start, len;
		size_t part;
		bool boundary = false;

		/* Go through the stream and find the next span of contiguous
		 * data that we then rejig (cifs, for example, needs the wsize
		 * renegotiating) and reissue.
		 */
		from = list_entry(next, struct netfs_io_subrequest, rreq_link);
		to = from;
		start = from->start + from->transferred;
		len = from->len - from->transferred;

		if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
		    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
			return;

		list_for_each_continue(next, &stream->subrequests) {
			subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
			if (subreq->start + subreq->transferred != start + len ||
			    test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
			    !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
				break;
			to = subreq;
			len += to->len;
		}

		/* Determine the set of buffers we're going to use. Each
		 * subreq gets a subset of a single overall contiguous buffer.
		 */
		netfs_reset_iter(from);
		source = from->io_iter;
		source.count = len;
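		/* Note that netfs_reissue_write() is assumed to take its slice
		 * from this shared iterator, so each reissued subrequest
		 * consumes its part of the span in order.
		 */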

		/* Work through the sublist. */
		subreq = from;
		list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
			if (!len)
				break;

			subreq->start = start;
			subreq->len = len;
			__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			subreq->retry_count++;
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);

			/* Renegotiate max_len (wsize) */
			stream->sreq_max_len = len;
			stream->prepare_write(subreq);

			part = umin(len, stream->sreq_max_len);
			if (unlikely(stream->sreq_max_segs))
				part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
			subreq->len = part;
			subreq->transferred = 0;
			len -= part;
			start += part;
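			/* If we've reached the last of the original subreqs but
			 * the span still has data left, hold on to any boundary
			 * mark so that it can be reapplied to whichever subreq
			 * ends up covering the end of the span.
			 */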
			if (len && subreq == to &&
			    __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
				boundary = true;

			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
			netfs_reissue_write(stream, subreq, &source);
			if (subreq == to)
				break;
		}

		/* If we managed to use fewer subreqs, we can discard the
		 * excess; if we used the same number, then we're done.
		 */
		if (!len) {
			if (subreq == to)
				continue;
			list_for_each_entry_safe_from(subreq, tmp,
						      &stream->subrequests, rreq_link) {
				trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
				list_del(&subreq->rreq_link);
				netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
				if (subreq == to)
					break;
			}
			continue;
		}

		/* We ran out of subrequests, so we need to allocate some more
		 * and insert them after the last one in the span.
		 */
		do {
			subreq = netfs_alloc_subrequest(wreq, stream->source);
			subreq->start = start;
			subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
			subreq->stream_nr = stream->stream_nr;
			subreq->retry_count = 1;

			trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
					     refcount_read(&subreq->ref),
					     netfs_sreq_trace_new);
			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);

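			/* list_add() inserts the new subreq just after 'to',
			 * and advancing 'to' keeps it pointing at the last
			 * subrequest in the span we're rebuilding.
			 */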
			list_add(&subreq->rreq_link, &to->rreq_link);
			to = list_next_entry(to, rreq_link);
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);

			stream->sreq_max_len = len;
			stream->sreq_max_segs = INT_MAX;
			switch (stream->source) {
			case NETFS_UPLOAD_TO_SERVER:
				netfs_stat(&netfs_n_wh_upload);
				stream->sreq_max_len = umin(len, wreq->wsize);
				break;
			case NETFS_WRITE_TO_CACHE:
				netfs_stat(&netfs_n_wh_write);
				break;
			default:
				WARN_ON_ONCE(1);
			}

			stream->prepare_write(subreq);

			part = umin(len, stream->sreq_max_len);
			subreq->len = subreq->transferred + part;
			len -= part;
			start += part;
			if (!len && boundary) {
				__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
				boundary = false;
			}

			netfs_reissue_write(stream, subreq, &source);
			if (!len)
				break;

		} while (len);

	} while (!list_is_head(next, &stream->subrequests));
}

/*
 * Perform retries on the streams that need it. If we're doing content
 * encryption and the server copy changed due to a third-party write, we may
 * need to do an RMW cycle and also rewrite the data to the cache.
 */
void netfs_retry_writes(struct netfs_io_request *wreq)
{
	struct netfs_io_stream *stream;
	int s;

	netfs_stat(&netfs_n_wh_retry_write_req);

	/* Wait for all outstanding I/O to quiesce before performing retries as
	 * we may need to renegotiate the I/O sizes.
	 */
	set_bit(NETFS_RREQ_RETRYING, &wreq->flags);
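	/* (NETFS_RREQ_RETRYING is presumably what tells the collector that
	 * subrequests may yet be reissued whilst we wait below.)
	 */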
	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->active)
			netfs_wait_for_in_progress_stream(wreq, stream);
	}
	clear_bit(NETFS_RREQ_RETRYING, &wreq->flags);

	// TODO: Enc: Fetch changed partial pages
	// TODO: Enc: Reencrypt content if needed.
	// TODO: Enc: Wind back transferred point.
	// TODO: Enc: Mark cache pages for retry.

	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->need_retry) {
			stream->need_retry = false;
			netfs_retry_write_stream(wreq, stream);
		}
	}
}

/*
 * Perform a read from the server into a buffer, slicing up the region to be
 * read according to the network rsize.
 */
static bool netfs_rmw_read_one(struct netfs_io_request *rreq,
			       unsigned long long start, size_t len)
{
	int ret = 0;

	rreq->start = start;
	rreq->len = len;
	atomic64_set(&rreq->issued_to, 0);
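	/* issued_to is used here as a count of the bytes we've managed to
	 * issue reads for; the return value below checks that it covers the
	 * whole span.
	 */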

	do {
		struct netfs_io_subrequest *subreq;
		ssize_t slice;

		subreq = netfs_alloc_subrequest(rreq, NETFS_DOWNLOAD_FROM_SERVER);
		if (!subreq) {
			ret = -ENOMEM;
			break;
		}

		subreq->start = start;
		subreq->len = len;

		spin_lock(&rreq->lock);
		list_add_tail(&subreq->rreq_link, &rreq->enc_subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_added);
		spin_unlock(&rreq->lock);

		netfs_stat(&netfs_n_rh_download);
		if (rreq->netfs_ops->prepare_read) {
			ret = rreq->netfs_ops->prepare_read(subreq);
			if (ret < 0) {
				netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
				break;
			}
		}

		netfs_prepare_dio_read_iterator(subreq, rreq->crypto_bsize);
		slice = subreq->len;
		rreq->netfs_ops->issue_read(subreq);

		len -= slice;
		start += slice;
		atomic64_add(slice, &rreq->issued_to);

		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
			break;
		cond_resched();
	} while (len > 0);

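	/* Report success only if everything issued cleanly and the reads we
	 * issued cover the whole of the requested span.
	 */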
	return ret == 0 && atomic64_read(&rreq->issued_to) == rreq->len;
}

/*
 * Begin the process of reading in one or two chunks of data for use by an
 * unbuffered write to perform an RMW cycle. We don't read directly into the
 * write buffer as this may get called to redo the read in the case that a
 * conditional write fails due to conflicting 3rd-party modifications.
 */
ssize_t netfs_rmw_read(struct netfs_io_request *wreq, struct file *file,
		       unsigned long long start1, size_t len1,
		       unsigned long long start2, size_t len2)
{
	struct netfs_io_request *rreq;
	unsigned long long block1, block2, b1start, b2start, start;
	ssize_t ret;
	size_t bsize = wreq->crypto_bsize, len, bufsize = bsize;
	unsigned int bshift = ilog2(bsize);

	_enter("RMW:R=%x %llx-%llx %llx-%llx",
	       wreq->debug_id, start1, start1 + len1 - 1, start2, start2 + len2 - 1);

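	/* Work out which crypto block each region starts in and the file
	 * position of that block.
	 */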
	block1 = start1 >> bshift;
	block2 = start2 >> bshift;
	b1start = block1 << bshift;
	b2start = block2 << bshift;

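	/* Work out how much we need to read and how big a bounce buffer is
	 * required: one block if only one region needs reading or both fall
	 * in the same block; otherwise the first and last blocks (the request
	 * nominally spans the whole range, but only those two blocks get
	 * read).
	 */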
	if (len1 && !len2) {
		start = b1start;
		len = bsize;
	} else if (!len1 && len2) {
		start = b2start;
		len = bsize;
	} else if (block1 == block2) {
		start = b1start;
		len = bsize;
	} else {
		start = b1start;
		len = b2start - b1start + bsize;
		bufsize = bsize * 2;
	}

	rreq = netfs_alloc_request(wreq->mapping, file, start, len, NETFS_RMW_READ);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	__set_bit(NETFS_RREQ_USE_BOUNCE_BUFFER, &rreq->flags);
	__set_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags);
	__set_bit(NETFS_RREQ_CRYPT_IN_PLACE, &rreq->flags);

	/* Allocate a bounce buffer to hold both ends. If the block size is
	 * less than PAGE_SIZE, we'll pack them into the same folio.
	 */
	ret = rolling_buffer_init(&rreq->bounce, rreq->debug_id, ITER_DEST);
	if (ret < 0)
		goto error;
	ret = netfs_alloc_bounce(rreq, bufsize, GFP_KERNEL);
	if (ret < 0)
		goto error;

	/* Chop the reads into slices according to what the netfs wants and
	 * submit each one. We can merge the reads if they are adjacent.
	 */
	if (len1 && len2 && b2start == b1start) {
		netfs_rmw_read_one(rreq, b1start, bsize);
	} else if (len1 && len2 && b2start == b1start + bsize) {
		netfs_rmw_read_one(rreq, b1start, bsize * 2);
	} else {
		if (len1 && !netfs_rmw_read_one(rreq, b1start, bsize))
			goto wait;
		if (len2)
			netfs_rmw_read_one(rreq, b2start, bsize);
	}

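	/* Wait for the reads to complete and then copy the pieces we need into
	 * the write request's buffers (folio1 is taken from the head of wreq's
	 * bounce buffer and folio2 is the recorded RMW tail folio).
	 */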
wait:
	ret = netfs_wait_for_read(rreq);
	if (ret >= 0) {
		struct iov_iter *bounce = &rreq->bounce.iter;
		struct folio *folio1 = folioq_folio(wreq->bounce.tail, 0);
		struct folio *folio2 = wreq->rmw_tail;

		bounce->data_source = ITER_SOURCE;
		iov_iter_revert(bounce, bufsize - bounce->count);
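		/* The read will have consumed the bounce iterator as it filled
		 * it, so wind it back to the start of the buffer and mark it as
		 * a source so that we can copy out of it.
		 */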

		if (len1) {
			ret = -EIO;
			if (copy_folio_from_iter(folio1, offset_in_folio(folio1, start1),
						 len1, bounce) != len1)
				goto error;
		}

		if (len2) {
			iov_iter_advance(bounce, bufsize - len1 - len2);
			if (copy_folio_from_iter(folio2, offset_in_folio(folio2, start2),
						 len2, bounce) != len2)
				goto error;
		}
		ret = 0;
	}

error:
	netfs_put_request(rreq, netfs_rreq_trace_put_return);
	return ret;
}