| // SPDX-License-Identifier: GPL-2.0-only |
| /* Network filesystem high-level write support. |
| * |
| * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. |
| * Written by David Howells (dhowells@redhat.com) |
| */ |
| |
| #include <linux/export.h> |
| #include <linux/fs.h> |
| #include <linux/mm.h> |
| #include <linux/pagemap.h> |
| #include <linux/slab.h> |
| #include <linux/backing-dev.h> |
| #include "internal.h" |
| |
| /* Round up the last page of a region where the range is inclusive. */ |
| #define round_up_incl(x, to) (round_up((x) + 1, (to)) - 1) |
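/* E.g.: round_up_incl(0x11ff, 0x1000) == 0x1fff. */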
| |
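/*
 * Copy data from an iterator into the pages of a folio.  The copy is done with
 * page faults disabled, so the caller is expected to have faulted in the
 * source pages and must handle a short copy by retrying.
 */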
| static size_t copy_folio_from_iter_atomic(struct folio *folio, |
| unsigned int offset, size_t size, |
| struct iov_iter *i) |
| { |
| size_t copied = 0, n; |
| |
	do {
		unsigned int index = offset / PAGE_SIZE;
		unsigned int poffset = offset % PAGE_SIZE;
		unsigned int psize = min(PAGE_SIZE - poffset, size);

		n = copy_page_from_iter_atomic(folio_file_page(folio, index),
					       poffset, psize, i);
		copied += n;
		if (n < psize)
			break;
		size -= n;
		offset += n;
	} while (size);
| return copied; |
| } |
| |
| /* |
| * Initialise a new dirty folio group. We have to round it out to any crypto |
| * alignment. |
| */ |
| static void netfs_init_dirty_region(struct netfs_i_context *ctx, |
| struct netfs_dirty_region *region, |
| struct file *file, |
| loff_t start, size_t len) |
| { |
| region->from = start; |
| region->to = start + len; |
| region->debug_id = atomic_inc_return(&netfs_region_debug_ids); |
| |
| if (file && ctx->ops->init_dirty_region) |
| ctx->ops->init_dirty_region(region, file); |
| |
| trace_netfs_ref_region(region->debug_id, refcount_read(®ion->ref), |
| netfs_region_trace_new); |
| } |
| |
| /* |
| * Return true if two dirty regions are compatible such that b can be merged |
| * onto the end of a. |
| */ |
| bool netfs_are_regions_mergeable(struct netfs_i_context *ctx, |
| struct netfs_dirty_region *a, |
| struct netfs_dirty_region *b) |
| { |
| if (!netfs_mas_is_valid(a) || !netfs_mas_is_valid(b)) |
| return a == b; |
| if (b->waiting_on_wb != a->waiting_on_wb) |
| return false; |
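	/* The regions must touch unless the gap between them lies at or beyond
	 * the zero point, in which case the intervening space is known to hold
	 * no data.
	 */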
| if (b->from != a->to && |
| b->from < ctx->zero_point) |
| return false; |
| if (ctx->ops->are_regions_mergeable) |
| return ctx->ops->are_regions_mergeable(ctx, a, b); |
| return true; |
| } |
| |
| /* |
| * Subsume the modifications into an existing target region. Returns true if |
| * we need to update the dirty_regions tree. |
| */ |
| static bool netfs_subsume_into_existing(struct netfs_i_context *ctx, |
| struct folio *folio, |
| struct ma_state *mas, |
| struct netfs_dirty_region **_target, |
| struct netfs_dirty_region **_to_put, |
| pgoff_t *_first, pgoff_t *_last, |
| size_t offset, size_t len) |
| { |
| struct netfs_dirty_region *target = *_target, *prev; |
| |
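	/* Expand the target region to cover the span we just modified. */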
| target->from = min(target->from, folio_pos(folio) + offset); |
| target->to = max(target->to, folio_pos(folio) + offset + len); |
| trace_netfs_dirty(ctx, target, NULL, *_first, *_last, |
| netfs_dirty_trace_modified); |
| |
	/* We might have bridged to the previous region also. */
	if (*_first == 0)
		return false;
	prev = mas_prev(mas, *_first - 1);
| if (!netfs_mas_is_valid(prev)) |
| return false; |
| |
| if (prev->to != target->from || |
| prev->waiting_on_wb != target->waiting_on_wb) |
| return false; |
| |
| *_first = mas->index; |
| prev->to = target->to; |
| *_to_put = target; |
| trace_netfs_dirty(ctx, prev, NULL, *_first, *_last, |
| netfs_dirty_trace_merged_prev); |
| return true; |
| } |
| |
| /* |
| * Try to continue modification of a preceding region. The target region must |
| * not be covered by a non-flushing dirty region. |
| */ |
| static bool netfs_continue_modification(struct netfs_i_context *ctx, |
| struct folio *folio, |
| struct ma_state *mas, |
| struct netfs_dirty_region **_target, |
| struct netfs_dirty_region **_to_put, |
| pgoff_t *_first, pgoff_t *_last, |
| size_t offset, size_t len) |
| { |
| struct netfs_dirty_region *prev, *target = *_target; |
| pgoff_t first = *_first, last = *_last; |
	unsigned long long from = (unsigned long long)first * PAGE_SIZE + offset;
| unsigned long long to = from + len; |
| enum netfs_dirty_trace why; |
| |
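	/* There can be no preceding region if we're at the start of the file. */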
| if (first == 0) |
| return false; |
| |
| mas_set(mas, first - 1); |
| prev = mas_walk(mas); |
| if (!netfs_mas_is_valid(prev)) { |
| _debug("noncont %lx-%lx %lx", mas->index, mas->last, first); |
| return false; |
| } |
| |
| _debug("cont [D=%x] %lx-%lx %lx", prev->debug_id, mas->index, mas->last, first - 1); |
| |
| /* The regions touch. The previous region and the target region can |
| * only be combined if they have the same writeback overlay state. |
| */ |
| if (prev->waiting_on_wb) { |
| if (!netfs_mas_is_valid(target) || |
| (prev->waiting_on_wb != target->waiting_on_wb && |
| prev->waiting_on_wb != target)) |
| return false; |
| } |
| |
| /* The regions must also overlap, touch or be bridgeable. */ |
| if (from > prev->to && |
| prev->to < ctx->zero_point) |
| return false; |
| |
| first = mas->index; |
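	/* Work out how to extend the preceding region over the modified span. */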
| if (target == NETFS_COPY_TO_CACHE) { |
| _debug("supersede cache-copy"); |
| prev->to = to; |
| target = NULL; |
| why = netfs_dirty_trace_supersede; |
| } else if (netfs_mas_is_flushing(prev)) { |
| _debug("overlay"); |
| prev->to = to; |
| target = NULL; |
| why = netfs_dirty_trace_overlay_flush; |
| } else if (target) { |
| _debug("bridged"); |
| *_to_put = target; |
| prev->to = max(target->to, to); |
| why = netfs_dirty_trace_bridged; |
| } else { |
| _debug("no conflict"); |
| prev->to = to; |
| target = NULL; |
| why = netfs_dirty_trace_continue; |
| } |
| |
| *_target = prev; |
| *_first = first; |
| *_last = last; |
| trace_netfs_dirty(ctx, prev, target, first, last, why); |
| return true; |
| } |
| |
| /* |
| * Insert a new region. The space it occupies may be blank, being flushed or |
| * require superseding. |
| */ |
| static void netfs_add_new_region(struct netfs_i_context *ctx, |
| struct folio *folio, |
| struct ma_state *mas, |
| struct netfs_dirty_region *target, |
| struct netfs_dirty_region *old, |
| struct netfs_dirty_region **_to_put, |
| pgoff_t first, pgoff_t last, |
| size_t offset, size_t len) |
| { |
| _debug("insert %lx-%lx,%zx,%zx", first, last, offset, len); |
| |
| /* See if there's anything occupying the region we're moving into. */ |
| if (old) { |
| //pgoff_t align = 1 << old->align_order; |
| |
| //_debug("old [D=%x] %lx-%lx", (old&~1)->debug_id, first, last); |
| |
| /* If the region we're superseding is being written out at the |
| * moment, then mark this one as being unflushable for the |
| * moment and non-mergeable with regions that aren't overlying |
| * flushes. |
| */ |
| if (netfs_mas_is_flushing(old)) { |
| target->waiting_on_wb = old; |
| } else if (old == NETFS_COPY_TO_CACHE) { |
| trace_netfs_dirty(ctx, target, old, first, last, |
| netfs_dirty_trace_supersede); |
| } else { |
| kdebug("*** OLD %px", old); |
| WARN_ON(1); |
| } |
| } else { |
| trace_netfs_dirty(ctx, target, NULL, first, last, |
| netfs_dirty_trace_insert); |
| } |
| } |
| |
| /* |
| * Commit the changes to a folio. The internal structure of the dirty_regions |
| * tree must have been preallocated before any changes were made as we must not |
| * fail. |
| */ |
| static void netfs_commit_folio(struct netfs_i_context *ctx, |
| struct file *file, |
| struct netfs_dirty_region **_spare_region, |
| struct ma_state *mas, |
| struct folio *folio, |
| size_t offset, |
| size_t len) |
| { |
| struct netfs_dirty_region *old, *target, *to_put = NULL; |
| unsigned long long i_size, end, up; |
| pgoff_t first, last; |
| size_t balign = 1UL << ctx->min_bshift; |
| size_t tmp; |
| bool re_store = false; |
| |
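	/* Round the modified span out to the minimum I/O block size (which may
	 * reflect crypto block alignment), but don't round up past EOF.
	 */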
| i_size = i_size_read(netfs_inode(ctx)); |
| end = folio_pos(folio) + offset + len; |
| tmp = offset; |
| offset = round_down(offset, balign); |
| len += tmp - offset; |
| if (end < i_size) { |
| up = min(round_up(end, balign), i_size); |
| len += up - end; |
| } |
| |
	_enter("e=%llx o=%zx l=%zx", end, offset, len);
| |
| mtree_lock(&ctx->dirty_regions); |
| |
| /* First thing to check is whether there's a region covering the area |
| * of interest. If so, we may be able to simply subsume into it. |
| */ |
| mas_set(mas, folio->index); |
| target = mas_walk(mas); |
| first = mas->index; |
| last = mas->last; |
| if (!target) { |
| first = folio->index; |
| last = folio_next_index(folio) - 1; |
| mas_set_range(mas, first, last); |
| } |
| |
| if (netfs_mas_is_valid(target)) { |
| re_store = netfs_subsume_into_existing(ctx, folio, mas, &target, &to_put, |
| &first, &last, offset, len); |
| goto done; |
| } |
| |
| /* See if we can continue the previous region. */ |
| if (netfs_continue_modification(ctx, folio, mas, &target, &to_put, |
| &first, &last, offset, len)) { |
| re_store = true; |
| goto done; |
| } |
| |
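	/* Failing that, insert a new region covering the folio, superseding any
	 * marker entry (e.g. a copy-to-cache note) occupying the space.
	 */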
| old = target; |
| first = folio->index; |
| last = folio_next_index(folio) - 1; |
| target = *_spare_region; |
| *_spare_region = NULL; |
	netfs_init_dirty_region(ctx, target, file, (loff_t)first * PAGE_SIZE + offset, len);
| netfs_add_new_region(ctx, folio, mas, target, old, &to_put, |
| first, last, offset, len); |
| re_store = true; |
| |
| done: |
| if (re_store) { |
| _debug("store [D=%x] %lx-%lx", target->debug_id, first, last); |
| mas_set_range(mas, first, last); |
| mas_store(mas, target); |
| } |
| mtree_unlock(&ctx->dirty_regions); |
| netfs_put_dirty_region(ctx, to_put, netfs_region_trace_put_merged); |
| } |
| |
| /* |
| * See if we can merge the regions we just added with their neighbours at the |
| * end of the write. |
| */ |
| static void netfs_commit_region(struct netfs_i_context *ctx, struct ma_state *mas, |
| loff_t start, size_t len) |
| { |
| struct netfs_dirty_region *region, *prev, *next, *put[2] = {}; |
| pgoff_t first = start / PAGE_SIZE; |
| pgoff_t last = (start + len - 1) / PAGE_SIZE; |
| |
| _enter("%lx-%lx", first, last); |
| |
| if (mas_expected_entries(mas, 1) < 0) |
| return; |
| |
| mtree_lock(&ctx->dirty_regions); |
| |
| /* See if we can merge the front with the previous region. */ |
| mas_set(mas, first); |
| if (first > 0 && |
| (region = mas_walk(mas)) && |
| netfs_mas_is_valid(region) && |
| (first = mas->index) && |
| (last = mas->last) && |
| (prev = mas_prev(mas, mas->index - 1)) && |
| netfs_mas_is_valid(prev) |
| ) { |
| _debug("prev [D=%x] %lx-%lx [D=%x] %lx-%lx", |
| prev->debug_id, mas->index, mas->last, |
| region->debug_id, first, last); |
| |
| if (netfs_are_regions_mergeable(ctx, prev, region)) { |
| _debug("- merge"); |
| first = mas->index; |
| prev->to = region->to; |
| trace_netfs_dirty(ctx, prev, region, first, last, |
| netfs_dirty_trace_merged_prev); |
| mas_set_range(mas, first, last); |
| mas_store(mas, prev); |
| put[0] = region; |
| } |
| } |
| |
| mtree_unlock(&ctx->dirty_regions); |
| |
| if (mas_expected_entries(mas, 1) < 0) |
| return; |
| |
| mtree_lock(&ctx->dirty_regions); |
| |
| /* See if we can merge the end with the next region. */ |
| mas_set_range(mas, last, last); |
| if (last != ULONG_MAX && |
| (region = mas_walk(mas)) && |
| netfs_mas_is_valid(region) && |
| (first = mas->index, true) && |
| (last = mas->last) != ULONG_MAX && |
| (next = mas_next(mas, mas->last + 1)) && |
| netfs_mas_is_valid(next) |
| ) { |
| _debug("next [D=%x] %lx-%lx [D=%x] %lx-%lx", |
| region->debug_id, first, last, |
| next->debug_id, mas->index, mas->last); |
| |
| if (!region->waiting_on_wb && |
| netfs_are_regions_mergeable(ctx, region, next)) { |
| _debug("- annex next"); |
| last = mas->last; |
| region->to = next->to; |
| trace_netfs_dirty(ctx, region, next, first, last, |
| netfs_dirty_trace_merged_next); |
| mas_set_range(mas, first, last); |
| mas_store(mas, region); |
| put[1] = next; |
| } |
| } |
| |
| mtree_unlock(&ctx->dirty_regions); |
| |
| netfs_put_dirty_region(ctx, put[0], netfs_region_trace_put_merged); |
| netfs_put_dirty_region(ctx, put[1], netfs_region_trace_put_merged); |
| } |
| |
| enum netfs_handle_nonuptodate { |
| NETFS_FOLIO_IS_UPTODATE, /* Folio is uptodate already */ |
| NETFS_JUST_PREFETCH, /* We have to read the folio anyway */ |
| NETFS_WHOLE_FOLIO_MODIFY, /* We're going to overwrite the whole folio */ |
| NETFS_MODIFY_AND_CLEAR, /* We can assume there is no data to be downloaded. */ |
| }; |
| |
| /* |
| * Decide how we should handle a non-uptodate folio that we want to modify. We |
| * might be attempting to do write-streaming, in which case we don't want to a |
| * local RMW cycle if we can avoid it. If we're doing local caching or content |
| * crypto, we award that priority over avoiding RMW. If the file is open |
| * readably, then we also assume that we may want to written what we wrote. |
| */ |
| static enum netfs_handle_nonuptodate netfs_handle_nonuptodate_folio(struct netfs_i_context *ctx, |
| struct file *file, |
| struct folio *folio, |
| size_t offset, |
| size_t len, |
| bool always_fill) |
| { |
| size_t min_bsize = 1UL << ctx->min_bshift; |
| loff_t pos = folio_file_pos(folio); |
| |
| _enter("f=%lx,z=%llx", ctx->flags, ctx->zero_point); |
| |
| if (folio_test_uptodate(folio)) |
| return NETFS_FOLIO_IS_UPTODATE; |
| |
| if (pos >= ctx->zero_point) |
| return NETFS_MODIFY_AND_CLEAR; |
| |
| if (always_fill) |
| return NETFS_JUST_PREFETCH; |
| |
| if (offset == 0 && |
| len >= folio_size(folio) && |
| len >= min_bsize) |
| return NETFS_WHOLE_FOLIO_MODIFY; |
| |
| if (file->f_mode & FMODE_READ || |
| test_bit(NETFS_ICTX_ENCRYPTED, &ctx->flags) || |
| test_bit(NETFS_ICTX_DO_RMW, &ctx->flags)) |
| return NETFS_JUST_PREFETCH; |
| |
| if (netfs_i_cookie(file_inode(file)) || |
| min_bsize > 0) |
| return NETFS_JUST_PREFETCH; |
| |
| /* TODO: Handle streaming writes where we avoid doing client-side RMW |
| * by not bringing pages fully uptodate. |
| * |
| * TODO: Consider doing a streaming write if we're about to completely |
| * overwrite a number of blocks. Could also do a streaming write if |
| * we're willing to do one or more reads to fill up the edges of a |
| * partially modified block prior to writing it back. |
| */ |
| return NETFS_JUST_PREFETCH; |
| } |
| |
| /* |
| * Grab a folio for writing. We don't lock it at this point as we have yet to |
| * preemptively trigger a fault-in - but we need to know how large the folio |
| * will be before we try that. |
| */ |
| static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, |
| pgoff_t index, size_t len_remaining) |
| { |
| return __filemap_get_folio(mapping, index, |
| FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE, |
| mapping_gfp_mask(mapping)); |
| } |
| |
| /* |
| * Write data into a prereserved region of the pagecache attached to a netfs |
| * inode. |
| */ |
| static ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter) |
| { |
| struct netfs_dirty_region *spare_region = NULL; |
| struct file *file = iocb->ki_filp; |
| struct inode *inode = file_inode(file); |
| struct netfs_i_context *ctx = netfs_i_context(inode); |
| struct folio *folio; |
| enum netfs_handle_nonuptodate nupt; |
| ssize_t written = 0, ret; |
| loff_t i_size, pos = iocb->ki_pos; |
| bool always_fill = false; |
| bool locked = false; |
| |
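	/* Set up a maple tree state spanning the pages to be modified so that
	 * slots in the dirty_regions tree can be preallocated as we go.
	 */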
| MA_STATE(mas, &ctx->dirty_regions, pos / PAGE_SIZE, |
| (pos + iov_iter_count(iter) - 1) / PAGE_SIZE); |
| |
| ret = ctx->ops->validate_for_write(inode, file); |
| if (ret < 0) |
| return ret; |
| |
| do { |
| size_t plen; |
| size_t offset; /* Offset into pagecache folio */ |
| size_t bytes; /* Bytes to write to folio */ |
| size_t copied; /* Bytes copied from user */ |
| |
| folio = netfs_grab_folio_for_write(file->f_mapping, |
| pos / PAGE_SIZE, |
| iov_iter_count(iter)); |
| if (!folio) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| |
| plen = folio_size(folio); |
| offset = pos - folio_file_pos(folio); |
| bytes = min_t(size_t, plen - offset, iov_iter_count(iter)); |
| locked = true; |
| |
| if (!folio_test_uptodate(folio)) { |
| folio_unlock(folio); /* Avoid deadlocking fault-in */ |
| locked = false; |
| } |
| |
| /* Bring in the user page that we will copy from _first_. |
| * Otherwise there's a nasty deadlock on copying from the same |
| * page as we're writing to, without it being marked |
| * up-to-date. |
| * |
| * Not only is this an optimisation, but it is also required to |
| * check that the address is actually valid, when atomic |
| * usercopies are used, below. |
| */ |
| if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { |
| ret = -EFAULT; |
| goto error_folio; |
| } |
| |
| if (fatal_signal_pending(current)) { |
| ret = -EINTR; |
| goto error_folio; |
| } |
| |
		if (!locked) {
			ret = folio_lock_killable(folio);
			if (ret < 0)
				goto error_folio;
			locked = true;
		}
| |
| redo_prefetch: |
		/* See if we need to prefetch the area we're going to modify.
		 * The folio is held locked across this in case there's more
		 * than one writer competing for the same cache block.
		 */
| nupt = netfs_handle_nonuptodate_folio(ctx, file, folio, |
| offset, bytes, always_fill); |
| _debug("nupt %u", nupt); |
| switch (nupt) { |
| case NETFS_JUST_PREFETCH: |
| ret = netfs_prefetch_for_write(file, folio, bytes); |
| if (ret < 0) { |
| _debug("prefetch = %zd", ret); |
| goto error_folio; |
| } |
| nupt = NETFS_FOLIO_IS_UPTODATE; |
| fallthrough; |
| case NETFS_FOLIO_IS_UPTODATE: |
| break; |
| case NETFS_MODIFY_AND_CLEAR: |
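			/* Clear the part of the folio ahead of the write now;
			 * the tail is cleared after the copy.
			 */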
| zero_user_segment(&folio->page, 0, offset); |
| break; |
| case NETFS_WHOLE_FOLIO_MODIFY: |
| break; |
| } |
| |
| /* Preallocate the space we need in the dirty region list. */ |
| ret = mas_expected_entries(&mas, 1); |
| if (ret < 0) |
| goto error_folio; |
| |
| if (!spare_region) { |
| spare_region = netfs_alloc_dirty_region(); |
| if (IS_ERR(spare_region)) { |
| ret = PTR_ERR(spare_region); |
| spare_region = NULL; |
| goto error_folio; |
| } |
| } |
| |
| if (mapping_writably_mapped(folio_file_mapping(folio))) |
| flush_dcache_folio(folio); |
| copied = copy_folio_from_iter_atomic(folio, offset, bytes, iter); |
| flush_dcache_folio(folio); |
| |
| /* Deal with a (partially) failed copy */ |
| if (!folio_test_uptodate(folio)) { |
| if (copied == 0) { |
| ret = -EFAULT; |
| goto error_folio; |
| } |
| if (copied < bytes) { |
| iov_iter_revert(iter, copied); |
| always_fill = true; |
| goto redo_prefetch; |
| } |
| switch (nupt) { |
| case NETFS_JUST_PREFETCH: |
| case NETFS_FOLIO_IS_UPTODATE: |
| /* We have the folio locked, so it really ought |
| * to be uptodate. |
| */ |
| WARN(true, "Locked folio %lx became non-uptodate\n", |
| folio_index(folio)); |
| ret = -EIO; |
| goto error_folio; |
| case NETFS_MODIFY_AND_CLEAR: |
| zero_user_segment(&folio->page, offset + copied, plen); |
| fallthrough; |
| case NETFS_WHOLE_FOLIO_MODIFY: |
| folio_mark_uptodate(folio); |
| break; |
| } |
| } |
| |
| /* Update the inode size if we moved the EOF marker */ |
| pos += copied; |
| i_size = i_size_read(inode); |
| if (pos > i_size) { |
| if (ctx->ops->update_i_size) { |
| ctx->ops->update_i_size(inode, pos); |
| } else { |
| i_size_write(inode, pos); |
| fscache_update_cookie(ctx->cache, NULL, &pos); |
| } |
| } |
| |
| netfs_commit_folio(ctx, file, &spare_region, &mas, |
| folio, offset, copied); |
| |
| folio_mark_dirty(folio); |
| folio_unlock(folio); |
| folio_put(folio); |
| folio = NULL; |
| |
| cond_resched(); |
| |
| written += copied; |
| |
| balance_dirty_pages_ratelimited(file->f_mapping); |
| } while (iov_iter_count(iter)); |
| |
| out: |
| if (likely(written)) { |
| netfs_commit_region(ctx, &mas, iocb->ki_pos, written); |
| |
| iocb->ki_pos += written; |
| |
| #if 0 |
| /* Flush and wait for a write that requires immediate synchronisation. */ |
| if (iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) { |
| _debug("dsync"); |
| mtree_lock(&ctx->dirty_regions); |
| netfs_flush_region(ctx, region, netfs_dirty_trace_flush_dsync); |
| mtree_unlock(&ctx->dirty_regions); |
| |
| ret = wait_on_region(region, NETFS_REGION_IS_COMPLETE); |
| if (ret < 0) |
| written = ret; |
| } |
| #endif |
| } |
| |
| mas_destroy(&mas); |
| return written ? written : ret; |
| |
| error_folio: |
| if (locked) |
| folio_unlock(folio); |
| folio_put(folio); |
| goto out; |
| } |
| |
| /** |
| * netfs_file_write_iter - write data to a file |
| * @iocb: IO state structure |
| * @from: iov_iter with data to write |
| * |
| * This is a wrapper around __generic_file_write_iter() to be used by most |
| * filesystems. It takes care of syncing the file in case of O_SYNC file and |
| * acquires i_mutex as needed. |
| * Return: |
| * * negative error code if no data has been written at all of |
| * vfs_fsync_range() failed for a synchronous write |
| * * number of bytes written, even for truncated writes |
| */ |
| ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| { |
| struct file *file = iocb->ki_filp; |
| struct inode *inode = file->f_mapping->host; |
| struct netfs_i_context *ctx = netfs_i_context(inode); |
| ssize_t ret; |
| |
| _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); |
| |
| inode_lock(inode); |
| ret = generic_write_checks(iocb, from); |
| if (ret <= 0) |
| goto error_unlock; |
| |
| trace_netfs_write_iter(iocb, from); |
| |
| /* We can write back this queue in page reclaim */ |
| current->backing_dev_info = inode_to_bdi(inode); |
| ret = file_remove_privs(file); |
| if (ret) |
| goto error; |
| |
| ret = file_update_time(file); |
| if (ret) |
| goto error; |
| |
| ret = netfs_flush_conflicting_writes(ctx, file, iocb->ki_pos, |
| iov_iter_count(from), NULL); |
| if (ret < 0 && ret != -EAGAIN) |
| goto error; |
| |
| if (iocb->ki_flags & IOCB_DIRECT) |
| ret = netfs_direct_write_iter(iocb, from); |
| else |
| ret = netfs_perform_write(iocb, from); |
| |
| error: |
| inode_unlock(inode); |
| /* TODO: Wait for DSYNC region here. */ |
| current->backing_dev_info = NULL; |
| return ret; |
| error_unlock: |
| inode_unlock(inode); |
| return ret; |
| } |
| EXPORT_SYMBOL(netfs_file_write_iter); |
| |
| /* |
| * Notification that a previously read-only page is about to become writable. |
| * Note that the caller indicates a single page of a multipage folio. |
| */ |
| vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf) |
| { |
	struct netfs_dirty_region *spare_region = NULL;
| struct folio *folio = page_folio(vmf->page); |
| struct file *file = vmf->vma->vm_file; |
| struct inode *inode = file_inode(file); |
| struct netfs_i_context *ctx = netfs_i_context(inode); |
| vm_fault_t ret = VM_FAULT_RETRY; |
| int err; |
| |
	MA_STATE(mas, &ctx->dirty_regions, folio->index, folio_next_index(folio) - 1);
| |
| _enter("%lx", folio->index); |
| |
| if (ctx->ops->validate_for_write(inode, file) < 0) |
| return VM_FAULT_SIGBUS; |
| |
| sb_start_pagefault(inode->i_sb); |
| |
| if (folio_wait_writeback_killable(folio)) |
| goto out; |
| |
| if (folio_lock_killable(folio) < 0) |
| goto out; |
| |
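	/* Preallocate the tree slots and the spare region record that
	 * committing the folio may require.
	 */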
| if (mas_expected_entries(&mas, 2) < 0) { |
| ret = VM_FAULT_OOM; |
| goto out; |
| } |
| |
	spare_region = netfs_alloc_dirty_region();
	if (IS_ERR(spare_region)) {
		spare_region = NULL;
		ret = VM_FAULT_OOM;
		goto out;
	}
| |
| err = netfs_flush_conflicting_writes(ctx, file, folio_pos(folio), |
| folio_size(folio), folio); |
| switch (err) { |
| case 0: |
| break; |
| case -EAGAIN: |
| ret = VM_FAULT_RETRY; |
| goto out; |
| case -ENOMEM: |
| ret = VM_FAULT_OOM; |
| goto out; |
| default: |
| ret = VM_FAULT_SIGBUS; |
| goto out; |
| } |
| |
| netfs_commit_folio(ctx, file, &spare_region, &mas, |
| folio, 0, folio_size(folio)); |
| netfs_commit_region(ctx, &mas, folio_pos(folio), folio_size(folio)); |
| file_update_time(file); |
| |
| ret = VM_FAULT_LOCKED; |
| out: |
| sb_end_pagefault(inode->i_sb); |
| mas_destroy(&mas); |
| netfs_put_dirty_region(ctx, spare_region, netfs_region_trace_put_discard); |
| return ret; |
| } |
| |
| /* |
| * Try to note in the dirty region list that a range of pages needs writing to |
| * the cache. We do this by storing marked entries in the dirty_region list. |
| * Future writes can split the entries and then writepages will gather them up. |
| */ |
| static void netfs_copy_to_cache(struct netfs_io_request *rreq, |
| loff_t start, size_t len) |
| { |
| struct netfs_i_context *ctx = netfs_i_context(rreq->inode); |
| pgoff_t first = start / PAGE_SIZE; |
| pgoff_t last = (start + len - 1) / PAGE_SIZE; |
| pgoff_t hlast; |
| void *x; |
| |
| MA_STATE(mas, &ctx->dirty_regions, first, last); |
| |
	if (mas_expected_entries(&mas, 1) < 0)
		goto out;
| |
| mtree_lock(&ctx->dirty_regions); |
| |
| /* Find the span of the hole into which we're inserting. */ |
| x = mas_walk(&mas); |
| if (WARN_ON(x != NULL || mas.last < last)) { |
| pr_warn("Unexpected span %lx-%lx < %lx [x=%px]\n", |
| mas.index, mas.last, last, x); |
| goto skip; |
| } |
| |
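	/* If the range abuts an existing copy-to-cache mark on either side,
	 * extend the range so that the marks are merged into one entry.
	 */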
| hlast = mas.last; |
| if (mas.index == first && mas.index > 0) { |
| x = mas_prev(&mas, first - 1); |
| if (x == NETFS_COPY_TO_CACHE) |
| first = mas.index; |
| } |
| |
| if (hlast == last) { |
| mas_set(&mas, last); |
| x = mas_next(&mas, last + 1); |
| if (x == NETFS_COPY_TO_CACHE) |
| last = mas.last; |
| } |
| |
| trace_netfs_dirty(ctx, NULL, NULL, first, last, |
| netfs_dirty_trace_mark_copy_to_cache); |
| mas_set_range(&mas, first, last); |
| mas_store(&mas, NETFS_COPY_TO_CACHE); |
| |
skip:
	mtree_unlock(&ctx->dirty_regions);
out:
	mas_destroy(&mas);
| } |
| |
| /* |
| * If we downloaded some data and it now needs writing to the cache, we add it |
| * to the dirty region list and let that flush it. This way it can get merged |
| * with writes. |
| * |
| * We inherit a ref from the caller. |
| */ |
| void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) |
| { |
| struct netfs_io_subrequest *subreq, *next, *p; |
| |
| trace_netfs_rreq(rreq, netfs_rreq_trace_copy_mark); |
| |
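	/* Discard any subrequests that don't need writing to the cache. */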
| list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { |
| if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { |
| list_del_init(&subreq->rreq_link); |
| netfs_put_subrequest(subreq, false, |
| netfs_sreq_trace_put_no_copy); |
| } |
| } |
| |
| list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { |
| loff_t start = subreq->start; |
| size_t len = subreq->len; |
| |
| /* Amalgamate adjacent writes */ |
| while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { |
| next = list_next_entry(subreq, rreq_link); |
| if (next->start != start + len) |
| break; |
| len += next->len; |
| list_del_init(&next->rreq_link); |
| netfs_put_subrequest(next, false, |
| netfs_sreq_trace_put_merged); |
| } |
| |
| netfs_copy_to_cache(rreq, start, len); |
| } |
| |
| netfs_rreq_completed(rreq, false); |
| } |