| /* VFS-based union mounts for Linux |
| * |
| * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH. |
| * Copyright (C) 2007-2009 Novell Inc. |
| * Copyright (C) 2009-2012 Red Hat, Inc. |
| * |
| * Author(s): Jan Blunck (j.blunck@tu-harburg.de) |
| * Valerie Aurora <vaurora@redhat.com> |
| * David Howells <dhowells@redhat.com> |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; version 2 |
| * of the License. |
| */ |
| #define DEBUG |
| #include <linux/module.h> |
| #include <linux/fs.h> |
| #include <linux/mount.h> |
| #include <linux/fs_struct.h> |
| #include <linux/slab.h> |
| #include <linux/fsnotify.h> |
| #include <linux/xattr.h> |
| #include <linux/file.h> |
| #include <linux/security.h> |
| #include <linux/splice.h> |
| |
| #include "internal.h" |
| #include "union.h" |
| |
| /** |
| * union_alloc - allocate a union stack |
| * @path: path of topmost directory |
| * |
| * Allocate a union_stack large enough to contain the maximum number |
| * of layers in this union mount. |
| */ |
| struct union_stack *union_alloc(struct path *topmost) |
| { |
| unsigned int layers = topmost->dentry->d_sb->s_union_count; |
| return kcalloc(sizeof(struct path), layers, GFP_KERNEL); |
| } |
| |
| /** |
| * d_free_unions - free all unions for this dentry |
| * @dentry: topmost dentry in the union stack to remove |
| * |
| * This must be called when freeing a dentry. d_inode may point to a defunct |
| * inode or may have been cleared by the time we get here. |
| */ |
| void d_free_unions(struct dentry *topmost) |
| { |
| struct path *path; |
| unsigned int i, layers = topmost->d_sb->s_union_count; |
| |
| if (topmost->d_union_stack) { |
| if (topmost->d_flags & DCACHE_UNION_PINNING_LOWER) { |
| /* A negative non-dir upper dentry is pinning |
| * a single lower dentry so that f_inode |
| * doesn't have to. |
| */ |
| printk("free pin: %pq\n", &topmost->d_name); |
| dput(topmost->d_fallthru); |
| } else { |
| /* A positive directory dentry is pinning a |
| * stack of lower dirs. |
| */ |
| printk("free dirstack: %pq\n", &topmost->d_name); |
| |
| for (i = 0; i < layers; i++) { |
| path = union_find_dir(topmost, i); |
| if (path->mnt) |
| path_put(path); |
| } |
| kfree(topmost->d_union_stack); |
| } |
| topmost->d_union_stack = NULL; |
| } |
| } |
| |
| /** |
| * union_add_dir - Add another layer to a unioned directory |
| * @topmost: topmost directory |
| * @lower: directory in the current layer |
| * @layer: index of layer to add this at |
| * |
| * @layer counts starting at 0 for the dir below the topmost dir. |
| * |
| * This transfers the caller's references to the constituents of *lower to the |
| * union stack. |
| */ |
| int union_add_dir(struct path *topmost, struct path *lower, unsigned layer) |
| { |
| struct dentry *dentry = topmost->dentry; |
| struct path *path; |
| |
| BUG_ON(layer >= dentry->d_sb->s_union_count); |
| BUG_ON(d_is_fallthru(dentry)); |
| |
| if (!dentry->d_union_stack) |
| dentry->d_union_stack = union_alloc(topmost); |
| if (!dentry->d_union_stack) |
| return -ENOMEM; |
| |
| path = union_find_dir(dentry, layer); |
| *path = *lower; |
| return 0; |
| } |
| |
| /** |
| * union_copyup_xattr |
| * @new: dentry of new copy |
| * @old: dentry of original file |
| * |
| * Copy up extended attributes from the original file to the new one. |
| * |
| * XXX - Permissions? For now, copying up every xattr. |
| */ |
| static int union_copyup_xattr(struct path *new, struct dentry *old) |
| { |
| ssize_t list_size, size; |
| char *buf, *name, *value; |
| int error; |
| |
| /* Check for xattr support */ |
| if (!old->d_inode->i_op->getxattr || |
| !new->dentry->d_inode->i_op->getxattr) |
| return 0; |
| |
| /* Find out how big the list of xattrs is */ |
| list_size = vfs_listxattr(old, NULL, 0); |
| if (list_size <= 0) |
| return list_size; |
| |
| /* Allocate memory for the list */ |
| buf = kzalloc(list_size, GFP_KERNEL); |
| if (!buf) |
| return -ENOMEM; |
| |
| /* Allocate memory for the xattr's value */ |
| error = -ENOMEM; |
| value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); |
| if (!value) |
| goto out; |
| |
| /* Actually get the list of xattrs */ |
| list_size = vfs_listxattr(old, buf, list_size); |
| if (list_size <= 0) { |
| error = list_size; |
| goto out_free_value; |
| } |
| |
| for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { |
| /* XXX Locking? old is on read-only fs */ |
| size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); |
| if (size <= 0) { |
| error = size; |
| goto out_free_value; |
| } |
| /* XXX do we really need to check for size overflow? */ |
| /* XXX locks new dentry, lock ordering problems? */ |
| error = vfs_setxattr(new, name, value, size, 0); |
| if (error) |
| goto out_free_value; |
| } |
| |
| out_free_value: |
| kfree(value); |
| out: |
| kfree(buf); |
| return error; |
| } |
| |
| /** |
| * union_create_topmost_dir - Create a matching dir in the topmost file system |
| * @parent - parent of target on topmost layer |
| * @topmost - path of target on topmost layer |
| * @lower - path of source on lower layer |
| * |
| * As we lookup each directory on the lower layer of a union, we create a |
| * matching directory on the topmost layer if it does not already exist. |
| * |
| * We don't use vfs_mkdir() for a few reasons: don't want to do the security |
| * check, don't want to make the dir opaque, don't need to sanitize the mode. |
| * |
| * The caller must hold the parent i_mutex lock and the mnt_want_write lock. |
| * |
| * XXX - owner is wrong, set credentials properly |
| * XXX - rmdir() directory on failure of xattr copyup |
| * XXX - not atomic w/ respect to crash |
| */ |
| int union_create_topmost_dir(struct path *parent, |
| struct path *topmost, struct dentry *lower) |
| { |
| struct inode *dir = parent->dentry->d_inode; |
| int mode = lower->d_inode->i_mode; |
| int error; |
| |
| BUG_ON(topmost->dentry->d_inode); |
| |
| /* XXX - Do we even need to check this? */ |
| if (!dir->i_op->mkdir) |
| return -EPERM; |
| |
| error = dir->i_op->mkdir(dir, topmost->dentry, mode); |
| if (error) |
| return error; |
| |
| error = union_copyup_xattr(topmost, lower); |
| if (error) |
| goto out_rmdir; |
| |
| fsnotify_mkdir(dir, topmost->dentry); |
| return 0; |
| |
| out_rmdir: |
| /* XXX rm created dir */ |
| dput(topmost->dentry); |
| return error; |
| } |
| |
| struct union_iterate_context { |
| struct dir_context ctx; |
| struct dentry *topmost_dentry; |
| int error; |
| }; |
| |
| /** |
| * union_copyup_one_dirent - copy up a single directory entry |
| * |
| * Individual directory entry copyup function for union_copyup_dir. |
| * We get the entries from higher level layers first. |
| */ |
| static int union_copyup_one_dirent(void *buf, const char *name, int namelen, |
| loff_t offset, u64 ino, unsigned int d_type) |
| { |
| struct union_iterate_context *uic = (struct union_iterate_context *)buf; |
| struct dentry *topmost_dentry = uic->topmost_dentry; |
| struct dentry *dentry; |
| int err = 0; |
| |
| switch (namelen) { |
| case 2: |
| if (name[1] != '.') |
| break; |
| case 1: |
| if (name[0] != '.') |
| break; |
| return 0; |
| } |
| |
| /* Lookup this entry in the topmost directory */ |
| dentry = lookup_one_len(name, topmost_dentry, namelen); |
| |
| if (IS_ERR(dentry)) { |
| printk(KERN_WARNING "%s: error looking up %*.*s\n", |
| __func__, namelen, namelen, name); |
| err = PTR_ERR(dentry); |
| goto out; |
| } |
| |
| /* XXX do we need to revalidate on readdir anyway? think NFS */ |
| if (dentry->d_op && dentry->d_op->d_revalidate) |
| goto fallthru; |
| |
| /* If the entry already exists, one of the following is true: it was |
| * already copied up (due to an earlier lookup), an entry with the same |
| * name already exists on the topmost file system, it is a whiteout, or |
| * it is a fallthru. In each case, the top level entry masks any |
| * entries from lower file systems, so don't copy up this entry. |
| */ |
| if (dentry->d_inode || d_is_whiteout(dentry) || d_is_fallthru(dentry)) |
| goto out_dput; |
| |
| /* If the entry doesn't exist, create a fallthru entry in the topmost |
| * file system. All possible directory types are used, so each file |
| * system must implement its own way of storing a fallthru entry. |
| */ |
| fallthru: |
| err = topmost_dentry->d_inode->i_op->fallthru(topmost_dentry->d_inode, |
| dentry); |
| |
| /* It's okay if it exists, ultimate responsibility rests with |
| * ->fallthru() */ |
| if (err == -EEXIST) |
| err = 0; |
| out_dput: |
| dput(dentry); |
| out: |
| if (err) |
| uic->error = err; |
| return err; |
| } |
| |
| /** |
| * __union_copyup_one_dir - Non-recursive directory copy up |
| * |
| * Copy up the specified directory only, without recursing into the subtree |
| * rooted at this point. |
| * |
| * During the operation, where a directory entry exists in one of the lower |
| * directories, a fallthrough dentry will be created in the upper directory if |
| * the upper directory doesn't already have an entry that obscures it. At the |
| * end of the operation, the upper directory will be marked opaque on the |
| * medium - thus preventing further copy up attempts on this directory. |
| * |
| * TODO: At some point in the future, on-medium whiteouts should be culled from |
| * a directory that is marked opaque as they then serve no purpose. |
| * |
| * The primary reason for this function is that readdir() is difficult to |
| * support on union file systems for two reasons: We must eliminate duplicates |
| * and apply whiteouts, and we must return something in f_pos that lets us |
| * restart in the same place when we return. Our solution is to, on first |
| * readdir() of the directory, copy up all visible entries from the low-level |
| * file systems and mark the entries that refer to low-level file system |
| * objects as "fallthrough" entries. |
| * |
| * Sadly, this function is also necessary for rmdir(). To work out whether a |
| * directory is empty, we have to work out if there are entries in lower |
| * directories that are not obscured by whiteouts in the upper. This is not a |
| * trivial operation. The simplest way is, therefore, to copy up and then |
| * check the combined opaque directory. |
| * |
| * |
| * Locking strategy: We hold the topmost dir's i_mutex on entry. We grab the |
| * i_mutex on lower directories one by one. So the locking order is: |
| * |
| * Writable/topmost layers > Read-only/lower layers |
| * |
| * So there is no problem with lock ordering for union stacks with |
| * multiple lower layers. E.g.: |
| * |
| * (topmost) A->B->C (bottom) |
| * (topmost) D->C->B (bottom) |
| * |
| */ |
| int __union_copyup_one_dir(struct path *topmost_path) |
| { |
| struct dentry *topmost_dentry = topmost_path->dentry; |
| unsigned int i, layers = topmost_dentry->d_sb->s_union_count; |
| int error = 0; |
| |
| struct union_iterate_context uic = { |
| .ctx.actor = union_copyup_one_dirent, |
| .topmost_dentry = topmost_dentry, |
| }; |
| |
| |
| if (IS_OPAQUE(topmost_dentry->d_inode)) |
| return 0; |
| |
| if (!topmost_dentry->d_inode->i_op || |
| !topmost_dentry->d_inode->i_op->fallthru) |
| return -EOPNOTSUPP; |
| |
| for (i = 0; i < layers; i++) { |
| struct inode *inode; |
| struct file *ftmp; |
| struct path *path; |
| |
| path = union_find_dir(topmost_dentry, i); |
| if (!path->mnt) |
| continue; |
| |
| ftmp = dentry_open(path, O_RDONLY | O_DIRECTORY | O_NOATIME, |
| current_cred()); |
| if (IS_ERR(ftmp)) { |
| printk(KERN_ERR "unable to open dir %pq for " |
| "directory copyup: %ld\n", |
| &path->dentry->d_name, PTR_ERR(ftmp)); |
| error = PTR_ERR(ftmp); |
| break; |
| } |
| |
| inode = file_inode(ftmp); |
| mutex_lock(&inode->i_mutex); |
| |
| error = -ENOENT; |
| if (IS_DEADDIR(inode)) |
| goto out_fput; |
| |
| /* Read the whole directory, calling our directory entry copyup |
| * function on each entry. |
| */ |
| uic.ctx.pos = 0; |
| uic.error = 0; |
| error = ftmp->f_op->iterate(ftmp, &uic.ctx); |
| out_fput: |
| mutex_unlock(&inode->i_mutex); |
| fput(ftmp); |
| |
| if (uic.error) |
| error = uic.error; |
| if (error) |
| break; |
| |
| /* XXX Should process directories below an opaque directory in |
| * case there are fallthrus in it |
| */ |
| if (IS_OPAQUE(path->dentry->d_inode)) |
| break; |
| } |
| |
| /* Mark this dir opaque to show that we have already copied up the |
| * lower entries. Be sure to do this AFTER the directory entries have |
| * been copied up so that if we crash in the middle of copyup, we will |
| * try to copyup the dir next time we read it. |
| * |
| * XXX - Could leave directory non-opaque, and force reread/copyup of |
| * directory each time it is read in from disk. That would make it |
| * easy to update lower file systems (when not union mounted) and have |
| * the changes show up when union mounted again. |
| */ |
| if (!error) { |
| topmost_dentry->d_inode->i_flags |= S_OPAQUE; |
| mark_inode_dirty(topmost_dentry->d_inode); |
| } |
| |
| return error; |
| } |
| |
| /* Relationship between i_mode and the DT_xxx types */ |
| static inline unsigned char dt_type(struct inode *inode) |
| { |
| return (inode->i_mode >> 12) & 15; |
| } |
| |
| /** |
| * generic_readdir_fallthru - Helper to lookup target of a fallthru |
| * @topmost_dentry: dentry for the topmost dentry of the dir being read |
| * @name: name of fallthru dirent |
| * @namelen: length of @name |
| * @ino: return inode number of target, if found |
| * @d_type: return directory type of target, if found |
| * |
| * In readdir(), client file systems need to lookup the target of a |
| * fallthru in a lower layer for three reasons: (1) fill in d_ino, (2) |
| * fill in d_type, (2) make sure there is something to fall through to |
| * (and if not, don't return this dentry). Upon detecting a fallthru |
| * dentry in readdir(), the client file system should call this function. |
| * |
| * Returns 0 on success and -ENOENT if no matching directory entry was |
| * found (which can happen when the topmost file system is unmounted |
| * and remounted over a different file system than). Any other errors |
| * are unexpected. |
| */ |
| int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name, |
| int namlen, ino_t *ino, unsigned char *d_type) |
| { |
| struct path *parent; |
| struct dentry *dentry; |
| unsigned int i, layers = topmost_dentry->d_sb->s_union_count; |
| |
| BUG_ON(!mutex_is_locked(&topmost_dentry->d_inode->i_mutex)); |
| |
| for (i = 0; i < layers; i++) { |
| parent = union_find_dir(topmost_dentry, i); |
| mutex_lock(&parent->dentry->d_inode->i_mutex); |
| dentry = lookup_one_len(name, parent->dentry, namlen); |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| if (IS_ERR(dentry)) |
| return PTR_ERR(dentry); |
| if (dentry->d_inode) { |
| *ino = dentry->d_inode->i_ino; |
| *d_type = dt_type(dentry->d_inode); |
| dput(dentry); |
| return 0; |
| } |
| dput(dentry); |
| } |
| return -ENOENT; |
| } |
| EXPORT_SYMBOL(generic_readdir_fallthru); |
| |
| /* |
| * Get the inode and path for a dentry where that inode may exist on a lower |
| * layer in a union. |
| * |
| * The caller must preclear the elements of *_lower_cache and prime *_actual |
| * with the contents of *upper (as is done by wrappers in union.h) and must |
| * also hold parent->i_mutex. |
| * |
| * Note that we don't get a ref on the inode or the lower vfsmount (if |
| * returned). We leave it to the caller to iget/mntget them if appropriate. |
| * This should be safe as the caller holds parent->i_mutex. The lower dentry |
| * (if returned) is dget'd, however. |
| * |
| * The pointers returned in *_actual are not dget'd/mntget'd as it is assumed |
| * they're pinned by the caller's ref on upper->mnt (if set), upper->dentry; or |
| * by the fact that parent->i_mutex is locked and _lower_cache->dentry is |
| * dget'd. |
| */ |
| struct inode *__union_get_inode_locked(struct dentry *parent, |
| struct path *upper, |
| struct path *_lower_cache, |
| struct path *_actual) |
| { |
| const struct union_stack *d; |
| struct dentry *dentry = upper->dentry; |
| struct path lower; |
| unsigned i, layers = parent->d_sb->s_union_count; |
| int ret; |
| |
| pr_devel("-->%s(%pq,)\n", __func__, &dentry->d_name); |
| |
| BUG_ON(d_is_whiteout(dentry)); |
| |
| /* Check for a race with copy up. */ |
| if (likely(dentry->d_inode)) { |
| pr_devel("<--%s() = upper\n", __func__); |
| *_actual = *upper; |
| return dentry->d_inode; |
| } |
| |
| if (dentry->d_flags & DCACHE_UNION_PINNING_LOWER) { |
| pr_devel("<--%s() = fall\n", __func__); |
| smp_rmb(); |
| _actual->dentry = dentry->d_fallthru; |
| d = parent->d_union_stack; |
| for (i = 0; i < layers; i++) { |
| if (d->u_dirs[i].dentry == dentry->d_fallthru->d_parent) { |
| _lower_cache->mnt = d->u_dirs[i].mnt; |
| break; |
| } |
| } |
| if (unlikely(!_lower_cache->mnt)) |
| goto out_badcache; |
| _actual->mnt = mntget(_lower_cache->mnt); |
| return dentry->d_fallthru->d_inode; |
| } |
| |
| /* Search down through the union stack of the parent of the target for |
| * the lower dentry we're going to use. |
| */ |
| for (i = 0; i < layers; i++) { |
| /* Get the parent directory for this layer and look the target |
| * up in it. |
| */ |
| const struct path *lower_parent = union_find_dir(parent, i); |
| if (!lower_parent->mnt) |
| continue; |
| |
| mutex_lock(&lower_parent->dentry->d_inode->i_mutex); |
| lower.dentry = __lookup_hash(&dentry->d_name, |
| lower_parent->dentry, 0); |
| mutex_unlock(&lower_parent->dentry->d_inode->i_mutex); |
| if (IS_ERR(lower.dentry)) { |
| ret = PTR_ERR(lower.dentry); |
| goto out_err; |
| } |
| |
| /* A negative dentry can mean several things: a plain negative |
| * dentry is ignored and lookup continues to the next layer, |
| * but a whiteout or a non-fallthru in an opaque dir covers |
| * everything below it. |
| */ |
| if (!lower.dentry->d_inode) { |
| if (d_is_whiteout(lower.dentry)) |
| goto out_hit_barrier; |
| if (IS_OPAQUE(lower_parent->dentry->d_inode) && |
| !d_is_fallthru(lower.dentry)) |
| goto out_hit_barrier; |
| dput(lower.dentry); |
| continue; |
| } |
| |
| /* TODO: Deal with mountpoints and suchlike */ |
| lower.mnt = mntget(lower_parent->mnt); |
| goto out_found_file; |
| } |
| |
| out_enoent: |
| if (d_is_fallthru(dentry)) { |
| pr_devel("<--%s() = -ENOENT\n", __func__); |
| return ERR_PTR(-ENOENT); |
| } |
| pr_devel("<--%s() = NULL\n", __func__); |
| return NULL; |
| |
| out_hit_barrier: |
| dput(lower.dentry); |
| goto out_enoent; |
| |
| out_found_file: |
| *_actual = *_lower_cache = lower; |
| pr_devel("<--%s() = lower\n", __func__); |
| return lower.dentry->d_inode; |
| |
| out_err: |
| pr_devel("<--%s() = %d\n", __func__, ret); |
| return ERR_PTR(ret); |
| |
| out_badcache: |
| printk_ratelimited(KERN_WARNING "UNION: Bad cached fallthru (%pq/%pq)\n", |
| &parent->d_name, &upper->dentry->d_name); |
| return ERR_PTR(-EIO); |
| } |
| |
| /* |
| * Get the inode for a dentry where that inode may exist on a lower layer in a |
| * union. |
| * |
| * Note that we don't get a ref on the inode, so we may need to pin it by |
| * getting a ref on a dentry pointing to it - in which case, a pointer to that |
| * dentry will be returned in *_lower and the caller is expected to dput() the |
| * ref on it. |
| */ |
| struct inode *__union_get_inode(struct path *upper, struct path *_lower_cache, |
| struct path *_actual) |
| { |
| struct dentry *parent, *dentry = upper->dentry; |
| struct inode *inode; |
| int ret; |
| |
| pr_devel("-->%s(%pq,)\n", __func__, &dentry->d_name); |
| |
| /* We need the parent directory so that we can find the stack of lower |
| * directories in which to do lookups. Use the rename mutex to prevent |
| * rename from getting underfoot whilst we get the parent. |
| */ |
| if (mutex_lock_interruptible(&dentry->d_sb->s_vfs_rename_mutex) < 0) |
| return ERR_PTR(-EINTR); |
| |
| parent = dget_parent(dentry); |
| if (IS_OPAQUE(parent->d_inode) && !d_is_fallthru(dentry)) { |
| mutex_unlock(&dentry->d_sb->s_vfs_rename_mutex); |
| inode = NULL; |
| } else { |
| ret = mutex_lock_interruptible(&parent->d_inode->i_mutex); |
| mutex_unlock(&dentry->d_sb->s_vfs_rename_mutex); |
| if (ret < 0) { |
| inode = ERR_PTR(ret); |
| } else { |
| inode = __union_get_inode_locked(parent, upper, |
| _lower_cache, _actual); |
| mutex_unlock(&parent->d_inode->i_mutex); |
| } |
| } |
| dput(parent); |
| return inode; |
| } |
| |
| /** |
| * union_create_file |
| * @parent: path of the upper parent directory |
| * @upper: path of the negative dentry to become new file |
| * @lower: path of the source file |
| * |
| * Must already have mnt_want_write() on the mnt and the parent's i_mutex. |
| */ |
| static int union_create_file(struct path *parent, struct path *upper, |
| struct path *lower) |
| { |
| struct inode *dir = parent->dentry->d_inode; |
| int ret; |
| |
| BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex)); |
| |
| if (!dir->i_op->tmpfile) |
| return -EPERM; |
| |
| ret = dir->i_op->tmpfile(dir, upper->dentry, |
| lower->dentry->d_inode->i_mode); |
| if (ret == 0) { |
| spin_lock(&upper->dentry->d_inode->i_lock); |
| upper->dentry->d_inode->i_state |= I_LINKABLE; |
| spin_unlock(&upper->dentry->d_inode->i_lock); |
| } |
| return ret; |
| } |
| |
| /** |
| * union_create_symlink |
| * @parent: Upper parent of the symlink |
| * @upper: Path of the negative dentry to become new symlink. |
| * @lower: Path of the source symlink |
| * |
| * Must already have mnt_want_write() on the mnt and the parent's i_mutex. |
| */ |
| static int union_create_symlink(struct path *parent, struct path *upper, |
| struct path *lower) |
| { |
| struct inode *inode = lower->dentry->d_inode; |
| char *content; |
| int error; |
| |
| BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex)); |
| |
| content = kmalloc(PATH_MAX + 2, GFP_KERNEL); |
| if (!content) |
| return -ENOMEM; |
| |
| error = inode->i_op->readlink(lower->dentry, content, PATH_MAX + 1); |
| if (error < 0) |
| goto error; |
| content[error] = 0; |
| |
| error = vfs_symlink(parent->dentry->d_inode, upper->dentry, content); |
| error: |
| kfree(content); |
| return error; |
| } |
| |
| /** |
| * union_copy_up_data - Copy up len bytes of old's data to new |
| * @path: path of target file |
| * @actual: path of source file in lower layer |
| * @truncate_to: number of bytes to copy (or NULL if all) |
| */ |
| static int union_copy_up_data(struct path *path, struct path *actual, |
| const loff_t *truncate_to) |
| { |
| const struct cred *cred = current_cred(); |
| struct file *lower_file; |
| struct file *new_file; |
| loff_t filesize, offset = 0; |
| size_t len; |
| long bytes; |
| int error = 0; |
| |
| filesize = i_size_read(actual->dentry->d_inode); |
| if (truncate_to && *truncate_to < filesize) |
| filesize = *truncate_to; |
| |
| /* Check for overflow of file size */ |
| len = filesize; |
| if (len != filesize) |
| return -EFBIG; |
| |
| if (len == 0) |
| return 0; |
| |
| lower_file = dentry_open(actual, O_RDONLY, cred); |
| if (IS_ERR(lower_file)) |
| return PTR_ERR(lower_file); |
| |
| new_file = dentry_open(path, O_WRONLY, cred); |
| if (IS_ERR(new_file)) { |
| error = PTR_ERR(new_file); |
| goto out_fput; |
| } |
| |
| bytes = do_splice_direct(lower_file, &offset, new_file, len, |
| SPLICE_F_MOVE); |
| if (bytes < 0) |
| error = bytes; |
| |
| fput(new_file); |
| out_fput: |
| fput(lower_file); |
| return error; |
| } |
| |
| /* |
| * Create a temporary file. We don't want to inline this as it uses quite a |
| * lot of stack space. |
| * |
| * The caller should make sure _tmpfile->mnt is set to the upper vfsmount and |
| * that ->dentry is NULL. |
| * |
| * Note: we don't return with a ref on _tmpfile->mnt as path is holding a ref. |
| * Further, we may return with a dentry in _tmpfile->dentry that needs |
| * dput'ing, even if an error occurred. |
| */ |
| static int union_create_tmpfile(struct path *parent, struct path *path, |
| struct path *actual, struct path *_tmpfile) |
| { |
| static const struct qstr nameless = { .name = "", .len = 0, .hash = 0 }; |
| struct dentry *dentry; |
| int ret; |
| |
| pr_devel("-->%s(%pq)\n", |
| __func__, &path->dentry->d_name); |
| |
| /* Create a nameless file not directly attached to the parent |
| * directory, but still associated with it for layout optimisation |
| * reasons. The upperfs should check for the file being of zero |
| * length. |
| * |
| * We will then hard link the file into place when we're done copying |
| * up - and mount/fsck will clean it up in the event of a crash and |
| * dget() will clean it up in the event of an error. |
| */ |
| mutex_lock(&parent->dentry->d_inode->i_mutex); |
| |
| dentry = d_alloc(parent->dentry, &nameless); |
| if (!IS_ERR(dentry)) { |
| _tmpfile->dentry = dentry; |
| if (S_ISREG(actual->dentry->d_inode->i_mode)) |
| ret = union_create_file(parent, _tmpfile, actual); |
| else if (S_ISLNK(actual->dentry->d_inode->i_mode)) |
| ret = union_create_symlink(parent, _tmpfile, actual); |
| else |
| BUG(); |
| } else { |
| ret = PTR_ERR(dentry); |
| } |
| |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| pr_devel("<--%s() = %d\n", __func__, ret); |
| return ret; |
| } |
| |
| /** |
| * Copy up a file or symlink to a temporary file in the specially prepared |
| * directory and return the dentry of that. |
| */ |
| static int union_copy_up_to_tmpfile(struct path *parent, struct path *path, |
| struct path *actual, struct path *_tmpfile, |
| const loff_t *truncate_to) |
| { |
| struct dentry *dentry = actual->dentry; |
| int ret; |
| |
| ret = union_create_tmpfile(parent, path, actual, _tmpfile); |
| |
| if (ret == 0 && S_ISREG(dentry->d_inode->i_mode)) |
| ret = union_copy_up_data(_tmpfile, actual, truncate_to); |
| if (ret == 0) |
| ret = union_copyup_xattr(_tmpfile, actual->dentry); |
| return ret; |
| } |
| |
| /* |
| * Create a hardlink from the temporary file to the actual location. |
| */ |
| static int union_hard_link_to_tmpfile(struct path *parent, struct path *path, |
| struct path *tmpfile) |
| { |
| int ret; |
| |
| pr_devel("-->%s(%pq,%pq,%pq)\n", |
| __func__, &parent->dentry->d_name, &path->dentry->d_name, |
| &tmpfile->dentry->d_name); |
| |
| mutex_lock(&parent->dentry->d_inode->i_mutex); |
| ret = vfs_link(tmpfile->dentry, parent->dentry->d_inode, path->dentry); |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| return ret; |
| } |
| |
| /** |
| * union_copy_up_via_tmpfile - Copy up lower file via temporary file |
| * |
| * Copy up a file or symlink to a temporary file in the specially prepared |
| * directory, then hard link across and unlink the temp file. |
| */ |
| static int union_copy_up_via_tmpfile(struct path *parent, struct path *path, |
| struct path *actual, const loff_t *truncate_to) |
| { |
| const struct cred *saved_cred; |
| struct cred *override_cred; |
| struct path tmpfile = { .mnt = path->mnt, .dentry = NULL }; |
| int ret; |
| |
| pr_devel("-->%s(,%pq,%pq,%pq,,%lld)\n", |
| __func__, &parent->dentry->d_name, &path->dentry->d_name, |
| &actual->dentry->d_name, truncate_to ? *truncate_to : -1); |
| |
| override_cred = prepare_kernel_cred(NULL); |
| if (!override_cred) |
| return -ENOMEM; |
| |
| override_cred->fsuid = actual->dentry->d_inode->i_uid; |
| override_cred->fsgid = actual->dentry->d_inode->i_gid; |
| |
| saved_cred = override_creds(override_cred); |
| |
| ret = union_copy_up_to_tmpfile(parent, path, actual, &tmpfile, |
| truncate_to); |
| |
| if (ret == 0) |
| ret = union_hard_link_to_tmpfile(parent, path, &tmpfile); |
| |
| /* Discard the temporary dentry */ |
| dput(tmpfile.dentry); |
| |
| revert_creds(saved_cred); |
| |
| put_cred(override_cred); |
| pr_devel("<--%s() = %d\n", __func__, ret); |
| return ret; |
| } |
| |
| /* |
| * Make copy-up an exclusive operation on a file. The caller must have the |
| * parent i_mutex locked - which we will unlock during this function. |
| */ |
| static int __union_copy_up_exclusive(struct path *parent, struct path *path, |
| struct path *actual, const loff_t *truncate_to) |
| __releases(parent->dentry->d_inode->i_mutex) |
| { |
| struct dentry *upper = path->dentry; |
| int ret; |
| |
| spin_lock(&upper->d_lock); |
| if (upper->d_flags & DCACHE_UNION_COPYING_UP) { |
| /* Copy up already in progress */ |
| spin_unlock(&upper->d_lock); |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| pr_devel("UNION: wait on copyup\n"); |
| |
| /* Abuse the bit-wait system to get hold of a waitqueue we can |
| * use (d_flags may be smaller than an unsigned long). |
| */ |
| do { |
| wait_queue_head_t *wq = |
| bit_waitqueue(&upper->d_flags, ilog2(DCACHE_UNION_COPYING_UP)); |
| DEFINE_WAIT(__wait); |
| |
| ret = -EAGAIN; |
| for (;;) { |
| prepare_to_wait(wq, &__wait, TASK_INTERRUPTIBLE); |
| if (!(upper->d_flags & DCACHE_UNION_COPYING_UP)) |
| break; |
| if (!signal_pending(current)) { |
| schedule(); |
| continue; |
| } |
| ret = -ERESTARTSYS; |
| break; |
| } |
| finish_wait(wq, &__wait); |
| } while (0); |
| return ret; /* There might have been an error or a signal */ |
| } |
| |
| /* Commence copying up. |
| * |
| * Mark the dentry so that other potential copy-uppers will wait for us |
| * and drop the locks so that we can use splice. |
| */ |
| upper->d_flags |= DCACHE_UNION_COPYING_UP; |
| spin_unlock(&upper->d_lock); |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| |
| pr_devel("UNION: copyup begin\n"); |
| ret = union_copy_up_via_tmpfile(parent, path, actual, truncate_to); |
| pr_devel("UNION: copyup done\n"); |
| |
| spin_lock(&upper->d_lock); |
| upper->d_flags &= ~DCACHE_UNION_COPYING_UP; |
| spin_unlock(&upper->d_lock); |
| |
| wake_up_bit(&upper->d_flags, ilog2(DCACHE_UNION_COPYING_UP)); |
| return 0; |
| } |
| |
| /** |
| * __union_copy_up - Copy a non-directory file up to the upper layer. |
| */ |
| int __union_copy_up(struct path *path, struct path *actual, const loff_t *truncate_to) |
| { |
| struct dentry *upper = path->dentry; |
| struct path parent; |
| int ret; |
| |
| pr_devel("-->%s(%pq)\n", __func__, &path->dentry->d_name); |
| |
| /* We don't currently support copyup of special files, though in theory |
| * there's no reason we couldn't at least copy up blockdev and chrdev |
| * files. FIFO files are problematic if open. Socket files are |
| * managed by AF_UNIX and would need help from there. Directories are |
| * handled by pathwalk. |
| */ |
| if (!S_ISREG(actual->dentry->d_inode->i_mode) && |
| !S_ISLNK(actual->dentry->d_inode->i_mode)) |
| return -EACCES; |
| |
| parent.mnt = path->mnt; |
| do { |
| /* We need to get the parent directory and then we need to lock |
| * it. Use the rename mutex to prevent rename from getting |
| * underfoot whilst we do this. |
| */ |
| if (mutex_lock_interruptible(&upper->d_sb->s_vfs_rename_mutex) < 0) |
| return -EINTR; |
| |
| if (upper->d_inode) { |
| mutex_unlock(&upper->d_sb->s_vfs_rename_mutex); |
| goto already_copied_up; |
| } |
| |
| parent.dentry = dget_parent(upper); |
| BUG_ON(IS_OPAQUE(parent.dentry->d_inode) && !d_is_fallthru(upper)); |
| BUG_ON(d_is_whiteout(upper)); |
| |
| ret = mutex_lock_interruptible(&parent.dentry->d_inode->i_mutex); |
| mutex_unlock(&upper->d_sb->s_vfs_rename_mutex); |
| if (ret < 0) { |
| dput(parent.dentry); |
| goto out; |
| } |
| |
| if (upper->d_inode) |
| goto already_copied_up_unlock; |
| |
| /* Do the copy up (unlocks the parent) */ |
| ret = __union_copy_up_exclusive(&parent, path, actual, truncate_to); |
| dput(parent.dentry); |
| } while (ret == -EAGAIN); |
| |
| out: |
| pr_devel("<--%s() = %d\n", __func__, ret); |
| return ret; |
| |
| already_copied_up_unlock: |
| mutex_unlock(&parent.dentry->d_inode->i_mutex); |
| dput(parent.dentry); |
| already_copied_up: |
| pr_devel("<--%s() = 0 [already done]\n", __func__); |
| *actual = *path; |
| return 0; |
| } |
| |
| /* |
| * Copy up a file for do last. This gives us the parent, but we still |
| * need to work out the lower dentry. |
| */ |
| int __union_copy_up_for_do_last(struct path *parent, struct path *path, |
| bool will_truncate) |
| { |
| struct path lower_cache, actual; |
| struct inode *inode; |
| loff_t zero = 0; |
| int ret; |
| |
| pr_devel("-->%s(%pq)\n", __func__, &path->dentry->d_name); |
| |
| do { |
| ret = mutex_lock_interruptible(&parent->dentry->d_inode->i_mutex); |
| if (ret < 0) |
| return ret; |
| |
| /* Check to see if we raced with another copy-up or an unlink */ |
| ret = 0; |
| if (path->dentry->d_parent != parent->dentry || |
| path->dentry->d_inode) |
| goto unlock_out; |
| |
| inode = union_get_inode_locked(parent->dentry, path, |
| &lower_cache, &actual); |
| if (IS_ERR(inode)) { |
| ret = PTR_ERR(inode); |
| goto unlock_out; |
| } |
| |
| /* Do the copy up (unlocks the parent). */ |
| ret = __union_copy_up_exclusive(parent, path, &actual, |
| will_truncate ? &zero : 0); |
| path_put_maybe(&lower_cache); |
| } while (ret == -EAGAIN); |
| |
| pr_devel("<--%s() = %d [post]\n", __func__, ret); |
| return ret; |
| |
| unlock_out: |
| mutex_unlock(&parent->dentry->d_inode->i_mutex); |
| pr_devel("<--%s() = %d [pre]\n", __func__, ret); |
| return ret; |
| } |