Development
diff --git a/fs/Makefile b/fs/Makefile index 4fe6df3..65b25e3 100644 --- a/fs/Makefile +++ b/fs/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += quota/ +obj-$(CONFIG_UNION_MOUNT) += union.o obj-$(CONFIG_PROC_FS) += proc/ obj-$(CONFIG_SYSFS) += sysfs/
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ca49f47..9fea4b5 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c
@@ -391,7 +391,8 @@ path.dentry = dir; path_to_graveyard.mnt = cache->mnt; path_to_graveyard.dentry = cache->graveyard; - ret = security_path_rename(&path, rep, &path_to_graveyard, grave); + ret = security_path_rename(&path, rep, &path_to_graveyard, grave, + rep->d_inode); if (ret < 0) { cachefiles_io_error(cache, "Rename security error %d", ret); } else {
diff --git a/fs/dcache.c b/fs/dcache.c index 6e9895a..bf9071b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c
@@ -40,6 +40,7 @@ #include <linux/list_lru.h> #include "internal.h" #include "mount.h" +#include "union.h" /* * Usage: @@ -445,6 +446,7 @@ if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); + d_free_unions(dentry); /* * dentry_iput drops the locks, at which point nobody (except * transient RCU lookups) can reach this dentry. @@ -1522,6 +1524,9 @@ dentry->d_sb = sb; dentry->d_op = NULL; dentry->d_fsdata = NULL; +#ifdef CONFIG_UNION_MOUNT + dentry->d_union_stack = NULL; +#endif INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -2384,6 +2389,7 @@ } dentry->d_flags &= ~DCACHE_CANT_MOUNT; dentry_unlink_inode(dentry); + d_free_unions(dentry); fsnotify_nameremove(dentry, isdir); return; } @@ -2393,6 +2399,12 @@ spin_unlock(&dentry->d_lock); + /* Remove any associated unions. While someone still has this + * directory open (ref count > 0), we could not have deleted it unless + * it was empty, and therefore has no references to directories below + * it. So we don't need the unions. + */ + d_free_unions(dentry); fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete);
diff --git a/fs/inode.c b/fs/inode.c index 8377d55..bb439c0 100644 --- a/fs/inode.c +++ b/fs/inode.c
@@ -1553,6 +1553,11 @@ struct inode *inode = path->dentry->d_inode; struct timespec now; +#ifdef CONFIG_UNION_MOUNT + if (unlikely(!inode)) + return; +#endif + if (inode->i_flags & S_NOATIME) return; if (IS_NOATIME(inode))
diff --git a/fs/internal.h b/fs/internal.h index 4657424..6ab8541 100644 --- a/fs/internal.h +++ b/fs/internal.h
@@ -46,6 +46,8 @@ extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); +extern int sb_permission(struct super_block *, struct inode *, int); +extern struct dentry *__lookup_hash(struct qstr *, struct dentry *, unsigned); /* * namespace.c
diff --git a/fs/libfs.c b/fs/libfs.c index c8360bc..2f9b458 100644 --- a/fs/libfs.c +++ b/fs/libfs.c
@@ -153,6 +153,7 @@ struct list_head *p, *q = &cursor->d_u.d_child; ino_t ino; char d_type; + int err = 0; if (!dir_emit_dots(file, ctx)) return 0; @@ -172,9 +173,13 @@ spin_unlock(&next->d_lock); spin_unlock(&dentry->d_lock); if (d_is_fallthru(next)) { - /* XXX placeholder until generic_readdir_fallthru() arrives */ - ino = 1; - d_type = DT_UNKNOWN; + /* On tmpfs, should only fail with ENOMEM, EIO, etc. */ + err = generic_readdir_fallthru(file->f_path.dentry, + next->d_name.name, + next->d_name.len, + &ino, &d_type); + if (err) + return err; } else { ino = next->d_inode->i_ino; d_type = dt_type(next->d_inode);
diff --git a/fs/namei.c b/fs/namei.c index a1b97c9..d87aa8d 100644 --- a/fs/namei.c +++ b/fs/namei.c
@@ -38,6 +38,7 @@ #include "internal.h" #include "mount.h" +#include "union.h" /* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) @@ -411,7 +412,7 @@ * * Separate out file-system wide checks from inode-specific permission checks. */ -static int sb_permission(struct super_block *sb, struct inode *inode, int mask) +int sb_permission(struct super_block *sb, struct inode *inode, int mask) { if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; @@ -585,6 +586,7 @@ static int complete_walk(struct nameidata *nd) { struct dentry *dentry = nd->path.dentry; + struct inode *inode; int status; if (nd->flags & LOOKUP_RCU) { @@ -607,6 +609,8 @@ mntput(nd->path.mnt); return -ECHILD; } + inode = d_inode_or_lower(dentry); + BUG_ON(nd->inode != inode); rcu_read_unlock(); } @@ -683,9 +687,16 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { - struct inode *inode = link->dentry->d_inode; + struct dentry *dentry = link->dentry; + struct inode *inode; + + /* If the link was on the lower layer of a union when we started + * following it, then follow_link() must have updated link->dentry to + * point to that. + */ + inode = dentry->d_inode; if (inode->i_op->put_link) - inode->i_op->put_link(link->dentry, nd, cookie); + inode->i_op->put_link(dentry, nd, cookie); path_put(link); } @@ -718,6 +729,10 @@ /* Allowed if owner and follower match. */ inode = link->dentry->d_inode; +#ifdef CONFIG_UNION_MOUNT + if (!inode) + inode = link->dentry->d_fallthru->d_inode; +#endif if (uid_eq(current_cred()->fsuid, inode->i_uid)) return 0; @@ -785,21 +800,19 @@ */ static int may_linkat(struct path *link) { - const struct cred *cred; - struct inode *inode; + struct inode *inode = link->dentry->d_inode; if (!sysctl_protected_hardlinks) return 0; - cred = current_cred(); - inode = link->dentry->d_inode; - /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. 
*/ - if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) || - capable(CAP_FOWNER)) + if (uid_eq(current_cred()->fsuid, inode->i_uid) || + safe_hardlink_source(inode) || + capable(CAP_FOWNER)) { return 0; + } audit_log_link_denied("linkat", link); return -EPERM; @@ -827,7 +840,20 @@ touch_atime(link); nd_set_link(nd, NULL); - error = security_inode_follow_link(link->dentry, nd); +#ifdef CONFIG_UNION_MOUNT + if (unlikely(!dentry->d_inode)) { + /* If the link is on the lower layer of a union, then we need + * to save this fact so that put_link() can call the correct + * ->put_link() op if the link gets copied whilst we're using + * it. + */ + link->dentry = dentry->d_fallthru; + dput(dentry); + dentry = dget(link->dentry); + } +#endif + + error = security_inode_follow_link(dentry, nd); if (error) goto out_put_nd_path; @@ -1257,6 +1283,378 @@ } /* + * Inspect the lower layers of a potentially unioned file and appropriately + * annotate the upper dentry. Returns: + * + * (*) 0 if encountered a dir first - the union stack will be filled in, but + * will not be attached to the dentry. The caller must create the top + * dentry first and only then attach it. + * + * (*) -ENOTDIR if encountered a symlink first - and upper d_fallthru will be + * set to point to the lower symlink and DCACHE_SYMLINK will be set. + * + * (*) -ENOTDIR if we encountered any other type of file first. + * + * (*) -ENOENT if we didn't encounter anything. + * + * The caller must hold i_mutex on the parent dir. 
+ */ +static int union_annotate_dentry(struct path *parent, struct path *path, + struct union_stack *d) +{ + struct dentry *dentry = path->dentry, *lower = NULL; + unsigned flags, i, layers = parent->dentry->d_sb->s_union_count; + int ret = -ENOENT; + + printk("UNION: -->union_annotate_dentry(%pd/%pd {%x})\n", + parent->dentry, dentry, dentry->d_flags); + + BUG_ON(dentry->d_flags & DCACHE_UNION_PINNING_LOWER); + + if (d_is_whiteout(dentry)) { + spin_lock(&dentry->d_lock); + BUG_ON(!d_is_whiteout(dentry)); + dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE; + spin_unlock(&dentry->d_lock); + return -ENOENT; + } + + if (IS_OPAQUE(parent->dentry->d_inode) && !d_is_fallthru(dentry)) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE; + spin_unlock(&dentry->d_lock); + return -ENOENT; + } + + for (i = 0; i < layers; i++) { + /* Get the parent directory for this layer and lookup + * the target in it. + */ + struct path *lower_parent = union_find_dir(parent->dentry, i); + if (!lower_parent->mnt) + continue; + + mutex_lock(&lower_parent->dentry->d_inode->i_mutex); + lower = __lookup_hash(&dentry->d_name, lower_parent->dentry, 0); + mutex_unlock(&lower_parent->dentry->d_inode->i_mutex); + + if (IS_ERR(lower)) { + ret = PTR_ERR(lower); + goto error_no_dput; + } + + /* A negative dentry can mean several things: a plain negative + * dentry is ignored and lookup continues to the next layer, + * but a whiteout or a non-fallthru in an opaque dir covers + * everything below it. + */ + if (!lower->d_inode) { + if (d_is_whiteout(lower)) + goto found_blocker; + if (!d_is_fallthru(lower) && + IS_OPAQUE(lower_parent->dentry->d_inode)) + goto found_blocker; + dput(lower); + lower = NULL; + continue; + } + + /* Non-directories block everything below them. Special case: + * If we find a file below a directory (which makes no sense), + * just ignore the file and return the directory above it. 
+ */ + if (!d_is_directory(lower)) { + if (ret != -ENOENT) + break; + goto found_nondir_first; + } + + printk("UNION: layer %u is dir\n", i); + + /* Mountpoints and automount points on a lowerfs just confuse + * everything, so refuse to handle them for the moment. + */ + if (unlikely(d_mountpoint(lower))) { + if (ret == -ENOENT) + ret = -EXDEV; + goto error_dput; + } + if (unlikely(d_managed(lower))) { + if (ret == -ENOENT) + ret = -EREMOTE; + goto error_dput; + } + + d->u_dirs[i].dentry = lower; + d->u_dirs[i].mnt = mntget(lower_parent->mnt); + lower = NULL; + ret = 0; + } + + /* We may have found a lower directory at this point. If we did, we + * don't annotate the dentry, but rather leave that to the caller to do + * when creating the upper directory. + * + * If there was nothing underneath, then annotate the top as being + * negative. + */ + if (ret == -ENOENT) { + printk("UNION: Nothing underneath\n"); + flags = DCACHE_MISS_TYPE | DCACHE_UNION_LOOKUP_DONE; + goto set_negative; + } +found_directory: + dput(lower); + return 0; + + /* We found a blocking dentry in the lower levels so we mark the top + * dentry as blocking too. The whiteout/negative type is propagated + * upwards. + */ +found_blocker: + printk("UNION: Found opaque/whiteout first\n"); + if (ret == 0) + goto found_directory; + flags = __d_entry_type(lower) | DCACHE_UNION_LOOKUP_DONE; + dput(lower); +set_negative: + spin_lock(&dentry->d_lock); + if (!(dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) { + BUG_ON(!d_is_miss(dentry)); + dentry->d_flags |= flags; + } + spin_unlock(&dentry->d_lock); + return -ENOENT; + + /* A dentry that covers a lower file of any type is flagged and given a + * reference to the underlying file to hold. We do the attachment here + * so as not to have to pass the lower dentry back to the caller. 
+ */ +found_nondir_first: + printk("UNION: Found non-dir first\n"); + flags = __d_entry_type(lower) | DCACHE_UNION_LOOKUP_DONE; + spin_lock(&dentry->d_lock); + BUG_ON(!d_is_miss(dentry)); + if (!(dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) { + d_pin_lower(dentry, lower); + dentry->d_flags |= flags; + printk("UNION: pin lower %x\n", dentry->d_flags); + lower = NULL; + } else { + printk("UNION: lower already pinned\n"); + } + spin_unlock(&dentry->d_lock); + dput(lower); + return -ENOTDIR; + +error_dput: + dput(lower); +error_no_dput: + return ret; +} + +/** + * __union_lookup_point_locked - Look up the current dentry in lower layers under lock + * @parent: The parent of @path + * @path: Path of the target on the upper file system + * + * The caller must be holding the parent dir's i_mutex and must have locked the + * mount point for write. + */ +static int __union_lookup_point_locked(struct path *parent, struct path *path) +{ + struct union_stack *d; + struct dentry *dentry = path->dentry; + int ret; + + printk("UNION: -->__union_lookup_point_locked(%pd/%pd)\n", + parent->dentry, dentry); + + d = union_alloc_stack(path); + if (!d) + return -ENOMEM; + + ret = union_annotate_dentry(parent, path, d); + if (ret < 0) { + if (ret == -ENOTDIR) + ret = 0; + goto out_kill_stack; + } + + /* It's a directory, so it must be created on the upper level */ + printk("UNION: May need to create dir\n"); + + ret = union_create_topmost_dir(parent, path, d); + if (ret < 0) + goto out_kill_stack; + + spin_lock(&dentry->d_lock); + d_set_union_stack(dentry, d); + dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE; + spin_unlock(&dentry->d_lock); + ret = 0; +out: + printk("UNION: <--__union_lookup_point_locked() = %d\n", ret); + return ret; + +out_kill_stack: + union_free(parent, d); + goto out; +} + +static int union_lookup_point_locked(struct path *parent, struct path *path) +{ + if (!IS_PATH_UNIONED(parent) || + path->dentry->d_flags & DCACHE_UNION_LOOKUP_DONE) + return 0; + + return 
__union_lookup_point_locked(parent, path); +} + +/** + * __union_lookup_point - Look up the current point, raising a dir to upper level + * @parent: The parent of @path + * @path: Path of the target on the upper file system + * @got_write: The caller is holding antifreeze on the upper mount. + */ +static int __union_lookup_point(struct path *parent, struct path *path, + bool got_write) +{ + struct union_stack *d; + struct dentry *dentry = path->dentry; + struct inode *dir = parent->dentry->d_inode; + int ret; + + printk("UNION: -->__union_lookup_point(%pd/%pd)\n", + parent->dentry, path->dentry); + + d = union_alloc_stack(path); + if (!d) + return -ENOMEM; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + if (dentry->d_flags & DCACHE_UNION_LOOKUP_DONE) { + printk("UNION: already (1)\n"); + ret = 0; + goto out_unlock_mutex; + } + + ret = union_annotate_dentry(parent, path, d); + mutex_unlock(&dir->i_mutex); + + if (ret < 0) { + if (ret == -ENOTDIR) { + printk("UNION: sym/file\n"); + ret = 0; + } + goto out; + } + + /* It's a directory, so it must be raised to the upper level. However, + * we had to drop the parent lock so that we can take the locks in the + * right order. 
+ */ + printk("UNION: May need to raise dir\n"); + if (!got_write) { + ret = mnt_want_write(parent->mnt); + if (ret < 0) + goto out; + } + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + + if (!(dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) { + printk("UNION: Need to raise dir\n"); + ret = union_create_topmost_dir(parent, path, d); + if (ret == 0) { + spin_lock(&dentry->d_lock); + d_set_union_stack(dentry, d); + dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE; + spin_unlock(&dentry->d_lock); + d = NULL; + } + } else { + printk("UNION: already (2)\n"); + ret = 0; + } + + if (!got_write) + mnt_drop_write(parent->mnt); +out_unlock_mutex: + mutex_unlock(&dir->i_mutex); +out: + union_free(parent, d); + printk("UNION: <--__union_lookup_point() = %d\n", ret); + return ret; +} + +static int union_lookup_point(struct nameidata *nd, struct path *path, + bool got_write) +{ + if (!IS_PATH_UNIONED(&nd->path) || + path->dentry->d_flags & DCACHE_UNION_LOOKUP_DONE) + return 0; + + if (nd->flags & LOOKUP_RCU) { + printk("UNION: unlazy for union_lookup_point()\n"); + if (unlikely(unlazy_walk(nd, path->dentry))) + return -ECHILD; + } + + return __union_lookup_point(&nd->path, path, got_write); +} + +/* + * lookup_union_rcu - Handle union mounted dentries in RCU-walk mode + * @parent: The parent directory. + * @path: The point just looked up in @parent. + * @inode: The inode at @path (*@inode is NULL if negative dentry). + * + * Handle a dentry that represents a non-directory file or a hole/reference in + * a union mount upperfs. + * + * We return true if we don't need to do anything or if we've successfully + * updated the path. If we need to drop out of RCU-walk and go to refwalk + * mode, we return false. + */ +static bool lookup_union_rcu(struct path *parent, + struct path *path, + struct inode **inode) +{ + struct dentry *dentry = path->dentry; + + /* Handle non-unionmount dentries first. 
*/ + if (likely(!IS_PATH_UNIONED(parent))) + return true; + + printk("UNION: Dir is unioned (RCU)\n"); + + /* If it's positive then no further lookup is needed: the file or + * directory has been copied up and the user gets to play with that. + */ + if (*inode) + return true; + + /* If this dentry is a blocker, then stop here. */ + if (d_is_negative(dentry)) + return true; + + /* If we need to look below, then we should break out of RCU walk mode + * with immediate effect. There are three cases: + * + * (1) We've encountered a lower directory. This must be copied up. + * + * (2) We've encountered a symlink. Symlinks are walked in refwalk + * mode (or (3) applies if NOFOLLOW). + * + * (3) We've encountered some other type of file. This must terminate + * the pathwalk immediately, one way or another. + */ + printk("UNION: Drop out of RCU\n"); + return false; +} + +/* * This looks up the name in dcache, possibly revalidates the old dentry and * allocates a new one if not found or not valid. In the need_lookup argument * returns whether i_op->lookup is necessary. @@ -1321,7 +1719,7 @@ return dentry; } -static struct dentry *__lookup_hash(struct qstr *name, +struct dentry *__lookup_hash(struct qstr *name, struct dentry *base, unsigned int flags) { bool need_lookup; @@ -1339,8 +1737,8 @@ * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. 
*/ -static int lookup_fast(struct nameidata *nd, - struct path *path, struct inode **inode) +static noinline int lookup_fast(struct nameidata *nd, + struct path *path, struct inode **_inode) { struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; @@ -1348,22 +1746,29 @@ int status = 1; int err; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> lookup_fast(%*.*s)\n", + nd->last.len, nd->last.len, nd->last.name); + /* * Rename seqlock is not required here because in the off chance * of a false negative due to a concurrent rename, we're going to * do the non-racy lookup, below. */ if (nd->flags & LOOKUP_RCU) { - unsigned seq; + unsigned seq, pseq; dentry = __d_lookup_rcu(parent, &nd->last, &seq); - if (!dentry) + if (!dentry) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: __d_lookup_rcu\n"); goto unlazy; + } /* * This sequence count validates that the inode matches * the dentry name information from lookup. */ - *inode = dentry->d_inode; + *_inode = d_inode_or_lower(dentry); if (read_seqcount_retry(&dentry->d_seq, seq)) return -ECHILD; @@ -1374,7 +1779,8 @@ * The memory barrier in read_seqcount_begin of child is * enough, we can use __read_seqcount_retry here. 
*/ - if (__read_seqcount_retry(&parent->d_seq, nd->seq)) + pseq = nd->seq; + if (__read_seqcount_retry(&parent->d_seq, pseq)) return -ECHILD; nd->seq = seq; @@ -1383,20 +1789,37 @@ if (unlikely(status <= 0)) { if (status != -ECHILD) need_reval = 0; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: d_revalidate\n"); goto unlazy; } } path->mnt = mnt; path->dentry = dentry; - if (unlikely(!__follow_mount_rcu(nd, path, inode))) + if (unlikely(!lookup_union_rcu(&nd->path, path, _inode))) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: !lookup_union_rcu\n"); goto unlazy; - if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + } + if (unlikely(!__follow_mount_rcu(nd, path, _inode))) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: !__follow_mount_rcu\n"); goto unlazy; + } + if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: need_automount\n"); + goto unlazy; + } return 0; unlazy: + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: unlazy\n"); if (unlazy_walk(nd, dentry)) return -ECHILD; } else { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: !RCU\n"); dentry = __d_lookup(parent, &nd->last); } @@ -1425,15 +1848,18 @@ } if (err) nd->flags |= LOOKUP_JUMPED; - *inode = path->dentry->d_inode; + + *_inode = d_inode_or_lower(path->dentry); return 0; need_lookup: + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: need_lookup\n"); return 1; } /* Fast lookup failed, do it the slow way */ -static int lookup_slow(struct nameidata *nd, struct path *path) +static noinline int lookup_slow(struct nameidata *nd, struct path *path) { struct dentry *dentry, *parent; int err; @@ -1441,9 +1867,17 @@ parent = nd->path.dentry; BUG_ON(nd->inode != parent->d_inode); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> lookup_slow(%*.*s)\n", + nd->last.len, nd->last.len, nd->last.name); + mutex_lock(&parent->d_inode->i_mutex); dentry = __lookup_hash(&nd->last, parent, nd->flags); 
mutex_unlock(&parent->d_inode->i_mutex); + + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: slow: __lookup_hash() = %p\n", dentry); + if (IS_ERR(dentry)) return PTR_ERR(dentry); path->mnt = nd->path.mnt; @@ -1458,7 +1892,7 @@ return 0; } -static inline int may_lookup(struct nameidata *nd) +static noinline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); @@ -1505,11 +1939,16 @@ return unlikely(d_is_symlink(dentry)) ? follow : 0; } -static inline int walk_component(struct nameidata *nd, struct path *path, +static noinline int walk_component(struct nameidata *nd, struct path *path, int follow) { struct inode *inode; int err; + + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> walk_component(%*.*s)\n", + nd->last.len, nd->last.len, nd->last.name); + /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and @@ -1526,11 +1965,30 @@ if (err < 0) goto out_err; - inode = path->dentry->d_inode; + inode = d_inode_or_lower(path->dentry); } - err = -ENOENT; - if (!inode) - goto out_path_put; + + if (IS_PATH_UNIONED(path)) { + printk("UNION: walk_comp: path->mnt UPPER%s\n", + nd->flags & LOOKUP_RCU ? 
" RCU" : ""); + printk("UNION: dentry %pd: %p(%p)\n", + path->dentry, path->dentry, path->dentry->d_inode); + } + + if (!inode) { + if (likely(!IS_MNT_UNION(path->mnt))) + goto enoent; + + err = union_lookup_point(nd, path, false); + if (err < 0) + goto out_path_put; + if (should_follow_link(path->dentry, follow)) + return 1; + inode = path->dentry->d_inode; + if (!inode && nd->flags & LOOKUP_PARENT) + goto enoent; + goto success; + } if (should_follow_link(path->dentry, follow)) { if (nd->flags & LOOKUP_RCU) { @@ -1539,13 +1997,15 @@ goto out_err; } } - BUG_ON(inode != path->dentry->d_inode); return 1; } +success: path_to_nameidata(path, nd); nd->inode = inode; return 0; +enoent: + err = -ENOENT; out_path_put: path_to_nameidata(path, nd); out_err: @@ -1925,6 +2385,9 @@ current->total_link_count = 0; err = link_path_walk(name, nd); + if (!err && IS_PATH_UNIONED(&nd->path)) + printk("UNION: link_path_walk returned nd->path.mnt UPPER%s\n>>>\n", + flags & LOOKUP_PARENT ? " PARENT" : ""); /* At this point we've processed all the non-terminal parts of the path * and are ready to tackle the final section. 
The final section may @@ -1936,16 +2399,33 @@ while (err > 0) { void *cookie; struct path link = terminal_symlink; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: path_lookupat: may_follow_link\n"); err = may_follow_link(&link, nd); if (unlikely(err)) break; nd->flags |= LOOKUP_PARENT; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: path_lookupat: follow_link\n"); err = follow_link(&link, nd, &cookie); if (err) break; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: path_lookupat: lookup_last\n"); err = lookup_last(nd, &terminal_symlink); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: path_lookupat: put_link\n"); put_link(nd, &link, cookie); } + + if (!err) { + if (!nd) + printk("UNION: path_lookupat: !nd\n"); + else if (!nd->path.mnt) + printk("UNION: path_lookupat: !nd->path.mnt\n"); + else if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: path_lookupat: nd->path.mnt UPPER\n"); + } } if (!err) @@ -2049,22 +2529,33 @@ /* * Restricted form of lookup. Doesn't follow links, single-component only, - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. + * needs parent already locked. Doesn't follow mounts. Does annotate the + * dentry for unionmount. SMP-safe. 
*/ static int lookup_hash(struct nameidata *nd, struct path *path) { struct dentry *result; + int ret; result = __lookup_hash(&nd->last, nd->path.dentry, nd->flags); if (IS_ERR(result)) { - path->mnt = NULL; - path->dentry = NULL; - return PTR_ERR(result); + ret = PTR_ERR(result); + goto error; } + path->mnt = nd->path.mnt; path->dentry = result; + ret = union_lookup_point_locked(&nd->path, path); + if (ret) + goto error_dput; return 0; + +error_dput: + dput(path->dentry); +error: + path->mnt = NULL; + path->dentry = NULL; + return ret; } /** @@ -2401,12 +2892,15 @@ */ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) { - struct inode *inode = victim->d_inode; + struct inode *inode = d_inode_or_lower(victim); int error; if (d_is_negative(victim)) return -ENOENT; - BUG_ON(!inode); + if (!inode) { + pr_err("### DENTRY %pd {%x}\n", victim, victim->d_flags); + BUG(); + } audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); @@ -2513,10 +3007,8 @@ return error; } -static int may_open(struct path *path, int acc_mode, int flag) +static int may_open(struct path *path, struct inode *inode, int acc_mode, int flag) { - struct dentry *dentry = path->dentry; - struct inode *inode = dentry->d_inode; int error; /* O_PATH? */ @@ -2537,7 +3029,7 @@ case S_IFCHR: if (path->mnt->mnt_flags & MNT_NODEV) return -EACCES; - /*FALLTHRU*/ + /* fallthrough */ case S_IFIFO: case S_IFSOCK: flag &= ~O_TRUNC; @@ -2637,6 +3129,10 @@ BUG_ON(dentry->d_inode); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> atomic_open(%*.*s)\n", + nd->last.len, nd->last.len, nd->last.name); + /* Don't create child dentry for a dead directory. 
*/ if (unlikely(IS_DEADDIR(dir))) { error = -ENOENT; @@ -2738,7 +3234,7 @@ fsnotify_create(dir, dentry); acc_mode = MAY_OPEN; } - error = may_open(&file->f_path, acc_mode, open_flag); + error = may_open(&file->f_path, file->f_inode, acc_mode, open_flag); if (error) fput(file); @@ -2791,6 +3287,10 @@ * * FILE_CREATE will be set in @*opened if the dentry was created and will be * cleared otherwise prior to returning. + * + * If an entry on a union mount is being considered, we pass back a file from + * the lower layer if there is one and leave it up to do_last() to copy up if + * need be. */ static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, @@ -2803,24 +3303,43 @@ int error; bool need_lookup; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> lookup_open(%*.*s)\n", + nd->last.len, nd->last.len, nd->last.name); + *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup); if (IS_ERR(dentry)) return PTR_ERR(dentry); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: lookup_dcache() = %p [%p]\n", + dentry, dentry ? 
dentry->d_inode : NULL); + /* Cached positive dentry: will open in f_op->open */ if (!need_lookup && dentry->d_inode) goto out_no_open; - if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { + /* Perform an atomic open if that is available - but not if a file on + * the upper filesystem of a union is being opened for writing + */ + if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open && + !(IS_MNT_UNION(nd->path.mnt) && + op->acc_mode & (MAY_WRITE | MAY_APPEND))) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: open atomic\n"); return atomic_open(nd, dentry, path, file, op, got_write, need_lookup, opened); } + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: don't open atomic\n"); if (need_lookup) { BUG_ON(dentry->d_inode); dentry = lookup_real(dir_inode, dentry, nd->flags); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: lookup_real() = %p\n", dentry); if (IS_ERR(dentry)) return PTR_ERR(dentry); } @@ -2830,8 +3349,8 @@ umode_t mode = op->mode; if (!IS_POSIXACL(dir->d_inode)) mode &= ~current_umask(); - /* - * This write is needed to ensure that a + + /* This write is needed to ensure that a * rw->ro transition does not occur between * the time when the file is created and when * a permanent write count is taken through @@ -2841,16 +3360,81 @@ error = -EROFS; goto out_dput; } + + /* If the negative dentry is on the upper layer of a union + * mount then we may need to copy up or turn a whiteout into a + * file. The negative dentry will not be on a lower layer at + * this point. + * + * If the dentry is a whiteout or a normal negative dentry in + * an opaque directory then we can just create over it. + * + * If O_CREAT|O_TRUNC|O_EXCL is specified then we fail if + * there's a file in the lower layer or succeed without copying + * up otherwise. + * + * If O_CREAT|O_TRUNC is specified then we need to copy up the + * attributes if there's a lower file. 
+ * + * If O_CREAT|O_RDONLY is specified and the file exists in the + * lower layer, we just use the lower file. + * + * Otherwise we need to copy up the whole file. + */ + if (IS_PATH_UNIONED(&nd->path)) { + struct path tmp = { + .mnt = nd->path.mnt, + .dentry = dentry, + }; + + printk("UNION: deal with O_CREAT\n"); + + error = union_lookup_point_locked(&nd->path, &tmp); + if (error == -ENOENT) + goto just_create; + if (error < 0) + goto out_dput; + if (d_is_directory(dentry)) { + error = -EISDIR; + if (op->open_flag & O_EXCL) + error = -EEXIST; + goto out_dput; + } + + if (d_is_symlink(dentry)) + goto out_no_open; + if (d_is_negative(dentry)) { + printk("UNION: lower blocked\n"); + goto just_create; /* Lower is blocked off */ + } + + printk("UNION: deal with O_CREAT\n"); + if (d_is_pinning_lower(dentry)) { + BUG_ON(!d_get_fallthru(dentry)->d_inode); + printk("UNION: lower available (O_CREAT ignored)\n"); + goto out_no_open; + } + + printk("UNION: create over lower\n"); + } + + just_create: *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) goto out_dput; error = vfs_create(dir->d_inode, dentry, mode, nd->flags & LOOKUP_EXCL); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: vfs_create() = %d [%pd: %p]\n", + error, dentry, dentry); if (error) goto out_dput; } + out_no_open: + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: out_no_open\n"); path->dentry = dentry; path->mnt = nd->path.mnt; return 1; @@ -2878,6 +3462,9 @@ bool retried = false; int error; + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> do_last()\n"); + nd->flags &= ~LOOKUP_PARENT; nd->flags |= op->intent; @@ -2895,8 +3482,11 @@ symlink_ok = true; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, path, &inode); - if (likely(!error)) + if (likely(!error)) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> do_last: goto finish_lookup\n"); goto finish_lookup; + } if (error < 0) goto out; @@ -2923,18 +3513,24 @@ retry_lookup: if 
(op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); - if (!error) + if (!error) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: got_write = true\n"); got_write = true; + } /* * do _not_ fail yet - we might not need that or fail with * a different error; let lookup_open() decide; we'll be * dropping this one anyway. */ } + mutex_lock(&dir->d_inode->i_mutex); error = lookup_open(nd, path, file, op, got_write, opened); mutex_unlock(&dir->d_inode->i_mutex); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: lookup_open() = %d\n", error); if (error <= 0) { if (error) goto out; @@ -2947,12 +3543,16 @@ goto opened; } + /* At this point, the file may have been looked up and created or + * truncated but hasn't been opened yet - however, since we dropped the + * lock, things may have changed in the filesystem. + */ if (*opened & FILE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; acc_mode = MAY_OPEN; - inode = path->dentry->d_inode; + inode = d_inode_or_lower(path->dentry); path_to_nameidata(path, nd); goto finish_open_created; } @@ -2962,7 +3562,7 @@ */ if (d_is_positive(path->dentry)) { audit_inode(name, path->dentry, 0); - inode = path->dentry->d_inode; + inode = d_inode_or_lower(path->dentry); } /* @@ -2976,40 +3576,138 @@ } error = -EEXIST; - if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) + if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: created, but O_EXCL\n"); goto exit_dput; + } + + if (IS_PATH_UNIONED(path) && + d_is_pinning_lower(path->dentry) && + d_managed(d_get_fallthru(path->dentry))) { + error = -EREMOTE; + goto exit_dput; + } error = follow_managed(path, nd->flags); - if (error < 0) + if (error < 0) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: follow_managed() = %d\n", error); goto exit_dput; + } if (error) nd->flags |= LOOKUP_JUMPED; BUG_ON(nd->flags & 
LOOKUP_RCU); - inode = path->dentry->d_inode; + inode = d_inode_or_lower(path->dentry); finish_lookup: + if (IS_MNT_UNION(path->mnt)) + printk("UNION: do_last: finish_lookup: at upper\n"); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: do_last: nd->path.mnt at upper\n"); /* we _can_ be in RCU mode here */ - error = -ENOENT; if (d_is_negative(path->dentry)) { - path_to_nameidata(path, nd); - goto out; + if (likely(!IS_PATH_UNIONED(&nd->path))) + goto noent; + + printk("UNION: %pd: d_flags = %x\n", + path->dentry, path->dentry->d_flags); + + error = union_lookup_point(nd, path, got_write); + if (error) + goto exit_dput; + + if (d_is_negative(path->dentry)) + goto noent; + + inode = d_inode_or_lower(path->dentry); + if (!inode) + goto noent; + + printk("UNION: got lower from d_fallthru\n"); } if (should_follow_link(path->dentry, !symlink_ok)) { + /* The dentry is either a symlink on this fs or it's a + * fallthrough to a symlink in a lower fs (in which case inode + * will be NULL). + */ + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: should_follow_link() -> true\n"); if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; goto out; } } - BUG_ON(inode != path->dentry->d_inode); + if (got_write) + mnt_drop_write(nd->path.mnt); return 1; } + if (IS_PATH_UNIONED(&nd->path) && + !path->dentry->d_inode && + (op->acc_mode & (MAY_WRITE | MAY_APPEND) || + op->open_flag & O_TRUNC) && + S_ISREG(inode->i_mode)) { + printk("UNION: WWWWW Need to copy up\n"); + + if (nd->flags & LOOKUP_RCU && + unlikely(unlazy_walk(nd, path->dentry))) { + path_to_nameidata(path, nd); + error = -ECHILD; + goto out; + } + + if (op->open_flag & O_DIRECTORY) { + error = -ENOTDIR; + goto exit_dput; + } + + /* Like inode_permission(), but inode->i_sb != dentry->d_sb */ + error = sb_permission(path->dentry->d_sb, inode, MAY_WRITE); + if (error < 0) + goto exit_dput; + error = __inode_permission(inode, MAY_WRITE); + if (error < 0) + goto exit_dput; + + error = 
mnt_want_write(nd->path.mnt); + if (error) + goto exit_dput; + + error = union_copy_up_for_do_last(&nd->path, path, will_truncate); + mnt_drop_write(nd->path.mnt); + if (error) + goto exit_dput; + + if (path->mnt != nd->path.mnt) + printk("UNION: !!! mnt not changed by copyup\n"); + + printk("UNION: copied up lower\n"); + BUG_ON(path->mnt != nd->path.mnt); + + inode = path->dentry->d_inode; + if (!inode) + goto noent; + + open_flag &= ~O_TRUNC; + will_truncate = false; + acc_mode = MAY_OPEN; + } + if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { + if (IS_PATH_UNIONED(&nd->path)) { + if (nd->flags & LOOKUP_RCU) + printk("UNION: in rcu mode\n"); + if (nd->path.mnt != path->mnt) + printk("UNION: nd->path.mnt != path->mnt\n"); + } path_to_nameidata(path, nd); } else { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: not in rcu mode\n"); save_parent.dentry = nd->path.dentry; save_parent.mnt = mntget(path->mnt); nd->path.dentry = path->dentry; @@ -3018,7 +3716,11 @@ nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... 
*/ finish_open: + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: --> complete_walk()\n"); error = complete_walk(nd); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: <-- complete_walk() = %d\n", error); if (error) { path_put(&save_parent); return error; @@ -3030,23 +3732,30 @@ (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) goto out; error = -ENOTDIR; - if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) + if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) { + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: !can_lookup\n"); goto out; + } if (!S_ISREG(inode->i_mode)) will_truncate = false; - if (will_truncate) { + if (will_truncate && !got_write) { error = mnt_want_write(nd->path.mnt); if (error) goto out; got_write = true; } finish_open_created: - error = may_open(&nd->path, acc_mode, open_flag); + error = may_open(&nd->path, inode, acc_mode, open_flag); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: <-- may_open() = %d\n", error); if (error) goto out; file->f_path.mnt = nd->path.mnt; error = finish_open(file, nd->path.dentry, inode, NULL, opened); + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: <-- finish_open() = %d\n", error); if (error) { if (error == -EOPENSTALE) goto stale_open; @@ -3072,6 +3781,13 @@ terminate_walk(nd); return error; +noent: + if (IS_PATH_UNIONED(&nd->path)) + printk("UNION: %pd: ENOENT\n", path->dentry); + path_to_nameidata(path, nd); + error = -ENOENT; + goto out; + exit_dput: path_put_conditional(path, nd); goto out; @@ -3136,7 +3852,7 @@ if (error) goto out2; audit_inode(pathname, nd->path.dentry, 0); - error = may_open(&nd->path, op->acc_mode, op->open_flag); + error = may_open(&nd->path, nd->path.dentry->d_inode, op->acc_mode, op->open_flag); if (error) goto out2; file->f_path.mnt = nd->path.mnt; @@ -3327,6 +4043,7 @@ } *path = nd.path; return new_path.dentry; + fail: dput(new_path.dentry); unlock: @@ -3472,8 +4189,17 @@ return -EMLINK; error = 
dir->i_op->mkdir(dir, dentry, mode); - if (!error) - fsnotify_mkdir(dir, dentry); + if (error) + return error; + + /* XXX racy - crash now and dir isn't opaque */ + if (IS_DIR_UNIONED(dentry->d_parent)) { + dentry->d_inode->i_flags |= S_OPAQUE; + mark_inode_dirty(dentry->d_inode); + } + + fsnotify_mkdir(dir, dentry); + return error; } @@ -3507,6 +4233,145 @@ return sys_mkdirat(AT_FDCWD, pathname, mode); } +/** + * vfs_whiteout: Create a whiteout for the given directory entry + * @parent: Parent directory + * @old_path: Directory entry to whiteout + * @isdir: The file at @old_path is a directory + * + * Create a whiteout for the given directory entry. A whiteout prevents lookup + * from dropping down to a lower layer of a union mounted file system. + * + * There are two important cases: (a) The directory entry to be whited out may + * already exist, in which case it must first be deleted before we create the + * whiteout, and (b) no such directory entry exists and we only have to create + * the whiteout itself. + * + * The caller must pass in a dentry for the directory entry to be whited out - + * a positive one if it exists, and a negative if not. When this function + * returns, the caller should dput() the old, now defunct dentry it passed in. + * The dentry for the whiteout itself is created inside this function. + * + * The caller must hold the i_mutex lock on the parent directory. + */ +static int vfs_whiteout(struct dentry *parent, struct path *old_path, int isdir) +{ + struct dentry *old_dentry = old_path->dentry; + struct inode *dir = parent->d_inode, *old_inode = old_dentry->d_inode; + int err = 0; + + BUG_ON(old_dentry->d_parent != parent); + + if (!dir->i_op || !dir->i_op->whiteout) + return -EOPNOTSUPP; + + /* If the old dentry is positive, then we have to delete this entry + * before we create the whiteout. The file system ->whiteout() op does + * the actual delete, but we do all the VFS-level checks and changes + * here. 
+ */ + if (old_inode) { + mutex_lock(&old_inode->i_mutex); + if (d_mountpoint(old_dentry)) { + mutex_unlock(&old_inode->i_mutex); + return -EBUSY; + } + if (isdir) + err = security_inode_rmdir(dir, old_dentry); + else + err = security_inode_unlink(dir, old_dentry); + if (err) + goto error_unlock; + + /* If we're removing a directory, we need to work out if it is + * empty - but if the directory has not yet been copied up, we + * cannot tell that by simply reading the lower dirs. We have + * to subtract the set of whiteouts in the top dir from the + * union of the sets of dirents from the lower dirs - ie. do a + * copyup. + */ + if (isdir) { + err = union_copy_up_dir(old_path); + if (err) + goto error_unlock; + } + } + + err = dir->i_op->whiteout(dir, old_dentry); + if (err) + goto error_unlock; + + if (old_inode) { + mutex_unlock(&old_inode->i_mutex); + if (isdir) { + old_inode->i_flags |= S_DEAD; + dont_mount(old_dentry); + } else { + fsnotify_link_count(old_inode); + } + d_drop(old_dentry); + } + return err; + +error_unlock: + if (old_inode) + mutex_unlock(&old_inode->i_mutex); + return err; +} + +static int do_whiteout(struct nameidata *nd, struct path *path, int isdir) +{ + struct path safe = nd->path; + struct dentry *dentry = path->dentry; + int err; + + path_get(&safe); + + err = may_delete(nd->path.dentry->d_inode, dentry, isdir); + if (err) + goto out; + + err = vfs_whiteout(nd->path.dentry, path, isdir); + +out: + path_put(&safe); + return err; +} + +/* + * Create a whiteout to finish off a rename from a unionmounted directory. + * This prevents any file of the same name in the lowerfs from showing through. 
+ */ +static int vfs_whiteout_after_rename(struct dentry *parent, + const struct qstr *name) +{ + struct inode *dir = parent->d_inode; + struct dentry *dummy; + int err; + + if (!dir->i_op || !dir->i_op->whiteout) + return -EOPNOTSUPP; + + /* Rename moved the old dentry somewhere else, so there can't be one + * here now (the caller's locks see to that) and so there's no need to + * call lookup, especially as the ->whiteout() op is expected to add + * the new dentry into the tree. + */ + dummy = d_alloc(parent, name); + if (!dummy) + return -ENOMEM; + + /* I think it's okay to pass the new whiteout as the old dentry here. + * What it seems to want is the name, the parent dentry and the inode. + * However, we know the inode no longer resides there and d_inode will + * be NULL. + */ + err = dir->i_op->whiteout(dir, dummy); + + dput(dummy); + return err; +} + /* * The dentry_unhash() helper will try to drop the dentry early: we * should have a usage count of 1 if we're the only user of this @@ -3601,14 +4466,13 @@ error = lookup_hash(&nd, &path); if (error) goto exit2; - if (!path.dentry->d_inode) { - error = -ENOENT; - goto exit3; - } error = security_path_rmdir(&nd.path, path.dentry); if (error) goto exit3; - error = vfs_rmdir(nd.path.dentry->d_inode, path.dentry); + if (IS_DIR_UNIONED(nd.path.dentry)) + error = do_whiteout(&nd, &path, 1); + else + error = vfs_rmdir(nd.path.dentry->d_inode, path.dentry); exit3: path_put_conditional(&path, &nd); exit2: @@ -3699,6 +4563,7 @@ struct inode *inode = NULL; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; + retry: name = user_path_parent(dfd, pathname, &nd, lookup_flags); if (IS_ERR(name)) @@ -3715,22 +4580,47 @@ retry_deleg: mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); error = lookup_hash(&nd, &path); - if (!error) { - /* Why not before? Because we want correct error value */ - if (nd.last.name[nd.last.len]) - goto slashes; + if (error) + goto exit2; + + /* Why not before? 
Because we want correct error value */ + if (d_is_negative(path.dentry)) { + if (IS_PATH_UNIONED(&nd.path)) + printk("UNION: unlink neg\n"); + BUG_ON(path.dentry->d_inode); + error = -ENOENT; + } else if (nd.last.name[nd.last.len]) { + if (IS_PATH_UNIONED(&nd.path)) + printk("UNION: unlink slash: %x\n", path.dentry->d_flags); + BUG_ON(!(path.dentry->d_flags & DCACHE_UNION_PINNING_LOWER) && + !path.dentry->d_inode); + error = d_is_directory(path.dentry) ? -EISDIR : -ENOTDIR; + } else if (!path.dentry->d_inode) { + if (IS_PATH_UNIONED(&nd.path)) + printk("UNION: unlink lower\n"); + error = security_path_unlink(&nd.path, path.dentry); + if (!error) { + if (IS_PATH_UNIONED(&nd.path)) + printk("UNION: call do_whiteout()\n"); + error = do_whiteout(&nd, &path, 0); + } + } else { + if (IS_PATH_UNIONED(&nd.path)) + printk("UNION: unlink upper\n"); inode = path.dentry->d_inode; - if (d_is_negative(path.dentry)) - goto slashes; ihold(inode); error = security_path_unlink(&nd.path, path.dentry); - if (error) - goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, path.dentry, - &delegated_inode); -exit2: - path_put_conditional(&path, &nd); + if (!error) { + if (IS_DIR_UNIONED(nd.path.dentry)) + error = do_whiteout(&nd, &path, 0); + else + error = vfs_unlink(nd.path.dentry->d_inode, path.dentry, + &delegated_inode); + } } + + path_put_conditional(&path, &nd); +exit2: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ @@ -3750,15 +4640,6 @@ goto retry; } return error; - -slashes: - if (d_is_negative(path.dentry)) - error = -ENOENT; - else if (d_is_directory(path.dentry) || d_is_autodir(path.dentry)) - error = -EISDIR; - else - error = -ENOTDIR; - goto exit2; } SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) @@ -3918,8 +4799,8 @@ int, newdfd, const char __user *, newname, int, flags) { struct dentry *new_dentry; - struct path old_path, new_path; - struct inode *delegated_inode = NULL; + struct 
path old_path, new_path, lower_cache, actual; + struct inode *inode, *delegated_inode = NULL; int how = 0; int error; @@ -3938,11 +4819,22 @@ if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; + retry: error = user_path_at(olddfd, oldname, how, &old_path); if (error) return error; + inode = union_get_inode(&old_path, &lower_cache, &actual); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto out; + } + error = union_copy_up(&old_path, &actual); + path_put_maybe(&lower_cache); + if (error < 0) + goto out; + new_dentry = user_path_create(newdfd, newname, &new_path, (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); @@ -4168,7 +5060,8 @@ int, newdfd, const char __user *, newname) { struct dentry *old_dir, *new_dir; - struct path old, new; + struct inode *old_inode; + struct path old, new, old_lower_cache, old_actual; struct dentry *trap; struct nameidata oldnd, newnd; struct inode *delegated_inode = NULL; @@ -4193,7 +5086,6 @@ error = -EXDEV; if (oldnd.path.mnt != newnd.path.mnt) goto exit2; - old_dir = oldnd.path.dentry; error = -EBUSY; if (oldnd.last_type != LAST_NORM) @@ -4218,6 +5110,7 @@ if (error) goto exit3; /* source must exist */ + old_inode = d_inode_or_lower(old.dentry); error = -ENOENT; if (d_is_negative(old.dentry)) goto exit4; @@ -4233,6 +5126,11 @@ error = -EINVAL; if (old.dentry == trap) goto exit4; + error = -EXDEV; + /* Can't rename a directory from a lower layer */ + if (IS_DIR_UNIONED(oldnd.path.dentry) && + IS_DIR_UNIONED(old.dentry)) + goto exit4; error = lookup_hash(&newnd, &new); if (error) goto exit4; @@ -4240,17 +5138,44 @@ error = -ENOTEMPTY; if (new.dentry == trap) goto exit5; + error = -EXDEV; + /* Can't rename over directories on the lower layer */ + if (IS_DIR_UNIONED(newnd.path.dentry) && + IS_DIR_UNIONED(new.dentry)) + goto exit5; error = security_path_rename(&oldnd.path, old.dentry, - &newnd.path, new.dentry); + &newnd.path, new.dentry, + old_inode); if (error) goto exit5; + + error = union_copy_up_locked(&oldnd.path, &old, 
&old_actual); + if (error) + goto exit5; + error = vfs_rename(old_dir->d_inode, old.dentry, new_dir->d_inode, new.dentry, &delegated_inode); + if (error) + goto exit5; + + /* Now whiteout the source. We may have exposed a positive lower level + * dentry, so we have to make sure it doesn't get resurrected. We + * could probe the lower levels at this point to find out whether there + * is actually anything that needs whiting out. + * + * Note that if this fails, it may leave the lower dentry exposed, and + * we may not be able to recover by simply renaming back (say we + * encountered ENOMEM or ENOSPC conditions). + */ + if (IS_DIR_UNIONED(oldnd.path.dentry)) + error = vfs_whiteout_after_rename(old_dir, &oldnd.last); + exit5: path_put_conditional(&new, &newnd); exit4: + path_put_maybe(&old_lower_cache); path_put_conditional(&old, &oldnd); exit3: unlock_rename(new_dir, old_dir); @@ -4311,6 +5236,7 @@ int res; nd.depth = 0; + dentry = d_dentry_or_lower(dentry); cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); if (IS_ERR(cookie)) return PTR_ERR(cookie);
diff --git a/fs/namespace.c b/fs/namespace.c index 6a147ab..e1a6a16 100644 --- a/fs/namespace.c +++ b/fs/namespace.c
@@ -25,6 +25,7 @@ #include <linux/magic.h> #include "pnode.h" #include "internal.h" +#include "union.h" #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) @@ -1403,10 +1404,9 @@ #endif -static bool is_mnt_ns_file(struct dentry *dentry) +static bool is_mnt_ns_file(struct inode *inode) { /* Is this a proxy for a mount namespace? */ - struct inode *inode = dentry->d_inode; struct proc_ns *ei; if (!proc_ns_inode(inode)) @@ -1419,16 +1419,16 @@ return true; } -static bool mnt_ns_loop(struct dentry *dentry) +static bool mnt_ns_loop(struct inode *inode) { /* Could bind mounting the mount namespace inode cause a * mount namespace loop? */ struct mnt_namespace *mnt_ns; - if (!is_mnt_ns_file(dentry)) + if (!is_mnt_ns_file(inode)) return false; - mnt_ns = get_proc_ns(dentry->d_inode)->ns; + mnt_ns = get_proc_ns(inode)->ns; return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } @@ -1440,7 +1440,7 @@ if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) return ERR_PTR(-EINVAL); - if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) + if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry->d_inode)) return ERR_PTR(-EINVAL); res = q = clone_mnt(mnt, dentry, flag); @@ -1463,7 +1463,7 @@ continue; } if (!(flag & CL_COPY_MNT_NS_FILE) && - is_mnt_ns_file(s->mnt.mnt_root)) { + is_mnt_ns_file(s->mnt.mnt_root->d_inode)) { s = skip_mnt_tree(s); continue; } @@ -1557,6 +1557,202 @@ return 0; } +/** + * check_topmost_union_mnt - mount-time checks for union mount + * @topmost_mnt: vfsmount of the topmost union filed system + * @mnt_flags: mount flags for the topmost mount + * + * Our readdir() solution of copying up directory entries requires + * that the topmost layer be writeable and support whiteouts and + * fallthrus. The topmost file system can't be mounted elsewhere + * because it's Too Hard(tm). 
+ */ +static int check_topmost_union_mnt(struct mount *topmost_mnt, int mnt_flags) +{ +#ifndef CONFIG_UNION_MOUNT + printk(KERN_INFO "union mount: not supported by the kernel\n"); + return -EINVAL; +#else + struct super_block *sb = topmost_mnt->mnt.mnt_sb; + + if (mnt_flags & MNT_READONLY) + return -EROFS; + + if (atomic_read(&sb->s_active) != 1) { + printk(KERN_INFO "union mount: topmost fs mounted elsewhere\n"); + return -EBUSY; + } + + if (!(sb->s_flags & MS_WHITEOUT)) { + printk(KERN_INFO "union mount: whiteouts not supported by fs\n"); + return -EINVAL; + } + + if (!(sb->s_flags & MS_FALLTHRU)) { + printk(KERN_INFO "union mount: fallthrus not supported by fs\n"); + return -EINVAL; + } + + return 0; +#endif +} + +void put_union_sb(struct super_block *sb) +{ + if (unlikely(sb->s_union_lower_mnts)) { + drop_collected_mounts(sb->s_union_lower_mnts); + sb->s_union_lower_mnts = NULL; + sb->s_union_count = 0; + } +} + +/** + * clone_union_tree - Clone all union-able mounts at this mountpoint + * @topmost: vfsmount of topmost layer + * @mntpnt: target of union mount + * + * Given the target mountpoint of a union mount, clone all the mounts at that + * mountpoint (well, pathname) that qualify as a union lower layer. Increment + * the hard readonly count of the lower layer superblocks. + * + * Returns error if any of the mounts or submounts mounted on or below this + * pathname are unsuitable for union mounting. This means you can't construct + * a union mount at the root of an existing mount without unioning it. + * + * XXX - Maybe should take # of layers to go down as an argument. But how to + * pass this in through mount options? All solutions look ugly. Currently you + * express your intention through mounting file systems on the same mountpoint, + * which is pretty elegant. 
+ */ +static int clone_union_tree(struct mount *topmost, struct path *mntpnt) +{ + struct mount *mnt, *cloned_tree; + + if (!IS_ROOT(mntpnt->dentry)) { + printk(KERN_INFO "union mount: mount point must be a root dir\n"); + return -EINVAL; + } + + /* Look for the "lowest" layer to union. */ + mnt = real_mount(mntpnt->mnt); + while (mnt->mnt_parent->mnt.mnt_root == mnt->mnt_mountpoint) { + /* Got root (mnt)? */ + if (mnt->mnt_parent == mnt) + break; + mnt = mnt->mnt_parent; + } + + /* Clone all the read-only mounts and submounts, only if they + * are not shared or slave, and increment the hard read-only + * users count on each one. If this can't be done for every + * mount and submount below this one, fail. + */ + cloned_tree = copy_tree(mnt, mnt->mnt.mnt_root, + CL_COPY_ALL | CL_PRIVATE | + CL_NO_SHARED | CL_NO_SLAVE | + CL_MAKE_HARD_READONLY); + if (IS_ERR(cloned_tree)) + return PTR_ERR(cloned_tree); + + topmost->mnt.mnt_sb->s_union_lower_mnts = &cloned_tree->mnt; + return 0; +} + +/** + * build_root_union - Create the union stack for the root dir + * @topmost_mnt - vfsmount of topmost mount + * + * Build the union stack for the root dir. Annoyingly, we have to traverse + * union "up" from the root of the cloned tree to find the topmost read-only + * mount, and then traverse back "down" to build the stack. 
+ */ +static int build_root_union(struct mount *topmost_mnt) +{ + struct mount *mnt, *topmost_ro_mnt; + struct path lower, topmost_path; + unsigned int i, layers = 1; + int err = 0; + + /* Find the topmost read-only mount */ + topmost_ro_mnt = real_mount(topmost_mnt->mnt.mnt_sb->s_union_lower_mnts); + for (mnt = topmost_ro_mnt; mnt; mnt = next_mnt(mnt, topmost_ro_mnt)) { + if (mnt->mnt_parent == topmost_ro_mnt && + mnt->mnt_mountpoint == topmost_ro_mnt->mnt.mnt_root) { + topmost_ro_mnt = mnt; + layers++; + } + } + topmost_mnt->mnt.mnt_sb->s_union_count = layers; + + // SHOULD USE collect_mounts() here rather than merely mntgetting + + /* Build the root dir's union stack from the top down */ + topmost_path.mnt = &topmost_mnt->mnt; + topmost_path.dentry = topmost_mnt->mnt.mnt_root; + mnt = topmost_ro_mnt; + for (i = 0; i < layers; i++) { + lower.mnt = mntget(&mnt->mnt); // !!!!!!!!!! TODO: FIX + lower.dentry = dget(mnt->mnt.mnt_root); + err = union_add_dir(&topmost_path, &lower, i); + if (err) + goto out; + mnt = mnt->mnt_parent; + } + return 0; + +out: + d_free_unions(topmost_path.dentry); + topmost_mnt->mnt.mnt_sb->s_union_count = 0; + return err; +} + +/** + * prepare_mnt_union - do setup necessary for a union mount + * @topmost_mnt: vfsmount of topmost layer + * @mntpnt: path of requested mountpoint + * + * We union every underlying file system that is mounted on the same mountpoint + * (well, pathname), read-only, and not shared. If we get at least one layer, + * we don't return an error, although we will complain in the kernel log if we + * hit a mount that can't be unioned. + * + * Caller needs namespace_sem, but can't have vfsmount_lock. 
+static int prepare_mnt_union(struct mount *topmost_mnt, struct path *mntpnt) +{ + int err; + + if (d_unlinked(mntpnt->dentry)) + return -ENOENT; + + printk("UNION: prepare\n"); + + err = check_topmost_union_mnt(topmost_mnt, topmost_mnt->mnt.mnt_flags); + if (err) + return err; + + err = clone_union_tree(topmost_mnt, mntpnt); + if (err) + return err; + + err = build_root_union(topmost_mnt); + if (err) + goto out; + + printk("UNION: prepared\n"); + return 0; + +out: + put_union_sb(topmost_mnt->mnt.mnt_sb); + return err; +} + +static void cleanup_mnt_union(struct mount *topmost_mnt) +{ + d_free_unions(topmost_mnt->mnt.mnt_root); + put_union_sb(topmost_mnt->mnt.mnt_sb); +} + /* * @source_mnt : mount tree to be attached * @nd : place the mount tree @source_mnt is attached @@ -1788,37 +1984,56 @@ static int do_loopback(struct path *path, const char *old_name, int recurse) { - struct path old_path; + struct path old_path, lower_cache, actual; struct mount *mnt = NULL, *old, *parent; struct mountpoint *mp; + struct inode *inode; int err; if (!old_name || !*old_name) return -EINVAL; - err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); + + err = kern_path(old_name, + LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT, &old_path); if (err) return err; + inode = union_get_inode(&old_path, &lower_cache, &actual); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + err = -EINVAL; - if (mnt_ns_loop(old_path.dentry)) - goto out; + if (mnt_ns_loop(inode)) + goto out_lower; mp = lock_mount(path); err = PTR_ERR(mp); if (IS_ERR(mp)) - goto out; + goto out_lower; old = real_mount(old_path.mnt); - parent = real_mount(path->mnt); err = -EINVAL; if (IS_MNT_UNBINDABLE(old)) - goto out2; + goto out_unlock; + /* If we're bind-mounting a file that's on a lower fs in a union then + * we must first copy the file up as the copied mount stack attached to + * the superblock is independent of any namespace and will fail the + * check_mnt() test.
Directories are copied up during the pathwalk, so + * we need not worry about those. + */ + err = union_copy_up(&old_path, &actual); + if (err < 0) + goto out_unlock; + err = -EINVAL; + parent = real_mount(path->mnt); if (!check_mnt(parent) || !check_mnt(old)) - goto out2; + goto out_unlock; if (!recurse && has_locked_children(old, old_path.dentry)) - goto out2; + goto out_unlock; if (recurse) mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE); @@ -1827,7 +2042,7 @@ if (IS_ERR(mnt)) { err = PTR_ERR(mnt); - goto out2; + goto out_unlock; } mnt->mnt.mnt_flags &= ~MNT_LOCKED; @@ -1838,8 +2053,10 @@ umount_tree(mnt, 0); unlock_mount_hash(); } -out2: +out_unlock: unlock_mount(mp); +out_lower: + path_put_maybe(&lower_cache); out: path_put(&old_path); return err; @@ -1880,6 +2097,18 @@ if (!check_mnt(mnt)) return -EINVAL; + if ((path->mnt->mnt_flags & MNT_UNION) && + !(mnt_flags & MNT_UNION)) + return -EINVAL; + + if ((mnt_flags & MNT_UNION) && + !(path->mnt->mnt_flags & MNT_UNION)) + return -EINVAL; + + if ((path->mnt->mnt_flags & MNT_UNION) && + (mnt_flags & MNT_READONLY)) + return -EINVAL; + if (path->dentry != path->mnt->mnt_root) return -EINVAL; @@ -2015,6 +2244,7 @@ { struct mountpoint *mp; struct mount *parent; + bool unioned = false; int err; mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); @@ -2045,7 +2275,17 @@ goto unlock; newmnt->mnt.mnt_flags = mnt_flags; + + if (IS_MNT_UNION(&newmnt->mnt)) { + err = prepare_mnt_union(newmnt, path); + if (err) + goto unlock; + unioned = true; + } + err = graft_tree(newmnt, parent, mp); + if (err < 0 && unioned) + cleanup_mnt_union(newmnt); unlock: unlock_mount(mp);
diff --git a/fs/open.c b/fs/open.c index 5c30ce3..1b48281 100644 --- a/fs/open.c +++ b/fs/open.c
@@ -33,6 +33,7 @@ #include <linux/compat.h> #include "internal.h" +#include "union.h" int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp) @@ -65,29 +66,55 @@ long vfs_truncate(struct path *path, loff_t length) { + struct path lower_cache, actual; struct inode *inode; long error; - inode = path->dentry->d_inode; + if (IS_PATH_UNIONED(path)) + printk("UNION: truncate: path.mnt: at upper\n"); + + inode = union_get_inode(path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ + error = -EISDIR; if (S_ISDIR(inode->i_mode)) - return -EISDIR; + goto out; + error = -EINVAL; if (!S_ISREG(inode->i_mode)) - return -EINVAL; + goto out; error = mnt_want_write(path->mnt); if (error) goto out; - error = inode_permission(inode, MAY_WRITE); - if (error) - goto mnt_drop_write_and_out; + if (unlikely(d_is_unioned(path->dentry, &actual))) { + /* We have to be able to write to the upperfs. */ + error = -EROFS; + if (path->dentry->d_sb->s_flags & MS_RDONLY) + goto mnt_drop_write_and_out; + + /* But the lowerfs inode must offer write permission - if the + * lowerfs was mounted writably. 
*/ + error = __inode_permission(inode, MAY_WRITE); + if (error) + goto mnt_drop_write_and_out; + } else { + error = inode_permission(inode, MAY_WRITE); + if (error) + goto mnt_drop_write_and_out; + } error = -EPERM; if (IS_APPEND(inode)) goto mnt_drop_write_and_out; + error = union_truncated_copy_up(path, &actual, &length); + if (error) + goto mnt_drop_write_and_out; + + inode = path->dentry->d_inode; error = get_write_access(inode); if (error) goto mnt_drop_write_and_out; @@ -111,6 +138,7 @@ mnt_drop_write_and_out: mnt_drop_write(path->mnt); out: + path_put_maybe(&lower_cache); return error; } EXPORT_SYMBOL_GPL(vfs_truncate); @@ -332,7 +360,7 @@ if (res) goto out; - inode = path.dentry->d_inode; + inode = d_inode_or_lower(path.dentry); if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* @@ -344,7 +372,15 @@ goto out_path_release; } - res = inode_permission(inode, mode | MAY_ACCESS); + /* For unionmount files, we need to check the permissions on the upper + * superblock and the lower inode. 
+ */ + res = sb_permission(path.dentry->d_sb, inode, mode); + if (res != 0) + goto out_path_release; + + res = __inode_permission(inode, mode | MAY_ACCESS); + /* SuS v2 requires we report a read only fs too */ if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; @@ -464,19 +500,32 @@ static int chmod_common(struct path *path, umode_t mode) { - struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct inode *inode, *delegated_inode = NULL; + struct path lower_cache, actual; struct iattr newattrs; int error; + inode = union_get_inode(path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); + error = mnt_want_write(path->mnt); if (error) - return error; + goto out_lower; retry_deleg: +again: mutex_lock(&inode->i_mutex); - error = security_path_chmod(path, mode); + error = security_path_chmod(&actual, mode); if (error) goto out_unlock; + if (d_is_unioned(path->dentry, &actual)) { + mutex_unlock(&inode->i_mutex); + error = union_copy_up(path, &actual); + if (error < 0) + goto out_drop_write; + inode = actual.dentry->d_inode; + goto again; + } newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(path->dentry, &newattrs, &delegated_inode); @@ -487,7 +536,10 @@ if (!error) goto retry_deleg; } +out_drop_write: mnt_drop_write(path->mnt); +out_lower: + path_put_maybe(&lower_cache); return error; } @@ -498,7 +550,10 @@ if (f.file) { audit_inode(NULL, f.file->f_path.dentry, 0); - err = chmod_common(&f.file->f_path, mode); + if (f.file->f_inode != f.file->f_path.dentry->d_inode) + err = -EACCES; /* Unioned, but can't copy up. 
*/ + else + err = chmod_common(&f.file->f_path, mode); fdput(f); } return err; @@ -509,6 +564,7 @@ struct path path; int error; unsigned int lookup_flags = LOOKUP_FOLLOW; + retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (!error) { @@ -529,8 +585,8 @@ static int chown_common(struct path *path, uid_t user, gid_t group) { - struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct path lower_cache, actual; + struct inode *inode, *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; @@ -542,31 +598,50 @@ newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { if (!uid_valid(uid)) return -EINVAL; newattrs.ia_valid |= ATTR_UID; newattrs.ia_uid = uid; } if (group != (gid_t) -1) { if (!gid_valid(gid)) return -EINVAL; newattrs.ia_valid |= ATTR_GID; newattrs.ia_gid = gid; } + + inode = union_get_inode(path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; retry_deleg: +again: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); - if (!error) - error = notify_change(path->dentry, &newattrs, &delegated_inode); + if (error < 0) + goto out_unlock; + + if (d_is_unioned(path->dentry, &actual)) { + mutex_unlock(&inode->i_mutex); + error = union_copy_up(path, &actual); + if (error < 0) + goto out_lower; + inode = actual.dentry->d_inode; + goto again; + } + + error = notify_change(path->dentry, &newattrs, &delegated_inode); +out_unlock: mutex_unlock(&inode->i_mutex); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } +out_lower: + path_put_maybe(&lower_cache); return error; } SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, @@ -620,6 +695,11 @@ if (!f.file) goto out; + if (f.file->f_inode !=
f.file->f_path.dentry->d_inode) { + error = -EACCES; /* Unioned, but can't copy up. */ + goto out_fput; + } + error = mnt_want_write_file(f.file); if (error) goto out_fput;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index bafb5a3..5ad8ad0 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c
@@ -65,6 +65,8 @@ { MNT_NOATIME, ",noatime" }, { MNT_NODIRATIME, ",nodiratime" }, { MNT_RELATIME, ",relatime" }, + { MNT_SHARED, ",shared" }, + { MNT_UNBINDABLE, ",nobind" }, { MNT_UNION, ",union" }, { 0, NULL } };
diff --git a/fs/readdir.c b/fs/readdir.c index 483bd75..ebe5665 100644 --- a/fs/readdir.c +++ b/fs/readdir.c
@@ -20,6 +20,8 @@ #include <asm/uaccess.h> +#include "union.h" + int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); @@ -31,9 +33,27 @@ if (res) goto out; - res = mutex_lock_killable(&inode->i_mutex); - if (res) - goto out; + if (unlikely(IS_DIR_UNIONED(file->f_path.dentry)) && + !IS_OPAQUE(file->f_path.dentry->d_inode)) { + res = mnt_want_write(file->f_path.mnt); + if (res < 0) + goto out; + + res = mutex_lock_killable(&inode->i_mutex); + if (res < 0) { + mnt_drop_write(file->f_path.mnt); + goto out; + } + + res = union_copy_up_dir(&file->f_path); + mnt_drop_write(file->f_path.mnt); + if (res < 0) + goto out_unlock; + } else { + res = mutex_lock_killable(&inode->i_mutex); + if (res) + goto out; + } res = -ENOENT; if (!IS_DEADDIR(inode)) { @@ -42,6 +62,7 @@ file->f_pos = ctx->pos; file_accessed(file); } +out_unlock: mutex_unlock(&inode->i_mutex); out: return res;
diff --git a/fs/stat.c b/fs/stat.c index ae0c3ce..909d3cb 100644 --- a/fs/stat.c +++ b/fs/stat.c
@@ -17,6 +17,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include "union.h"
 
 void generic_fillattr(struct inode *inode, struct kstat *stat)
 {
@@ -51,25 +52,50 @@
  */
 int vfs_getattr_nosec(struct path *path, struct kstat *stat)
 {
-	struct inode *inode = path->dentry->d_inode;
+	struct path lower_cache, actual;
+	struct inode *inode;
+	int retval = 0;	/* the generic_fillattr() path reports success */
 
-	if (inode->i_op->getattr)
-		return inode->i_op->getattr(path->mnt, path->dentry, stat);
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	if (inode->i_op->getattr) {
+		retval = inode->i_op->getattr(actual.mnt, actual.dentry, stat);
+		goto out;
+	}
 
 	generic_fillattr(inode, stat);
-	return 0;
+out:
+	path_put_maybe(&lower_cache);
+	return retval;
 }
 
 EXPORT_SYMBOL(vfs_getattr_nosec);
 
 int vfs_getattr(struct path *path, struct kstat *stat)
 {
+	struct path lower_cache, actual;
+	struct inode *inode;
 	int retval;
 
-	retval = security_inode_getattr(path->mnt, path->dentry);
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	retval = security_inode_getattr(actual.mnt, actual.dentry);
 	if (retval)
-		return retval;
-	return vfs_getattr_nosec(path, stat);
+		goto out;
+
+	if (inode->i_op->getattr) {
+		retval = inode->i_op->getattr(actual.mnt, actual.dentry, stat);
+		goto out;
+	}
+
+	generic_fillattr(inode, stat);
+out:
+	path_put_maybe(&lower_cache);
+	return retval;
 }
 
 EXPORT_SYMBOL(vfs_getattr);
@@ -326,7 +352,12 @@
 retry:
 	error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);
 	if (!error) {
-		struct inode *inode = path.dentry->d_inode;
+		struct inode *inode = d_inode_or_lower(path.dentry);
+
+		if (IS_MNT_UNION(path.mnt)) {
+			printk("readlink inode: %p -> %p [%x %d]\n",
+			       path.dentry->d_inode, inode, path.dentry->d_flags, empty);
+		}
 
 		error = empty ? -ENOENT : -EINVAL;
 		if (inode->i_op->readlink) {
diff --git a/fs/super.c b/fs/super.c index 4c9a2fe..8a67018 100644 --- a/fs/super.c +++ b/fs/super.c
@@ -283,6 +283,7 @@ unregister_shrinker(&s->s_shrink); put_filesystem(fs); + put_union_sb(s); put_super(s); } else { up_write(&s->s_umount);
diff --git a/fs/union.c b/fs/union.c new file mode 100644 index 0000000..0188bf1 --- /dev/null +++ b/fs/union.c
@@ -0,0 +1,931 @@
+/* VFS-based union mounts for Linux
+ *
+ * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright (C) 2007-2009 Novell Inc.
+ * Copyright (C) 2009-2012 Red Hat, Inc.
+ *
+ * Author(s): Jan Blunck (j.blunck@tu-harburg.de)
+ *            Valerie Aurora <vaurora@redhat.com>
+ *            David Howells <dhowells@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#define DEBUG
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/fs_struct.h>
+#include <linux/slab.h>
+#include <linux/fsnotify.h>
+#include <linux/xattr.h>
+#include <linux/file.h>
+#include <linux/security.h>
+#include <linux/splice.h>
+#include <linux/ratelimit.h>
+
+#include "internal.h"
+#include "union.h"
+
+/**
+ * d_free_unions - free all unions for this dentry
+ * @topmost: topmost dentry in the union stack to remove
+ *
+ * This must be called when freeing a dentry.  d_inode may point to a defunct
+ * inode or may have been cleared by the time we get here.
+ */
+void d_free_unions(struct dentry *topmost)
+{
+	struct path *path;
+	unsigned int i, layers = topmost->d_sb->s_union_count;
+
+	if (topmost->d_union_stack) {
+		if (topmost->d_flags & DCACHE_UNION_PINNING_LOWER) {
+			/* A negative non-dir upper dentry is pinning
+			 * a single lower dentry so that f_inode
+			 * doesn't have to.
+			 */
+			pr_devel("free pin: %pd\n", topmost);
+			dput(topmost->d_fallthru);
+		} else {
+			/* A positive directory dentry is pinning a
+			 * stack of lower dirs.
+			 */
+			pr_devel("free dirstack: %pd\n", topmost);
+
+			for (i = 0; i < layers; i++) {
+				path = union_find_dir(topmost, i);
+				if (path->mnt)
+					path_put(path);
+			}
+			kfree(topmost->d_union_stack);
+		}
+		topmost->d_union_stack = NULL;
+	}
+}
+
+/**
+ * union_add_dir - Add another layer to a unioned directory
+ * @topmost: topmost directory
+ * @lower: directory in the current layer
+ * @layer: index of layer to add this at
+ *
+ * @layer counts starting at 0 for the dir below the topmost dir.
+ *
+ * This transfers the caller's references to the constituents of *lower to the
+ * union stack.  Returns 0 on success or -ENOMEM if the stack could not be
+ * allocated, in which case the caller still owns its references.
+ */
+int union_add_dir(struct path *topmost, struct path *lower, unsigned layer)
+{
+	struct dentry *dentry = topmost->dentry;
+	struct path *path;
+
+	BUG_ON(layer >= dentry->d_sb->s_union_count);
+	BUG_ON(d_is_fallthru(dentry));
+
+	if (!dentry->d_union_stack)
+		dentry->d_union_stack = union_alloc_stack(topmost);
+	if (!dentry->d_union_stack)
+		return -ENOMEM;
+
+	path = union_find_dir(dentry, layer);
+	*path = *lower;
+	return 0;
+}
+
+/**
+ * union_copy_up_xattr
+ * @new: dentry of new copy
+ * @old: dentry of original file
+ *
+ * Copy up extended attributes from the original file to the new one.
+ *
+ * XXX - Permissions?  For now, copying up every xattr.
+ */
+static int union_copy_up_xattr(struct path *new, struct dentry *old)
+{
+	ssize_t list_size, size;
+	char *buf, *name, *value;
+	int error;
+
+	/* Check for xattr support */
+	if (!old->d_inode->i_op->getxattr ||
+	    !new->dentry->d_inode->i_op->getxattr)
+		return 0;
+
+	/* Find out how big the list of xattrs is */
+	list_size = vfs_listxattr(old, NULL, 0);
+	if (list_size <= 0)
+		return list_size;
+
+	/* Allocate memory for the list */
+	buf = kzalloc(list_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Allocate memory for the xattr's value */
+	error = -ENOMEM;
+	value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
+	if (!value)
+		goto out;
+
+	/* Actually get the list of xattrs */
+	list_size = vfs_listxattr(old, buf, list_size);
+	if (list_size <= 0) {
+		error = list_size;
+		goto out_free_value;
+	}
+
+	/* The list is a sequence of NUL-terminated names packed back to back */
+	for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+		/* XXX Locking?  old is on read-only fs */
+		size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
+		if (size < 0) {
+			error = size;
+			goto out_free_value;
+		}
+		/* A zero-length value is valid and is copied like any other;
+		 * only a negative return is an error.
+		 */
+		/* XXX do we really need to check for size overflow? */
+		/* XXX locks new dentry, lock ordering problems? */
+		error = vfs_setxattr(new, name, value, size, 0);
+		if (error)
+			goto out_free_value;
+	}
+
+out_free_value:
+	kfree(value);
+out:
+	kfree(buf);
+	return error;
+}
+
+/**
+ * union_create_topmost_dir - Create a matching dir in the topmost file system
+ * @parent - parent of target on topmost layer
+ * @topmost - path of target on topmost layer
+ * @d - stack of source directories in lower layers
+ *
+ * As we lookup each directory on the lower layer of a union, we create a
+ * matching directory on the topmost layer if it does not already exist.
+ *
+ * We don't use vfs_mkdir() for a few reasons: don't want to do the security
+ * check, don't want to make the dir opaque, don't need to sanitize the mode.
+ *
+ * The caller must hold the parent i_mutex lock and the mnt_want_write lock.
+ *
+ * XXX - owner is wrong, set credentials properly
+ * XXX - rmdir() directory on failure of xattr copyup
+ * XXX - not atomic w/ respect to crash
+ */
+int union_create_topmost_dir(struct path *parent,
+			     struct path *topmost,
+			     struct union_stack *d)
+{
+	struct dentry *lower = NULL;
+	struct inode *dir = parent->dentry->d_inode;
+	unsigned i, layers = parent->dentry->d_sb->s_union_count;
+	int error;
+
+	BUG_ON(topmost->dentry->d_inode);
+
+	/* Use the highest lower layer that actually has this directory as the
+	 * template for mode and xattrs.
+	 */
+	for (i = 0; i < layers; i++) {
+		lower = d->u_dirs[i].dentry;
+		if (lower)
+			break;
+	}
+
+	/* Nothing to model the new directory on: bail out rather than
+	 * dereferencing a NULL dentry below.
+	 */
+	if (WARN_ON_ONCE(!lower))
+		return -ENOENT;
+
+	/* XXX - Do we even need to check this? */
+	if (!dir->i_op->mkdir)
+		return -EPERM;
+
+	error = dir->i_op->mkdir(dir, topmost->dentry, lower->d_inode->i_mode);
+	if (error)
+		return error;
+
+	error = union_copy_up_xattr(topmost, lower);
+	if (error)
+		goto out_rmdir;
+
+	fsnotify_mkdir(dir, topmost->dentry);
+	return 0;
+
+out_rmdir:
+	/* XXX rm created dir */
+	dput(topmost->dentry);
+	return error;
+}
+
+/* Per-call state handed to union_copy_up_one_dirent() via the dir_context. */
+struct union_iterate_context {
+	struct dir_context ctx;
+	struct dentry *topmost_dentry;
+	int error;
+};
+
+/**
+ * union_copy_up_one_dirent - copy up a single directory entry
+ *
+ * Individual directory entry copyup function for union_copy_up_dir.
+ * We get the entries from higher level layers first.
+ */
+static int union_copy_up_one_dirent(void *buf, const char *name, int namelen,
+				    loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct union_iterate_context *uic = (struct union_iterate_context *)buf;
+	struct dentry *topmost_dentry = uic->topmost_dentry;
+	struct dentry *dentry;
+	int err = 0;
+
+	/* Skip "." and ".." */
+	switch (namelen) {
+	case 2:
+		if (name[1] != '.')
+			break;
+		/* fall through */
+	case 1:
+		if (name[0] != '.')
+			break;
+		return 0;
+	}
+
+	/* Lookup this entry in the topmost directory */
+	dentry = lookup_one_len(name, topmost_dentry, namelen);
+
+	if (IS_ERR(dentry)) {
+		printk(KERN_WARNING "%s: error looking up %*.*s\n",
+		       __func__, namelen, namelen, name);
+		err = PTR_ERR(dentry);
+		goto out;
+	}
+
+	/* XXX do we need to revalidate on readdir anyway? 
think NFS */
+	if (dentry->d_op && dentry->d_op->d_revalidate)
+		goto fallthru;
+
+	/* If the entry already exists, one of the following is true: it was
+	 * already copied up (due to an earlier lookup), an entry with the same
+	 * name already exists on the topmost file system, it is a whiteout, or
+	 * it is a fallthru.  In each case, the top level entry masks any
+	 * entries from lower file systems, so don't copy up this entry.
+	 */
+	if (dentry->d_inode || d_is_whiteout(dentry) || d_is_fallthru(dentry))
+		goto out_dput;
+
+	/* If the entry doesn't exist, create a fallthru entry in the topmost
+	 * file system.  All possible directory types are used, so each file
+	 * system must implement its own way of storing a fallthru entry.
+	 */
+fallthru:
+	err = topmost_dentry->d_inode->i_op->fallthru(topmost_dentry->d_inode,
+						      dentry);
+
+	/* It's okay if it exists, ultimate responsibility rests with
+	 * ->fallthru() */
+	if (err == -EEXIST)
+		err = 0;
+out_dput:
+	dput(dentry);
+out:
+	if (err)
+		uic->error = err;
+	return err;
+}
+
+/**
+ * __union_copy_up_dir - Non-recursive directory copy up
+ *
+ * Copy up the specified directory only, without recursing into the subtree
+ * rooted at this point.
+ *
+ * During the operation, where a directory entry exists in one of the lower
+ * directories, a fallthrough dentry will be created in the upper directory if
+ * the upper directory doesn't already have an entry that obscures it.  At the
+ * end of the operation, the upper directory will be marked opaque on the
+ * medium - thus preventing further copy up attempts on this directory.
+ *
+ * TODO: At some point in the future, on-medium whiteouts should be culled from
+ * a directory that is marked opaque as they then serve no purpose.
+ *
+ * The primary reason for this function is that readdir() is difficult to
+ * support on union file systems for two reasons: We must eliminate duplicates
+ * and apply whiteouts, and we must return something in f_pos that lets us
+ * restart in the same place when we return.  Our solution is to, on first
+ * readdir() of the directory, copy up all visible entries from the low-level
+ * file systems and mark the entries that refer to low-level file system
+ * objects as "fallthrough" entries.
+ *
+ * Sadly, this function is also necessary for rmdir().  To work out whether a
+ * directory is empty, we have to work out if there are entries in lower
+ * directories that are not obscured by whiteouts in the upper.  This is not a
+ * trivial operation.  The simplest way is, therefore, to copy up and then
+ * check the combined opaque directory.
+ *
+ *
+ * Locking strategy: We hold the topmost dir's i_mutex on entry.  We grab the
+ * i_mutex on lower directories one by one.  So the locking order is:
+ *
+ *	Writable/topmost layers > Read-only/lower layers
+ *
+ * So there is no problem with lock ordering for union stacks with
+ * multiple lower layers.  E.g.:
+ *
+ *	(topmost) A->B->C (bottom)
+ *	(topmost) D->C->B (bottom)
+ *
+ */
+int __union_copy_up_dir(struct path *topmost_path)
+{
+	struct dentry *topmost_dentry = topmost_path->dentry;
+	unsigned int i, layers = topmost_dentry->d_sb->s_union_count;
+	int error = 0;
+
+	struct union_iterate_context uic = {
+		.ctx.actor = union_copy_up_one_dirent,
+		.topmost_dentry = topmost_dentry,
+	};
+
+
+	if (IS_OPAQUE(topmost_dentry->d_inode))
+		return 0;
+
+	if (!topmost_dentry->d_inode->i_op ||
+	    !topmost_dentry->d_inode->i_op->fallthru)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < layers; i++) {
+		struct inode *inode;
+		struct file *ftmp;
+		struct path *path;
+
+		/* Layers with no directory at this point have a NULL mnt */
+		path = union_find_dir(topmost_dentry, i);
+		if (!path->mnt)
+			continue;
+
+		ftmp = dentry_open(path, O_RDONLY | O_DIRECTORY | O_NOATIME,
+				   current_cred());
+		if (IS_ERR(ftmp)) {
+			printk(KERN_ERR "unable to open dir %pd for "
+			       "directory copyup: %ld\n",
+			       path->dentry, PTR_ERR(ftmp));
+			error = PTR_ERR(ftmp);
+			break;
+		}
+
+		inode = file_inode(ftmp);
+		mutex_lock(&inode->i_mutex);
+
+		error = -ENOENT;
+		if (IS_DEADDIR(inode))
+			goto out_fput;
+
+		/* Refuse lower files that cannot be iterated rather than
+		 * calling through a NULL ->iterate pointer.
+		 */
+		error = -ENOTDIR;
+		if (!ftmp->f_op->iterate)
+			goto out_fput;
+
+		/* Read the whole directory, calling our directory entry copyup
+		 * function on each entry.
+		 */
+		uic.ctx.pos = 0;
+		uic.error = 0;
+		error = ftmp->f_op->iterate(ftmp, &uic.ctx);
+out_fput:
+		mutex_unlock(&inode->i_mutex);
+		fput(ftmp);
+
+		/* An error recorded by the actor takes precedence */
+		if (uic.error)
+			error = uic.error;
+		if (error)
+			break;
+
+		/* XXX Should process directories below an opaque directory in
+		 * case there are fallthrus in it
+		 */
+		if (IS_OPAQUE(path->dentry->d_inode))
+			break;
+	}
+
+	/* Mark this dir opaque to show that we have already copied up the
+	 * lower entries.  Be sure to do this AFTER the directory entries have
+	 * been copied up so that if we crash in the middle of copyup, we will
+	 * try to copyup the dir next time we read it.
+	 *
+	 * XXX - Could leave directory non-opaque, and force reread/copyup of
+	 * directory each time it is read in from disk.  That would make it
+	 * easy to update lower file systems (when not union mounted) and have
+	 * the changes show up when union mounted again.
+	 */
+	if (!error) {
+		topmost_dentry->d_inode->i_flags |= S_OPAQUE;
+		mark_inode_dirty(topmost_dentry->d_inode);
+	}
+
+	return error;
+}
+
+/* Relationship between i_mode and the DT_xxx types */
+static inline unsigned char dt_type(struct inode *inode)
+{
+	return (inode->i_mode >> 12) & 15;
+}
+
+/**
+ * generic_readdir_fallthru - Helper to lookup target of a fallthru
+ * @topmost_dentry: dentry for the topmost dentry of the dir being read
+ * @name: name of fallthru dirent
+ * @namlen: length of @name
+ * @ino: return inode number of target, if found
+ * @d_type: return directory type of target, if found
+ *
+ * In readdir(), client file systems need to lookup the target of a
+ * fallthru in a lower layer for three reasons: (1) fill in d_ino, (2)
+ * fill in d_type, (3) make sure there is something to fall through to
+ * (and if not, don't return this dentry).  Upon detecting a fallthru
+ * dentry in readdir(), the client file system should call this function.
+ *
+ * Returns 0 on success and -ENOENT if no matching directory entry was
+ * found (which can happen when the topmost file system is unmounted
+ * and remounted over a different file system than before).  Any other
+ * errors are unexpected.
+ */
+int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name,
+			     int namlen, ino_t *ino, unsigned char *d_type)
+{
+	struct path *parent;
+	struct dentry *dentry;
+	unsigned int i, layers = topmost_dentry->d_sb->s_union_count;
+
+	BUG_ON(!mutex_is_locked(&topmost_dentry->d_inode->i_mutex));
+
+	/* Walk the lower layers from the top down; the first positive match
+	 * is the fallthru target.
+	 */
+	for (i = 0; i < layers; i++) {
+		parent = union_find_dir(topmost_dentry, i);
+		/* A layer that lacks this directory has an empty slot */
+		if (!parent->mnt)
+			continue;
+		mutex_lock(&parent->dentry->d_inode->i_mutex);
+		dentry = lookup_one_len(name, parent->dentry, namlen);
+		mutex_unlock(&parent->dentry->d_inode->i_mutex);
+		if (IS_ERR(dentry))
+			return PTR_ERR(dentry);
+		if (dentry->d_inode) {
+			*ino = dentry->d_inode->i_ino;
+			*d_type = dt_type(dentry->d_inode);
+			dput(dentry);
+			return 0;
+		}
+		dput(dentry);
+	}
+	return -ENOENT;
+}
+EXPORT_SYMBOL(generic_readdir_fallthru);
+
+/*
+ * Get the inode and path for a dentry where that inode may exist on a lower
+ * layer in a union.
+ *
+ * The caller must preclear the elements of *_lower_cache and prime *_actual
+ * with the contents of *upper (as is done by wrappers in union.h) and must
+ * also hold parent->i_mutex.
+ *
+ * Note that we don't get a ref on the inode or the lower vfsmount (if
+ * returned).  We leave it to the caller to iget/mntget them if appropriate.
+ * This should be safe as the caller holds parent->i_mutex.  The lower dentry
+ * (if returned) is dget'd, however.
+ *
+ * The pointers returned in *_actual are not dget'd/mntget'd as it is assumed
+ * they're pinned by the caller's ref on upper->mnt (if set), upper->dentry; or
+ * by the fact that parent->i_mutex is locked and _lower_cache->dentry is
+ * dget'd.
+ *
+ * NOTE(review): the function body calls mntget() on the lower vfsmount when
+ * filling in _actual->mnt and never stores a dentry in *_lower_cache, which
+ * does not match the description above - confirm which one is intended.
+ */
+struct inode *__union_get_inode_locked(struct dentry *parent,
+				       struct path *upper,
+				       struct path *_lower_cache,
+				       struct path *_actual)
+{
+	const struct union_stack *d;
+	struct dentry *dentry = upper->dentry;
+	unsigned i, layers = parent->d_sb->s_union_count;
+
+	pr_devel("-->%s(%pd,)\n", __func__, dentry);
+
+	BUG_ON(d_is_whiteout(dentry));
+
+	/* Check for a race with copy up. */
+	if (likely(dentry->d_inode)) {
+		pr_devel("<--%s() = upper\n", __func__);
+		*_actual = *upper;
+		return dentry->d_inode;
+	}
+
+	BUG_ON(!(dentry->d_flags & DCACHE_UNION_PINNING_LOWER));
+
+	pr_devel("<--%s() = fallthru\n", __func__);
+	smp_rmb();
+	_actual->dentry = dentry->d_fallthru;
+
+	/* Without a union stack on the parent there is nothing to match the
+	 * pinned lower dentry against, so treat it as a bad cache entry
+	 * instead of dereferencing NULL.
+	 */
+	d = parent->d_union_stack;
+	if (unlikely(!d))
+		goto out_badcache;
+
+	/* Find which layer's directory is the parent of the pinned dentry */
+	for (i = 0; i < layers; i++) {
+		if (d->u_dirs[i].dentry == dentry->d_fallthru->d_parent) {
+			_lower_cache->mnt = d->u_dirs[i].mnt;
+			break;
+		}
+	}
+	if (unlikely(!_lower_cache->mnt))
+		goto out_badcache;
+	_actual->mnt = mntget(_lower_cache->mnt);
+	return dentry->d_fallthru->d_inode;
+
+out_badcache:
+	printk_ratelimited(KERN_WARNING "UNION: Bad cached fallthru (%pd/%pd)\n",
+			   parent, upper->dentry);
+	return ERR_PTR(-EIO);
+}
+
+/*
+ * Get the inode for a dentry where that inode may exist on a lower layer in a
+ * union.
+ *
+ * Note that we don't get a ref on the inode, so we may need to pin it by
+ * getting a ref on a dentry pointing to it - in which case, a pointer to that
+ * dentry will be returned in *_lower_cache and the caller is expected to
+ * dput() the ref on it.
+ *
+ * Returns NULL (not an ERR_PTR) when the parent is opaque and the dentry is
+ * not a fallthru; callers must check for that as well as for IS_ERR().
+ */
+struct inode *__union_get_inode(struct path *upper, struct path *_lower_cache,
+				struct path *_actual)
+{
+	struct dentry *parent, *dentry = upper->dentry;
+	struct inode *inode;
+	int ret;
+
+	pr_devel("-->%s(%pd,)\n", __func__, dentry);
+
+	/* We need the parent directory so that we can find the stack of lower
+	 * directories in which to do lookups.  Use the rename mutex to prevent
+	 * rename from getting underfoot whilst we get the parent.
+	 */
+	if (mutex_lock_interruptible(&dentry->d_sb->s_vfs_rename_mutex) < 0)
+		return ERR_PTR(-EINTR);
+
+	parent = dget_parent(dentry);
+	if (IS_OPAQUE(parent->d_inode) && !d_is_fallthru(dentry)) {
+		mutex_unlock(&dentry->d_sb->s_vfs_rename_mutex);
+		inode = NULL;
+	} else {
+		/* Take the parent's i_mutex before letting go of the rename
+		 * mutex so the parent cannot change in between.
+		 */
+		ret = mutex_lock_interruptible(&parent->d_inode->i_mutex);
+		mutex_unlock(&dentry->d_sb->s_vfs_rename_mutex);
+		if (ret < 0) {
+			inode = ERR_PTR(ret);
+		} else {
+			inode = __union_get_inode_locked(parent, upper,
+							 _lower_cache, _actual);
+			mutex_unlock(&parent->d_inode->i_mutex);
+		}
+	}
+	dput(parent);
+	return inode;
+}
+
+/**
+ * union_create_file
+ * @parent: path of the upper parent directory
+ * @upper: path of the negative dentry to become new file
+ * @lower: path of the source file
+ *
+ * Must already have mnt_want_write() on the mnt and the parent's i_mutex.
+ */
+static int union_create_file(struct path *parent, struct path *upper,
+			     struct path *lower)
+{
+	struct inode *dir = parent->dentry->d_inode;
+	int ret;
+
+	if (!dir->i_op->tmpfile)
+		return -EPERM;
+
+	ret = dir->i_op->tmpfile(dir, upper->dentry,
+				 lower->dentry->d_inode->i_mode);
+	if (ret == 0) {
+		/* Mark the new inode linkable so it can be hard linked into
+		 * place once the copy is complete.
+		 */
+		spin_lock(&upper->dentry->d_inode->i_lock);
+		upper->dentry->d_inode->i_state |= I_LINKABLE;
+		spin_unlock(&upper->dentry->d_inode->i_lock);
+	}
+	return ret;
+}
+
+/**
+ * union_create_symlink
+ * @parent: Upper parent of the symlink
+ * @upper: Path of the negative dentry to become new symlink.
+ * @lower: Path of the source symlink
+ *
+ * Must already have mnt_want_write() on the mnt and the parent's i_mutex.
+ */
+static int union_create_symlink(struct path *parent, struct path *upper,
+				struct path *lower)
+{
+	struct inode *inode = lower->dentry->d_inode;
+	char *content;
+	int error;
+
+	content = kmalloc(PATH_MAX + 2, GFP_KERNEL);
+	if (!content)
+		return -ENOMEM;
+
+	/* NOTE(review): ->readlink() is handed a kernel buffer here; confirm
+	 * that this is valid for the address limit the callers run under.
+	 */
+	error = inode->i_op->readlink(lower->dentry, content, PATH_MAX + 1);
+	if (error < 0)
+		goto error;
+	/* On success ->readlink() returned the length; terminate the string */
+	content[error] = 0;
+
+	error = vfs_symlink(parent->dentry->d_inode, upper->dentry, content);
+error:
+	kfree(content);
+	return error;
+}
+
+/**
+ * union_copy_up_data - Copy up the lower file's data to the new file
+ * @path: path of target file
+ * @actual: path of source file in lower layer
+ * @truncate_to: upper bound on the number of bytes to copy (or NULL to copy
+ *		 the whole file)
+ *
+ * Returns 0 on success, -EFBIG if the size does not fit in a size_t, -EIO if
+ * the source ends before the expected size, or the error from opening or
+ * splicing the files.
+ */
+static int union_copy_up_data(struct path *path, struct path *actual,
+			      const loff_t *truncate_to)
+{
+	const struct cred *cred = current_cred();
+	struct file *lower_file;
+	struct file *new_file;
+	loff_t filesize, loffset = 0, noffset = 0;
+	size_t len;
+	long bytes;
+	int error = 0;
+
+	filesize = i_size_read(actual->dentry->d_inode);
+	if (truncate_to && *truncate_to < filesize)
+		filesize = *truncate_to;
+
+	/* Check for overflow of file size */
+	len = filesize;
+	if (len != filesize)
+		return -EFBIG;
+
+	if (len == 0)
+		return 0;
+
+	lower_file = dentry_open(actual, O_RDONLY, cred);
+	if (IS_ERR(lower_file))
+		return PTR_ERR(lower_file);
+
+	new_file = dentry_open(path, O_WRONLY, cred);
+	if (IS_ERR(new_file)) {
+		error = PTR_ERR(new_file);
+		goto out_fput;
+	}
+
+	/* A single splice may move fewer bytes than asked for, so keep going
+	 * until everything has been transferred.
+	 */
+	while (len > 0) {
+		bytes = do_splice_direct(lower_file, &loffset,
+					 new_file, &noffset, len,
+					 SPLICE_F_MOVE);
+		if (bytes < 0) {
+			error = bytes;
+			break;
+		}
+		if (bytes == 0) {
+			/* Source ended before the size we sampled above */
+			error = -EIO;
+			break;
+		}
+		len -= bytes;
+	}
+
+	fput(new_file);
+out_fput:
+	fput(lower_file);
+	return error;
+}
+
+/*
+ * Create a temporary file.  We don't want to inline this as it uses quite a
+ * lot of stack space.
+ *
+ * The caller should make sure _tmpfile->mnt is set to the upper vfsmount and
+ * that ->dentry is NULL.
+ *
+ * Note: we don't return with a ref on _tmpfile->mnt as path is holding a ref.
+ * Further, we may return with a dentry in _tmpfile->dentry that needs
+ * dput'ing, even if an error occurred.
+ */
+static int union_create_tmpfile(struct path *parent, struct path *path,
+				struct path *actual, struct path *_tmpfile)
+{
+	static const struct qstr nameless = { .name = "", .len = 0, .hash = 0 };
+	struct dentry *dentry;
+	int ret;
+
+	pr_devel("-->%s(%pd)\n", __func__, path->dentry);
+
+	/* Create a nameless file not directly attached to the parent
+	 * directory, but still associated with it for layout optimisation
+	 * reasons.  The upperfs should check for the file being of zero
+	 * length.
+	 *
+	 * We will then hard link the file into place when we're done copying
+	 * up - and mount/fsck will clean it up in the event of a crash and
+	 * dput() will clean it up in the event of an error.
+	 */
+	dentry = d_alloc(parent->dentry, &nameless);
+	if (dentry) {
+		_tmpfile->dentry = dentry;
+		if (S_ISREG(actual->dentry->d_inode->i_mode))
+			ret = union_create_file(parent, _tmpfile, actual);
+		else if (S_ISLNK(actual->dentry->d_inode->i_mode))
+			ret = union_create_symlink(parent, _tmpfile, actual);
+		else
+			BUG();
+	} else {
+		/* d_alloc() reports failure as NULL, not as an ERR_PTR */
+		ret = -ENOMEM;
+	}
+
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/**
+ * Copy up a file or symlink to a temporary file in the specially prepared
+ * directory and return the dentry of that.
+ *
+ * Creates the temporary object, copies the data for regular files, then
+ * copies the xattrs, stopping at the first step that fails.
+ */
+static int union_copy_up_to_tmpfile(struct path *parent, struct path *path,
+				    struct path *actual, struct path *_tmpfile,
+				    const loff_t *truncate_to)
+{
+	struct dentry *dentry = actual->dentry;
+	int ret;
+
+	ret = union_create_tmpfile(parent, path, actual, _tmpfile);
+
+	if (ret == 0 && S_ISREG(dentry->d_inode->i_mode))
+		ret = union_copy_up_data(_tmpfile, actual, truncate_to);
+	if (ret == 0)
+		ret = union_copy_up_xattr(_tmpfile, actual->dentry);
+	return ret;
+}
+
+/*
+ * Create a hardlink from the temporary file to the actual location.
+ */
+static int union_hard_link_to_tmpfile(struct path *parent, struct path *path,
+				      struct path *tmpfile)
+{
+	int ret;
+
+	pr_devel("-->%s(%pd,%pd,%pd)\n",
+		 __func__, parent->dentry, path->dentry, tmpfile->dentry);
+
+	ret = vfs_link(tmpfile->dentry, parent->dentry->d_inode, path->dentry,
+		       NULL);
+	return ret;
+}
+
+/**
+ * union_copy_up_via_tmpfile - Copy up lower file via temporary file
+ *
+ * Copy up a file or symlink to a temporary file in the specially prepared
+ * directory, then hard link across and unlink the temp file.
+ */
+static int union_copy_up_via_tmpfile(struct path *parent, struct path *path,
+				     struct path *actual,
+				     const loff_t *truncate_to)
+{
+	const struct cred *saved_cred;
+	struct cred *override_cred;
+	struct path tmpfile = { .mnt = path->mnt, .dentry = NULL };
+	int ret;
+
+	pr_devel("-->%s(,%pd,%pd,%pd,,%lld)\n",
+		 __func__, parent->dentry, path->dentry, actual->dentry,
+		 truncate_to ? *truncate_to : -1);
+
+	/* Do the copy with kernel credentials, using the lower file's owner
+	 * and group as the fsuid/fsgid.
+	 */
+	override_cred = prepare_kernel_cred(NULL);
+	if (!override_cred)
+		return -ENOMEM;
+
+	override_cred->fsuid = actual->dentry->d_inode->i_uid;
+	override_cred->fsgid = actual->dentry->d_inode->i_gid;
+
+	saved_cred = override_creds(override_cred);
+
+	ret = union_copy_up_to_tmpfile(parent, path, actual, &tmpfile,
+				       truncate_to);
+
+	if (ret == 0)
+		ret = union_hard_link_to_tmpfile(parent, path, &tmpfile);
+
+	/* Discard the temporary dentry */
+	dput(tmpfile.dentry);
+
+	revert_creds(saved_cred);
+
+	put_cred(override_cred);
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/**
+ * __union_copy_up - Copy a non-directory file up to the upper layer.
+ * @path: the upper (target) path
+ * @actual: the lower (source) path; overwritten with *@path if the file turns
+ *	    out to have been copied up already
+ * @truncate_to: passed through to the data copy (may be NULL)
+ */
+int __union_copy_up(struct path *path, struct path *actual,
+		    const loff_t *truncate_to)
+{
+	struct dentry *upper = path->dentry;
+	struct path parent;
+	int ret;
+
+	pr_devel("-->%s(%pd)\n", __func__, path->dentry);
+
+	/* We don't currently support copyup of special files, though in theory
+	 * there's no reason we couldn't at least copy up blockdev and chrdev
+	 * files.  FIFO files are problematic if open.  Socket files are
+	 * managed by AF_UNIX and would need help from there.  Directories are
+	 * handled by pathwalk.
+	 */
+	if (!S_ISREG(actual->dentry->d_inode->i_mode) &&
+	    !S_ISLNK(actual->dentry->d_inode->i_mode))
+		return -EACCES;
+
+	parent.mnt = path->mnt;
+
+	/* We need to get the parent directory and then we need to lock it.
+	 * Use the rename mutex to prevent rename from getting underfoot whilst
+	 * we do this.
+	 */
+	if (mutex_lock_interruptible(&upper->d_sb->s_vfs_rename_mutex) < 0)
+		return -EINTR;
+
+	if (upper->d_inode) {
+		mutex_unlock(&upper->d_sb->s_vfs_rename_mutex);
+		goto already_copied_up;
+	}
+
+	parent.dentry = dget_parent(upper);
+	BUG_ON(IS_OPAQUE(parent.dentry->d_inode) && !d_is_fallthru(upper));
+	BUG_ON(d_is_whiteout(upper));
+
+	ret = mutex_lock_interruptible(&parent.dentry->d_inode->i_mutex);
+	mutex_unlock(&upper->d_sb->s_vfs_rename_mutex);
+	if (ret < 0) {
+		dput(parent.dentry);
+		goto out;
+	}
+
+	/* Recheck now that we hold the parent's i_mutex */
+	if (upper->d_inode)
+		goto already_copied_up_unlock;
+
+	/* Do the copy up */
+	ret = union_copy_up_via_tmpfile(&parent, path, actual, truncate_to);
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
+	dput(parent.dentry);
+
+out:
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ret;
+
+already_copied_up_unlock:
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
+	dput(parent.dentry);
+already_copied_up:
+	pr_devel("<--%s() = 0 [already done]\n", __func__);
+	*actual = *path;
+	return 0;
+}
+
+/**
+ * __union_copy_up_for_do_last - Copy up a file for do_last()
+ * @parent: The parent directory of the file to be copied up.
+ * @path: The file to be copied up _to_.
+ * @will_truncate: Whether or not O_TRUNC is in force.
+ *
+ * Copy up for do_last().  It is expected that the caller will hold the
+ * want-write lock and will have called union_lookup_point*() first.
+ */
+int __union_copy_up_for_do_last(struct path *parent, struct path *path,
+				bool will_truncate)
+{
+	struct path lower_cache, actual;
+	struct inode *inode;
+	loff_t zero = 0;
+	int ret;
+
+	pr_devel("-->%s(,%pd{%pd},)\n",
+		 __func__, path->dentry,
+		 path->dentry->d_fallthru ? path->dentry->d_fallthru : NULL);
+
+	BUG_ON(!(path->dentry->d_flags & DCACHE_UNION_LOOKUP_DONE));
+	BUG_ON(!(path->dentry->d_flags & DCACHE_UNION_PINNING_LOWER));
+	BUG_ON(!path->dentry->d_fallthru);
+
+	ret = mutex_lock_interruptible(&parent->dentry->d_inode->i_mutex);
+	if (ret < 0)
+		return ret;
+
+	/* Check to see if we raced with another copy-up or an unlink */
+	ret = 0;
+	if (path->dentry->d_parent != parent->dentry ||
+	    path->dentry->d_inode)
+		goto unlock_out;
+
+	inode = union_get_inode_locked(parent->dentry, path,
+				       &lower_cache, &actual);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		goto unlock_out;
+	}
+
+	/* Do the copy up.  With O_TRUNC there is no point copying any data,
+	 * so cap the copy at zero bytes; otherwise copy the whole file.
+	 */
+	ret = union_copy_up_via_tmpfile(parent, path, &actual,
+					will_truncate ? &zero : NULL);
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+	path_put_maybe(&lower_cache);
+
+	pr_devel("<--%s() = %d [post]\n", __func__, ret);
+	return ret;
+
+unlock_out:
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+	pr_devel("<--%s() = %d [pre]\n", __func__, ret);
+	return ret;
+}
diff --git a/fs/union.h b/fs/union.h new file mode 100644 index 0000000..8b605c1 --- /dev/null +++ b/fs/union.h
@@ -0,0 +1,391 @@
+/* VFS-based union mounts for Linux
+ *
+ * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright (C) 2007-2009 Novell Inc.
+ * Copyright (C) 2009-2012 Red Hat, Inc.
+ *
+ * Author(s): Jan Blunck (j.blunck@tu-harburg.de)
+ *            Valerie Aurora <vaurora@redhat.com>
+ *            David Howells <dhowells@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _FS_UNION_H
+#define _FS_UNION_H
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+#include <linux/bug.h>
+
+/*
+ * WARNING! Confusing terminology alert.
+ *
+ * Note that the directions "up" and "down" in union mounts are the opposite of
+ * "up" and "down" in normal VFS operation terminology. "Up" in the rest of
+ * the VFS means "towards the root of the mount tree." If you mount B on top
+ * of A, following B "up" will get you A. In union mounts, "up" means "towards
+ * the most recently mounted layer of the union stack." If you union mount B
+ * on top of A, following A "up" will get you to B. Another way to put it is
+ * that "up" in the VFS means going from this mount towards the direction of
+ * its mnt->mnt_parent pointer, but "up" in union mounts means going in the
+ * opposite direction (until you run out of union layers).
+ */
+
+/*
+ * The union_stack structure. It is an array of struct paths of
+ * directories below the topmost directory in a unioned directory. The
+ * topmost dentry has a pointer to this structure. The topmost dentry
+ * can only be part of one union, so we can reference it from the
+ * dentry, but lower dentries can be part of multiple union stacks.
+ *
+ * The number of dirs actually allocated is kept in the superblock,
+ * s_union_count.
+ */
+struct union_stack {
+	struct path u_dirs[0];	/* sole member: a C99 [] array isn't allowed */
+};
+
+/**
+ * union_alloc_stack - allocate a union stack
+ * @topmost: path of topmost directory
+ *
+ * Allocate a union_stack large enough to contain the maximum number
+ * of layers in this union mount.
+ *
+ * The entries are zero-initialised. Returns NULL if the allocation fails.
+ */
+static inline struct union_stack *union_alloc_stack(const struct path *topmost)
+{
+	unsigned layers = topmost->dentry->d_sb->s_union_count;
+	return kcalloc(layers, sizeof(struct path), GFP_KERNEL);
+}
+
+#ifdef CONFIG_UNION_MOUNT
+
+static inline bool IS_MNT_UNION(const struct vfsmount *mnt)
+{
+	return mnt->mnt_flags & MNT_UNION;
+}
+
+static inline bool IS_PATH_UNIONED(const struct path *path)
+{
+	return IS_MNT_UNION(path->mnt);
+}
+
+static inline bool IS_DIR_UNIONED(const struct dentry *dentry)
+{
+	return !!dentry->d_union_stack;
+}
+
+extern void d_free_unions(struct dentry *);
+extern int union_add_dir(struct path *, struct path *, unsigned int);
+
+static inline
+struct path *union_find_dir(struct dentry *dentry, unsigned int layer)
+{
+	BUG_ON(layer >= dentry->d_sb->s_union_count);
+	return &dentry->d_union_stack->u_dirs[layer];
+}
+
+
+extern int union_create_topmost_dir(struct path *, struct path *, struct union_stack *);
+
+extern int __union_copy_up_dir(struct path *);
+
+#else /* CONFIG_UNION_MOUNT */
+
+static inline bool IS_MNT_UNION(const struct vfsmount *mnt) { return false; }
+static inline bool IS_PATH_UNIONED(const struct path *path) { return false; }
+static inline bool IS_DIR_UNIONED(const struct dentry *dentry) { return false; }
+static inline void d_free_unions(struct dentry *dentry) {}
+
+static inline
+int union_add_dir(struct path *topmost, struct path *lower, unsigned layer)
+{
+	BUG();
+	return 0;
+}
+
+static inline struct path *union_find_dir(struct dentry *dentry, unsigned layer)
+{
+	BUG();
+	return NULL;
+}
+
+static inline int union_create_topmost_dir(struct path *parent,
+					   struct path *topmost,
+					   struct union_stack *d)
+{
+	BUG();
+	return 0;
+}
+
+static inline int __union_copy_up_dir(struct path *topmost_path)
+{
+	BUG();
+	return 0;
+}
+
+#endif /* CONFIG_UNION_MOUNT */
+
+/*
+ * Make sure that an upper directory is opaque (ie. totally copied up if it is
+ * in fact unioned with some lower dirs).
+ */
+static inline int union_copy_up_dir(struct path *path)
+{
+	if (IS_OPAQUE(path->dentry->d_inode))
+		return 0;
+	return __union_copy_up_dir(path);
+}
+
+extern struct inode *__union_get_inode_locked(struct dentry *parent,
+					      struct path *upper,
+					      struct path *_lower_cache,
+					      struct path *_actual);
+extern struct inode *__union_get_inode(struct path *upper,
+				       struct path *_lower_cache,
+				       struct path *_actual);
+extern int __union_copy_up(struct path *path, struct path *actual,
+			   const loff_t *truncate_to);
+
+extern int __union_copy_up_locked(struct path *parent, struct path *path,
+				  struct path *actual,
+				  const loff_t *truncate_to);
+
+static inline void path_put_maybe(struct path *path)
+{
+	/* These optimise away if CONFIG_UNION_MOUNT=n */
+	if (unlikely(path->dentry))
+		dput(path->dentry);
+	if (unlikely(path->mnt))
+		mntput(path->mnt);
+}
+
+/**
+ * union_get_inode_locked - Get the actual inode and dentry for a dentry
+ * @parent: The locked parent of the object we're interested in.
+ * @path: The object we're interested in.
+ * @_lower_cache: Cache for lower dentry pinning.
+ * @_actual: The point actually corresponding to the returned inode.
+ *
+ * Gets the inode to be used for a dentry where that inode may exist on a lower
+ * layer in a union. Note that we don't get a ref on the inode, so to pin it
+ * temporarily, we may point *_lower_cache at the lower dentry.
+ *
+ * The caller must hold i_mutex on the parent.
+ *
+ * Returns a pointer to the inode to use if a positive dentry is found, NULL if
+ * a negative dentry is found and an ERR_PTR() if lookup in the lower layers
+ * failed.
+ *
+ * On a successful return (positive or negative dentry), *_actual will be set
+ * to point to the dentry that we determined was the one of interest. This
+ * does not hold any refs of its own.
+ *
+ * The caller should call path_put_maybe() on *_lower_cache to clear any pins
+ * it may contain.
+ */
+static inline struct inode *union_get_inode_locked(struct dentry *parent,
+						   struct path *path,
+						   struct path *_lower_cache,
+						   struct path *_actual)
+{
+	/* Optimise for the non-unionmount case. */
+	_lower_cache->dentry = NULL;
+	_lower_cache->mnt = NULL;
+	*_actual = *path;
+
+#ifndef CONFIG_UNION_MOUNT
+	return path->dentry->d_inode;
+#else
+	/* The normal case is that the inode is right where we expect... */
+	if (likely(path->dentry->d_inode))
+		return path->dentry->d_inode;
+
+	/* ... or the dentry is ordinarily negative. */
+	if (likely(!path->dentry->d_sb->s_union_lower_mnts))
+		return NULL;
+
+	if (d_is_whiteout(path->dentry) ||
+	    (!d_is_fallthru(path->dentry) && IS_OPAQUE(parent->d_inode)))
+		return NULL;
+
+	/* The caller already holds the parent's i_mutex; do a lookup. */
+	return __union_get_inode_locked(parent, path, _lower_cache, _actual);
+#endif
+}
+
+/**
+ * union_get_inode - Get the actual inode and dentry for an object
+ * @path: The object we're interested in.
+ * @_lower_cache: Cache for lower dentry pinning.
+ * @_actual: The point actually corresponding to the returned inode.
+ *
+ * Gets the inode to be used for a dentry where that inode may exist on a lower
+ * layer in a union. Note that we don't get a ref on the inode, so to pin it
+ * temporarily, we may return a dentry in *_lower_cache.
+ *
+ * Returns a pointer to the inode to use if a positive dentry is found, NULL if
+ * a negative dentry is found and an ERR_PTR() if lookup in the lower layers
+ * failed.
+ *
+ * On a successful return (positive or negative dentry), *_actual will be set
+ * to point to the dentry that we determined was the one of interest. This
+ * does not have its own ref taken and thus does not need to be dput().
+ *
+ * The caller should call path_put_maybe() on *_lower_cache to clear any pins
+ * it may contain.
+ */
+static inline struct inode *union_get_inode(struct path *path,
+					    struct path *_lower_cache,
+					    struct path *_actual)
+{
+	_lower_cache->mnt = NULL;
+	_lower_cache->dentry = NULL;
+	*_actual = *path;
+
+#ifndef CONFIG_UNION_MOUNT
+	return path->dentry->d_inode;
+#else
+	/* The normal case is that the inode is right where we expect... */
+	if (likely(path->dentry->d_inode))
+		return path->dentry->d_inode;
+
+	/* ... or the dentry is ordinarily negative. */
+	if (likely(!path->dentry->d_sb->s_union_lower_mnts))
+		return NULL;
+
+	if (d_is_whiteout(path->dentry))
+		return NULL;
+
+	/* We have to lock the parent and do a lookup. */
+	return __union_get_inode(path, _lower_cache, _actual);
+#endif
+}
+
+/**
+ * union_truncated_copy_up - If needed, partially copy up a file (truncate)
+ * @path: The target object.
+ * @actual: The actual location of the object, as set by union_get_inode().
+ * @truncate_to: The amount to copy up.
+ *
+ * Returns 0 if @path is already positive (nothing to do), otherwise the
+ * result of __union_copy_up().
+ */
+static inline int union_truncated_copy_up(struct path *path, struct path *actual,
+					  const loff_t *truncate_to)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		return __union_copy_up(path, actual, truncate_to);
+#endif
+	return 0;
+}
+
+/**
+ * union_copy_up - If needed, copy up a file in its entirety
+ * @path: The target object.
+ * @actual: The actual location of the object, as set by union_get_inode().
+ *
+ * Returns 0 if @path is already positive (nothing to do), otherwise the
+ * result of __union_copy_up().
+ */
+static inline int union_copy_up(struct path *path, struct path *actual)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		return __union_copy_up(path, actual, NULL);
+#endif
+	return 0;
+}
+
+/**
+ * union_copy_up_locked - If needed, copy up a file, caller holds parent lock
+ * @parent: The parent directory of the target object
+ * @path: The target object.
+ * @actual: The actual location of the object, as set by union_get_inode().
+ *
+ * The caller must hold i_mutex on the parent directory.
+ *
+ * Returns 0 if @path is already positive (nothing to do). Otherwise this
+ * currently always fails with -ENOANO.
+ */
+static inline int union_copy_up_locked(struct path *parent, struct path *path,
+				       struct path *actual)
+{
+#ifdef CONFIG_UNION_MOUNT
+	/* FIXME: not yet wired up to __union_copy_up_locked(). */
+	if (unlikely(!path->dentry->d_inode))
+		return -ENOANO;
+#endif
+	return 0;
+}
+
+extern int __union_copy_up_for_do_last(struct path *, struct path *, bool);
+
+/**
+ * union_copy_up_for_do_last - If needed, copy up a file (maybe truncated)
+ * @parent: The parent directory of the target object.
+ * @path: The target object.
+ * @will_truncate: Whether to honour O_TRUNC or not.
+ *
+ * Returns 0 if @path is already positive (nothing to do), otherwise the
+ * result of __union_copy_up_for_do_last().
+ */
+static inline int union_copy_up_for_do_last(struct path *parent, struct path *path,
+					    bool will_truncate)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		return __union_copy_up_for_do_last(parent, path, will_truncate);
+#endif
+	return 0;
+}
+
+/* True if @actual refers to a different dentry than @dentry. */
+static inline bool d_is_unioned(const struct dentry *dentry, const struct path *actual)
+{
+#ifndef CONFIG_UNION_MOUNT
+	return false;
+#else
+	return unlikely(dentry != actual->dentry);
+#endif
+}
+
+/* True if @inode is not the inode attached to @dentry. */
+static inline bool is_unioned(const struct dentry *dentry, const struct inode *inode)
+{
+#ifndef CONFIG_UNION_MOUNT
+	return false;
+#else
+	return unlikely(dentry->d_inode != inode);
+#endif
+}
+
+extern struct union_stack *union_alloc(const struct path *topmost);
+
+/*
+ * Release every path held in a union stack, then free the stack itself.
+ * @d may be NULL, in which case nothing is done.
+ */
+static inline void union_free(const struct path *path, struct union_stack *d)
+{
+	unsigned i, layers = path->dentry->d_sb->s_union_count;
+
+	if (d) {
+		for (i = 0; i < layers; i++)
+			path_put(&d->u_dirs[i]);
+		kfree(d);
+	}
+}
+
+#endif /* _FS_UNION_H */
diff --git a/fs/utimes.c b/fs/utimes.c index aa138d6..a42c98f 100644 --- a/fs/utimes.c +++ b/fs/utimes.c
@@ -10,6 +10,8 @@ #include <linux/syscalls.h> #include <asm/uaccess.h> #include <asm/unistd.h> +#include "internal.h" +#include "union.h" #ifdef __ARCH_WANT_SYS_UTIME @@ -54,10 +56,17 @@ struct iattr newattrs; struct inode *inode = path->dentry->d_inode; struct inode *delegated_inode = NULL; + struct path lower_cache, actual; + + inode = union_get_inode(path, &lower_cache, &actual); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto out; + } error = mnt_want_write(path->mnt); if (error) - goto out; + goto out_put_cache; if (times && times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) @@ -97,12 +106,27 @@ goto mnt_drop_write_and_out; if (!inode_owner_or_capable(inode)) { - error = inode_permission(inode, MAY_WRITE); - if (error) - goto mnt_drop_write_and_out; + /* We have to be able to write to the upperfs. */ + if (d_is_unioned(path->dentry, &actual)) { + error = -EROFS; + if (path->dentry->d_sb->s_flags & MS_RDONLY) + goto mnt_drop_write_and_out; + error = __inode_permission(inode, MAY_WRITE); + if (error) + goto mnt_drop_write_and_out; + } else { + error = inode_permission(inode, MAY_WRITE); + if (error) + goto mnt_drop_write_and_out; + } } } + retry_deleg: + error = union_copy_up(path, &actual); + if (error) + goto mnt_drop_write_and_out; + mutex_lock(&inode->i_mutex); error = notify_change(path->dentry, &newattrs, &delegated_inode); mutex_unlock(&inode->i_mutex); @@ -114,6 +138,8 @@ mnt_drop_write_and_out: mnt_drop_write(path->mnt); +out_put_cache: + path_put_maybe(&lower_cache); out: return error; }
diff --git a/fs/xattr.c b/fs/xattr.c index ead3e34..912e399 100644 --- a/fs/xattr.c +++ b/fs/xattr.c
@@ -23,13 +23,19 @@ #include <linux/posix_acl_xattr.h> #include <asm/uaccess.h> +#include "internal.h" +#include "union.h" /* * Check permissions for extended attribute access. This is a bit complicated * because different namespaces have very different rules. + * + * Note: in unionmount conditions, dentry must be on the _upper_ layer whilst + * inode may be on the lower. */ static int -xattr_permission(struct inode *inode, const char *name, int mask) +xattr_permission(struct dentry *dentry, struct inode *inode, const char *name, + int mask) { /* * We can never set or remove an extended attribute on a read-only @@ -70,7 +76,13 @@ return -EPERM; } - return inode_permission(inode, mask); + if (is_unioned(dentry, inode)) { + if (mask & MAY_WRITE && dentry->d_sb->s_flags & MS_RDONLY) + return -EROFS; + return __inode_permission(inode, mask); + } else { + return inode_permission(inode, mask); + } } /** @@ -87,7 +99,7 @@ * * This function requires the caller to lock the inode's i_mutex before it * is executed. It also assumes that the caller will make the appropriate - * permission checks. + * permission checks. The caller must also have copied up for unionmount. 
*/ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) @@ -97,6 +109,8 @@ int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); + if (!inode) + return -ENOENT; if (issec) inode->i_flags &= ~S_NOSEC; if (inode->i_op->setxattr) { @@ -122,23 +136,40 @@ vfs_setxattr(struct path *path, const char *name, const void *value, size_t size, int flags) { - struct dentry *dentry = path->dentry; - struct inode *inode = dentry->d_inode; + struct path lower_cache, actual; + struct inode *inode; int error; - error = xattr_permission(inode, name, MAY_WRITE); + inode = union_get_inode(path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); + +again: + error = xattr_permission(actual.dentry, inode, name, MAY_WRITE); if (error) - return error; + goto out_lower; mutex_lock(&inode->i_mutex); - error = security_inode_setxattr(dentry, name, value, size, flags); + error = security_inode_setxattr(actual.dentry, name, value, size, flags); if (error) - goto out; + goto out_unlock; - error = __vfs_setxattr_noperm(dentry, name, value, size, flags); + if (d_is_unioned(path->dentry, &actual)) { + /* Unionmounted */ + mutex_unlock(&inode->i_mutex); + error = union_copy_up(path, &actual); + if (error) + goto out_lower; + inode = actual.dentry->d_inode; + goto again; + } -out: + error = __vfs_setxattr_noperm(actual.dentry, name, value, size, flags); + +out_unlock: mutex_unlock(&inode->i_mutex); +out_lower: + path_put_maybe(&lower_cache); return error; } EXPORT_SYMBOL_GPL(vfs_setxattr); @@ -186,7 +217,7 @@ char *value = *xattr_value; int error; - error = xattr_permission(inode, name, MAY_READ); + error = xattr_permission(dentry, inode, name, MAY_READ); if (error) return error; @@ -231,55 +262,72 @@ ssize_t vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - struct inode *inode = dentry->d_inode; - int error; + struct inode *inode; + struct path lower_cache, actual; + 
struct path path = { .dentry = dentry }; + ssize_t error; - error = xattr_permission(inode, name, MAY_READ); - if (error) - return error; + inode = union_get_inode(&path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); - error = security_inode_getxattr(dentry, name); + error = xattr_permission(dentry, inode, name, MAY_READ); if (error) - return error; + goto out_dput; + + error = security_inode_getxattr(actual.dentry, name); + if (error) + goto out_dput; if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - int ret = xattr_getsecurity(inode, suffix, value, size); + ssize_t ret = xattr_getsecurity(inode, suffix, value, size); /* * Only overwrite the return value if a security module * is actually active. */ if (ret == -EOPNOTSUPP) goto nolsm; - return ret; + error = ret; + goto out_dput; } nolsm: if (inode->i_op->getxattr) - error = inode->i_op->getxattr(dentry, name, value, size); + error = inode->i_op->getxattr(actual.dentry, name, value, size); else error = -EOPNOTSUPP; +out_dput: + path_put_maybe(&lower_cache); return error; } EXPORT_SYMBOL_GPL(vfs_getxattr); ssize_t -vfs_listxattr(struct dentry *d, char *list, size_t size) +vfs_listxattr(struct dentry *dentry, char *list, size_t size) { + struct inode *inode; + struct path lower_cache, actual; + struct path path = { .dentry = dentry }; ssize_t error; - error = security_inode_listxattr(d); - if (error) - return error; - error = -EOPNOTSUPP; - if (d->d_inode->i_op->listxattr) { - error = d->d_inode->i_op->listxattr(d, list, size); - } else { - error = security_inode_listsecurity(d->d_inode, list, size); - if (size && error > size) - error = -ERANGE; + inode = union_get_inode(&path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + error = security_inode_listxattr(actual.dentry); + if (!error) { + error = -EOPNOTSUPP; + if (inode->i_op->listxattr) { + error = 
inode->i_op->listxattr(actual.dentry, list, size); + } else { + error = security_inode_listsecurity(inode, list, size); + if (size && error > size) + error = -ERANGE; + } } + path_put_maybe(&lower_cache); return error; } EXPORT_SYMBOL_GPL(vfs_listxattr); @@ -287,31 +335,48 @@ int vfs_removexattr(struct path *path, const char *name) { - struct dentry *dentry = path->dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode; + struct path lower_cache, actual; int error; - if (!inode->i_op->removexattr) - return -EOPNOTSUPP; + inode = union_get_inode(path, &lower_cache, &actual); + if (IS_ERR(inode)) + return PTR_ERR(inode); - error = xattr_permission(inode, name, MAY_WRITE); +again: + error = -EOPNOTSUPP; + if (!inode->i_op->removexattr) + goto out; + + error = xattr_permission(path->dentry, inode, name, MAY_WRITE); if (error) - return error; + goto out; mutex_lock(&inode->i_mutex); - error = security_inode_removexattr(dentry, name); + error = security_inode_removexattr(actual.dentry, name); if (error) { mutex_unlock(&inode->i_mutex); - return error; + goto out; } - error = inode->i_op->removexattr(dentry, name); + if (d_is_unioned(path->dentry, &actual)) { + mutex_unlock(&inode->i_mutex); + error = union_copy_up(path, &actual); + if (error) + goto out; + inode = actual.dentry->d_inode; + goto again; + } + + error = inode->i_op->removexattr(actual.dentry, name); mutex_unlock(&inode->i_mutex); if (!error) { - fsnotify_xattr(dentry); - evm_inode_post_removexattr(dentry, name); + fsnotify_xattr(actual.dentry); + evm_inode_post_removexattr(actual.dentry, name); } +out: + path_put_maybe(&lower_cache); return error; } EXPORT_SYMBOL_GPL(vfs_removexattr); @@ -426,12 +491,16 @@ if (!f.file) return error; dentry = f.file->f_path.dentry; + error = -EACCES; + if (f.file->f_inode != dentry->d_inode) + goto error; /* Can't alter an open lower union file this way */ audit_inode(NULL, dentry, 0); error = mnt_want_write_file(f.file); if (!error) { error = 
setxattr(&f.file->f_path, name, value, size, flags); mnt_drop_write_file(f.file); } +error: fdput(f); return error; } @@ -526,13 +595,15 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { + struct dentry *dentry; struct fd f = fdget(fd); ssize_t error = -EBADF; if (!f.file) return error; - audit_inode(NULL, f.file->f_path.dentry, 0); - error = getxattr(f.file->f_path.dentry, name, value, size); + audit_file(NULL, f.file, 0); + dentry = d_dentry_or_lower(f.file->f_path.dentry); + error = getxattr(dentry, name, value, size); fdput(f); return error; } @@ -615,13 +686,15 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { + struct dentry *dentry; struct fd f = fdget(fd); ssize_t error = -EBADF; if (!f.file) return error; - audit_inode(NULL, f.file->f_path.dentry, 0); - error = listxattr(f.file->f_path.dentry, list, size); + audit_file(NULL, f.file, 0); + dentry = d_dentry_or_lower(f.file->f_path.dentry); + error = listxattr(dentry, list, size); fdput(f); return error; } @@ -701,12 +774,16 @@ if (!f.file) return error; dentry = f.file->f_path.dentry; + error = -EACCES; + if (f.file->f_inode != dentry->d_inode) + goto error; /* Can't alter an open lower union file this way */ audit_inode(NULL, dentry, 0); error = mnt_want_write_file(f.file); if (!error) { error = removexattr(&f.file->f_path, name); mnt_drop_write_file(f.file); } +error: fdput(f); return error; }
diff --git a/include/linux/audit.h b/include/linux/audit.h index a406419..f863680 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h
@@ -108,8 +108,10 @@ #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ -extern void __audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int flags); +extern void __audit_dentry(struct filename *name, const struct dentry *dentry, + unsigned flags); +extern void __audit_file(struct filename *name, const struct file *file, + unsigned flags); extern void __audit_inode_child(const struct inode *parent, const struct dentry *dentry, const unsigned char type); @@ -153,23 +155,30 @@ if (unlikely(!audit_dummy_context())) __audit_getname(name); } + static inline void audit_inode(struct filename *name, - const struct dentry *dentry, - unsigned int parent) { - if (unlikely(!audit_dummy_context())) { - unsigned int flags = 0; - if (parent) - flags |= AUDIT_INODE_PARENT; - __audit_inode(name, dentry, flags); - } + const struct dentry *dentry, + bool parent) +{ + if (unlikely(!audit_dummy_context())) + __audit_dentry(name, dentry, parent ? AUDIT_INODE_PARENT : 0); } + static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { if (unlikely(!audit_dummy_context())) - __audit_inode(name, dentry, - AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); + __audit_dentry(name, dentry, + AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); } + +static inline void audit_file(struct filename *name, const struct file *file, + bool parent) +{ + if (unlikely(!audit_dummy_context())) + __audit_file(name, file, parent ? 
AUDIT_INODE_PARENT : 0); +} + static inline void audit_inode_child(const struct inode *parent, const struct dentry *dentry, const unsigned char type) { @@ -325,21 +334,20 @@ { } static inline void audit_putname(struct filename *name) { } -static inline void __audit_inode(struct filename *name, - const struct dentry *dentry, - unsigned int flags) -{ } static inline void __audit_inode_child(const struct inode *parent, const struct dentry *dentry, const unsigned char type) { } static inline void audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int parent) + bool parent) { } static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { } +static inline void audit_file(struct filename *name, const struct file *file, + bool parent) +{ } static inline void audit_inode_child(const struct inode *parent, const struct dentry *dentry, const unsigned char type)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 0f3ed77..c68b6aa 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h
@@ -14,6 +14,7 @@ struct nameidata; struct path; struct vfsmount; +struct union_stack; /* * linux/include/linux/dcache.h @@ -92,16 +93,36 @@ * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the * large memory footprint increase). + * + * XXX DNAME_INLINE_LEN is kind of pitiful on 64bit + union + * mounts. May be worth tuning up, but either we go to 256 bytes and + * a wasteful 88 bytes of d_iname, or we lose 64-byte alignment. */ #ifdef CONFIG_64BIT + +#ifdef CONFIG_UNION_MOUNT +# define DNAME_INLINE_LEN 24 /* 192 bytes */ +#else # define DNAME_INLINE_LEN 32 /* 192 bytes */ +#endif /* CONFIG_UNION_MOUNT */ + +#else + +#ifdef CONFIG_UNION_MOUNT +# ifdef CONFIG_SMP +# define DNAME_INLINE_LEN 32 /* 128 bytes */ +# else +# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# endif #else # ifdef CONFIG_SMP # define DNAME_INLINE_LEN 36 /* 128 bytes */ # else # define DNAME_INLINE_LEN 40 /* 128 bytes */ # endif -#endif +#endif /* CONFIG_UNION_MOUNT */ + +#endif /* CONFIG_64BIT */ #define d_lock d_lockref.lock @@ -123,6 +144,12 @@ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ +#ifdef CONFIG_UNION_MOUNT + union { + struct union_stack *d_union_stack; /* Dirs in union stack */ + struct dentry *d_fallthru; /* Lower dentry pinned by fallthru */ + }; +#endif struct list_head d_lru; /* LRU list */ /* * d_child and d_rcu can share memory @@ -495,6 +522,71 @@ #endif } +static inline bool d_has_lower(const struct dentry *dentry) +{ + return unlikely(!dentry->d_inode && + (d_is_fallthru(dentry) || + dentry->d_flags & DCACHE_UNION_PINNING_LOWER)); +} + +static inline void d_set_union_stack(struct dentry *dentry, struct union_stack *d) +{ +#ifdef CONFIG_UNION_MOUNT + BUG_ON(dentry->d_union_stack != NULL); + dentry->d_union_stack = d; +#endif +} + +static inline void d_pin_lower(struct dentry *dentry, struct dentry *lower) +{ +#ifdef CONFIG_UNION_MOUNT + 
BUG_ON(dentry->d_fallthru != NULL); + dentry->d_fallthru = lower; + smp_wmb(); + dentry->d_flags |= DCACHE_UNION_PINNING_LOWER; +#endif +} + +static inline bool d_is_pinning_lower(const struct dentry *dentry) +{ +#ifdef CONFIG_UNION_MOUNT + if (unlikely(dentry->d_flags & DCACHE_UNION_PINNING_LOWER)) { + smp_rmb(); /* d_fallthru must be read only after this flag is + * checked. */ + return true; + } +#endif + return false; +} + +static inline struct dentry *d_get_fallthru(struct dentry *dentry) +{ +#ifdef CONFIG_UNION_MOUNT + return dentry->d_fallthru; +#else + return NULL; +#endif +} + +static inline struct dentry *d_dentry_or_lower(struct dentry *dentry) +{ +#ifdef CONFIG_UNION_MOUNT + return dentry->d_inode ? dentry : dentry->d_fallthru; +#else + return dentry; +#endif +} + +static inline struct inode *d_inode_or_lower(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; +#ifdef CONFIG_UNION_MOUNT + if (!inode && d_is_pinning_lower(dentry)) + inode = dentry->d_fallthru->d_inode; +#endif + return inode; +} + extern int sysctl_vfs_cache_pressure; static inline unsigned long vfs_pressure_ratio(unsigned long val)
diff --git a/include/linux/fs.h b/include/linux/fs.h index 3ee9f62..1b48483 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h
@@ -1325,6 +1325,16 @@ /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; + /* Root of the private cloned vfsmount tree of the read-only + * mounts in this union (set in topmost vfsmount only) + */ + struct vfsmount *s_union_lower_mnts; + + /* Number of layers in this union, not counting the topmost or + * submounts. + */ + unsigned int s_union_count; + /* * Keep the lru lists last in the structure so they always sit on their * own individual cachelines. @@ -2657,6 +2667,21 @@ extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNION_MOUNT +extern int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name, + int namlen, ino_t *ino, unsigned char *d_type); +#else +static inline int generic_readdir_fallthru(struct dentry *topmost_dentry, const char *name, + int namlen, ino_t *ino, unsigned char *d_type) +{ + /* + * Found a fallthru on a kernel without union support. + * There's nothing to fall through to, so return -ENOENT. + */ + return -ENOENT; +} +#endif + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *,
diff --git a/include/linux/mount.h b/include/linux/mount.h index 562f453..52f443f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h
@@ -83,4 +83,6 @@ extern dev_t name_to_dev_t(char *name); +extern void put_union_sb(struct super_block *sb); + #endif /* _LINUX_MOUNT_H */
diff --git a/include/linux/security.h b/include/linux/security.h index 5623a7f..83034ad 100644 --- a/include/linux/security.h +++ b/include/linux/security.h
@@ -2951,7 +2951,8 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry); + struct path *new_dir, struct dentry *new_dentry, + struct inode *old_inode); int security_path_chmod(struct path *path, umode_t mode); int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); @@ -2999,7 +3000,8 @@ static inline int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry) + struct dentry *new_dentry, + struct inode *old_inode) { return 0; }
diff --git a/kernel/audit.c b/kernel/audit.c index 906ae5a0..e2d004b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c
@@ -1753,6 +1753,7 @@ { struct audit_buffer *ab; struct audit_names *name; + struct inode *inode; name = kzalloc(sizeof(*name), GFP_NOFS); if (!name) @@ -1770,7 +1771,12 @@ /* Generate AUDIT_PATH record with object. */ name->type = AUDIT_TYPE_NORMAL; - audit_copy_inode(name, link->dentry, link->dentry->d_inode); + inode = link->dentry->d_inode; +#ifdef CONFIG_UNION_MOUNT + if (!inode) + inode = link->dentry->d_fallthru->d_inode; +#endif + audit_copy_inode(name, link->dentry, inode); audit_log_name(current->audit_context, name, link, 0, NULL); out: kfree(name);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 90594c9..7cab86d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c
@@ -1757,11 +1757,12 @@ * @dentry: dentry being audited * @flags: attributes for this particular entry */ -void __audit_inode(struct filename *name, const struct dentry *dentry, - unsigned int flags) +static void __audit_inode(struct filename *name, + const struct dentry *dentry, + const struct inode *inode, + unsigned int flags) { struct audit_context *context = current->audit_context; - const struct inode *inode = dentry->d_inode; struct audit_names *n; bool parent = flags & AUDIT_INODE_PARENT; @@ -1829,6 +1830,32 @@ } /** + * __audit_dentry - store the inode and device from a lookup + * @name: name being audited (optional) + * @dentry: dentry being audited; its d_inode is the inode recorded + * @flags: attributes for this particular entry + */ +void __audit_dentry(struct filename *name, const struct dentry *dentry, + unsigned flags) +{ + if (unlikely(!audit_dummy_context())) + __audit_inode(name, dentry, dentry->d_inode, flags); +} + +/** + * __audit_file - store the inode and device from an open file + * @name: name being audited (optional) + * @file: open file being audited; its f_inode and f_path.dentry are recorded + * @flags: attributes for this particular entry + */ +void __audit_file(struct filename *name, const struct file *file, + unsigned flags) +{ + if (unlikely(!audit_dummy_context())) + __audit_inode(name, file->f_path.dentry, file->f_inode, flags); +} + +/** * __audit_inode_child - collect inode info for created/removed objects * @parent: inode of dentry parent * @dentry: dentry being audited
diff --git a/mm/shmem.c b/mm/shmem.c index 2ece54b..d1acfbd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c
@@ -1976,6 +1976,7 @@ ret = PTR_ERR(dentry); goto error_free; } + d_set_type(whiteout, DCACHE_WHITEOUT_TYPE); if (old_dentry->d_inode || d_is_fallthru(old_dentry)) { /* A fallthru for a dir is treated like a regular link */ @@ -2017,7 +2018,10 @@ dir->i_size += BOGO_DIRENT_SIZE; dget(dentry); /* Extra count - pin the dentry in core */ } - /* Will clear DCACHE_WHITEOUT and DCACHE_FALLTHRU flags */ + + /* Attach the inode to the dentry - this will set the DCACHE_ENTRY_TYPE + * field and clear DCACHE_FALLTHRU flags. + */ d_instantiate(dentry, inode); } @@ -2052,11 +2056,8 @@ } shmem_d_instantiate(dir, dentry, NULL); + d_set_fallthru(dentry); dir->i_ctime = dir->i_mtime = CURRENT_TIME; - - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_FALLTHRU; - spin_unlock(&dentry->d_lock); return 0; } @@ -2155,9 +2156,11 @@ * but each new link needs a new dentry, pinning lowmem, and * tmpfs dentries cannot be pruned until they are unlinked. */ - ret = shmem_reserve_inode(inode->i_sb); - if (ret) - goto out; + if (inode->i_nlink > 0) { + ret = shmem_reserve_inode(inode->i_sb); + if (ret) + goto out; + } inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); @@ -2197,7 +2200,7 @@ spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { - spin_lock(&child->d_lock); + spin_lock_nested(&child->d_lock, 1); if (d_is_whiteout(child)) { __d_drop(child); if (!list_empty(&child->d_lru)) {
diff --git a/security/commoncap.c b/security/commoncap.c index b9d613e..84dc2e5 100644 --- a/security/commoncap.c +++ b/security/commoncap.c
@@ -381,6 +381,12 @@ int size; struct vfs_cap_data caps; +#ifdef CONFIG_UNION_MOUNT + /* No inode on this dentry: use the fallthru target's inode, if one is attached. */ + if (!inode && dentry->d_fallthru) + inode = dentry->d_fallthru->d_inode; +#endif + memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); if (!inode || !inode->i_op->getxattr)
diff --git a/security/security.c b/security/security.c index 15b6928..5f09731 100644 --- a/security/security.c +++ b/security/security.c
@@ -433,10 +433,13 @@ } int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry) + struct path *new_dir, struct dentry *new_dentry, + struct inode *old_inode) { - if (unlikely(IS_PRIVATE(old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + struct inode *new_inode = d_inode_or_lower(new_dentry); + + if (unlikely(IS_PRIVATE(old_inode) || + (new_inode && IS_PRIVATE(new_inode)))) return 0; return security_ops->path_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -526,8 +529,9 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - if (unlikely(IS_PRIVATE(old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + if (unlikely(IS_PRIVATE(d_inode_or_lower(old_dentry)) || + (d_inode_or_lower(new_dentry) && + IS_PRIVATE(d_inode_or_lower(new_dentry))))) return 0; return security_ops->inode_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -535,7 +539,7 @@ int security_inode_readlink(struct dentry *dentry) { - if (unlikely(IS_PRIVATE(dentry->d_inode))) + if (unlikely(IS_PRIVATE(d_inode_or_lower(dentry)))) return 0; return security_ops->inode_readlink(dentry); }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 57b0b49..fd77820 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c
@@ -1601,6 +1601,11 @@ struct inode *inode = dentry->d_inode; struct common_audit_data ad; +#ifdef CONFIG_UNION_MOUNT + if (unlikely(!inode) && dentry->d_fallthru) + inode = dentry->d_fallthru->d_inode; +#endif + ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; return inode_has_perm(cred, inode, av, &ad); @@ -1616,6 +1621,11 @@ struct inode *inode = path->dentry->d_inode; struct common_audit_data ad; +#ifdef CONFIG_UNION_MOUNT + if (unlikely(!inode) && path->dentry->d_fallthru) + inode = path->dentry->d_fallthru->d_inode; +#endif + ad.type = LSM_AUDIT_DATA_PATH; ad.u.path = *path; return inode_has_perm(cred, inode, av, &ad);