Development
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 622f469..6c55e3c 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -200,7 +200,7 @@
 	fsdef->dentry = cachedir;
 	fsdef->fscache.cookie = NULL;
 
-	ret = cachefiles_check_object_type(fsdef);
+	ret = cachefiles_check_object_type(cache, fsdef);
 	if (ret < 0)
 		goto error_unsupported;
 
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index e66d447..8af7a6e 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -232,7 +232,7 @@
 	auxdata->type = cookie->def->type;
 
 	cachefiles_begin_secure(cache, &saved_cred);
-	cachefiles_update_object_xattr(object, auxdata);
+	cachefiles_update_object_xattr(cache, object, auxdata);
 	cachefiles_end_secure(cache, saved_cred);
 	kfree(auxdata);
 	_leave("");
@@ -471,9 +471,9 @@
 		path.mnt = cache->mnt;
 
 		cachefiles_begin_secure(cache, &saved_cred);
-		ret = vfs_truncate(NULL, &path, 0);
+		ret = vfs_truncate(&path, 0);
 		if (ret == 0)
-			ret = vfs_truncate(NULL, &path, ni_size);
+			ret = vfs_truncate(&path, ni_size);
 		cachefiles_end_secure(cache, saved_cred);
 
 		if (ret != 0) {
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 4938251..f6d2cde 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -230,12 +230,16 @@
 /*
  * xattr.c
  */
-extern int cachefiles_check_object_type(struct cachefiles_object *object);
-extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
+extern int cachefiles_check_object_type(struct cachefiles_cache *cache,
+					struct cachefiles_object *object);
+extern int cachefiles_set_object_xattr(struct cachefiles_cache *cache,
+				       struct cachefiles_object *object,
 				       struct cachefiles_xattr *auxdata);
-extern int cachefiles_update_object_xattr(struct cachefiles_object *object,
+extern int cachefiles_update_object_xattr(struct cachefiles_cache *cache,
+					  struct cachefiles_object *object,
 					  struct cachefiles_xattr *auxdata);
-extern int cachefiles_check_object_xattr(struct cachefiles_object *object,
+extern int cachefiles_check_object_xattr(struct cachefiles_cache *cache,
+					 struct cachefiles_object *object,
 					 struct cachefiles_xattr *auxdata);
 extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
 					  struct dentry *dentry);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 6a76c37..75816cf 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -601,7 +601,7 @@
 	if (!object->new) {
 		_debug("validate '%pq'", &next->d_name);
 
-		ret = cachefiles_check_object_xattr(object, auxdata);
+		ret = cachefiles_check_object_xattr(cache, object, auxdata);
 		if (ret == -ESTALE) {
 			/* delete the object (the deleter drops the directory
 			 * mutex) */
@@ -633,7 +633,7 @@
 
 	if (object->new) {
 		/* attach data to a newly constructed terminal object */
-		ret = cachefiles_set_object_xattr(object, auxdata);
+		ret = cachefiles_set_object_xattr(cache, object, auxdata);
 		if (ret < 0)
 			goto check_error;
 	} else {
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 18587d8..daca6ba 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -26,9 +26,11 @@
  * check the type label on an object
  * - done using xattrs
  */
-int cachefiles_check_object_type(struct cachefiles_object *object)
+int cachefiles_check_object_type(struct cachefiles_cache *cache,
+				 struct cachefiles_object *object)
 {
 	struct dentry *dentry = object->dentry;
+	struct path path = { .mnt = cache->mnt, .dentry = dentry };
 	char type[3], xtype[3];
 	int ret;
 
@@ -43,7 +45,7 @@
 	_enter("%p{%s}", object, type);
 
 	/* attempt to install a type label directly */
-	ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2,
+	ret = vfs_setxattr(&path, cachefiles_xattr_cache, type, 2,
 			   XATTR_CREATE);
 	if (ret == 0) {
 		_debug("SET"); /* we succeeded */
@@ -97,21 +99,22 @@
 /*
  * set the state xattr on a cache file
  */
-int cachefiles_set_object_xattr(struct cachefiles_object *object,
+int cachefiles_set_object_xattr(struct cachefiles_cache *cache,
+				struct cachefiles_object *object,
 				struct cachefiles_xattr *auxdata)
 {
-	struct dentry *dentry = object->dentry;
+	struct path path = { .mnt = cache->mnt, .dentry = object->dentry };
 	int ret;
 
 	ASSERT(object->fscache.cookie);
-	ASSERT(dentry);
+	ASSERT(path.dentry);
 
 	_enter("%p,#%d", object, auxdata->len);
 
 	/* attempt to install the cache metadata directly */
 	_debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len);
 
-	ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
+	ret = vfs_setxattr(&path, cachefiles_xattr_cache,
 			   &auxdata->type, auxdata->len,
 			   XATTR_CREATE);
 	if (ret < 0 && ret != -ENOMEM)
@@ -126,21 +129,22 @@
 /*
  * update the state xattr on a cache file
  */
-int cachefiles_update_object_xattr(struct cachefiles_object *object,
+int cachefiles_update_object_xattr(struct cachefiles_cache *cache,
+				   struct cachefiles_object *object,
 				   struct cachefiles_xattr *auxdata)
 {
-	struct dentry *dentry = object->dentry;
+	struct path path = { .mnt = cache->mnt, .dentry = object->dentry };
 	int ret;
 
 	ASSERT(object->fscache.cookie);
-	ASSERT(dentry);
+	ASSERT(path.dentry);
 
 	_enter("%p,#%d", object, auxdata->len);
 
 	/* attempt to install the cache metadata directly */
 	_debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len);
 
-	ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
+	ret = vfs_setxattr(&path, cachefiles_xattr_cache,
 			   &auxdata->type, auxdata->len,
 			   XATTR_REPLACE);
 	if (ret < 0 && ret != -ENOMEM)
@@ -156,11 +160,13 @@
  * check the state xattr on a cache file
  * - return -ESTALE if the object should be deleted
  */
-int cachefiles_check_object_xattr(struct cachefiles_object *object,
+int cachefiles_check_object_xattr(struct cachefiles_cache *cache,
+				  struct cachefiles_object *object,
 				  struct cachefiles_xattr *auxdata)
 {
 	struct cachefiles_xattr *auxbuf;
 	struct dentry *dentry = object->dentry;
+	struct path path = { .mnt = cache->mnt, .dentry = dentry };
 	int ret;
 
 	_enter("%p,#%d", object, auxdata->len);
@@ -231,7 +237,7 @@
 		}
 
 		/* update the current label */
-		ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
+		ret = vfs_setxattr(&path, cachefiles_xattr_cache,
 				   &auxdata->type, auxdata->len,
 				   XATTR_REPLACE);
 		if (ret < 0) {
@@ -268,9 +274,10 @@
 int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
 				   struct dentry *dentry)
 {
+	struct path path = { .mnt = cache->mnt, .dentry = dentry };
 	int ret;
 
-	ret = vfs_removexattr(dentry, cachefiles_xattr_cache);
+	ret = vfs_removexattr(&path, cachefiles_xattr_cache);
 	if (ret < 0) {
 		if (ret == -ENOENT || ret == -ENODATA)
 			ret = 0;
diff --git a/fs/dcache.c b/fs/dcache.c
index b97b4a6..0b22774 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1377,6 +1377,8 @@
 	spin_lock(&dentry->d_lock);
 	if (inode) {
 		dentry->d_flags &= ~(DCACHE_WHITEOUT | DCACHE_FALLTHRU);
+		if (inode->i_op->follow_link)
+			dentry->d_flags |= DCACHE_SYMLINK;
 		if (unlikely(IS_AUTOMOUNT(inode)))
 			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
 		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
@@ -1585,6 +1587,8 @@
 	/* attach a disconnected dentry */
 	spin_lock(&tmp->d_lock);
 	tmp->d_inode = inode;
+	if (inode->i_op->follow_link)
+		tmp->d_flags |= DCACHE_SYMLINK;
 	tmp->d_flags |= DCACHE_DISCONNECTED;
 	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
 	hlist_bl_lock(&tmp->d_sb->s_anon);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5eab400..1472d8c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1042,17 +1042,17 @@
 		  size_t size, int flags)
 {
 	int rc = 0;
-	struct dentry *lower_dentry;
+	struct path *lower_path;
 
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!lower_dentry->d_inode->i_op->setxattr) {
+	lower_path = ecryptfs_dentry_to_lower_path(dentry);
+	if (!lower_path->dentry->d_inode->i_op->setxattr) {
 		rc = -EOPNOTSUPP;
 		goto out;
 	}
 
-	rc = vfs_setxattr(lower_dentry, name, value, size, flags);
+	rc = vfs_setxattr(lower_path, name, value, size, flags);
 	if (!rc)
-		fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
+		fsstack_copy_attr_all(dentry->d_inode, lower_path->dentry->d_inode);
 out:
 	return rc;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 90c0aa4..04b4324 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1556,6 +1556,11 @@
 	struct inode *inode = path->dentry->d_inode;
 	struct timespec now;
 
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!inode))
+		return;
+#endif
+
 	if (inode->i_flags & S_NOATIME)
 		return;
 	if (IS_NOATIME(inode))
diff --git a/fs/internal.h b/fs/internal.h
index 37de391..526cf49 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -49,6 +49,7 @@
 				  struct path *, struct path *);
 extern int user_path_and_parent(int, const char __user *, unsigned,
 				struct path *, struct path *);
+extern struct dentry *__lookup_hash(struct qstr *, struct dentry *, unsigned);
 
 /*
  * namespace.c
diff --git a/fs/libfs.c b/fs/libfs.c
index 1398da8..453ca27 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -21,7 +21,7 @@
 static inline int simple_positive(struct dentry *dentry)
 {
 	return (dentry->d_inode && !d_unhashed(dentry)) ||
-		(dentry->d_flags & DCACHE_FALLTHRU);
+		d_is_fallthru(dentry);
 }
 
 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
diff --git a/fs/namei.c b/fs/namei.c
index e2dba6f..a10d8e2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -694,9 +694,18 @@
 
 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
 {
-	struct inode *inode = link->dentry->d_inode;
+	struct dentry *dentry = link->dentry;
+	struct inode *inode;
+
+	inode = dentry->d_inode;
+#ifdef CONFIG_UNION_MOUNT
+	if (!inode) {
+		dentry = dentry->d_fallthru;
+		inode = dentry->d_inode;
+	}
+#endif
 	if (inode->i_op->put_link)
-		inode->i_op->put_link(link->dentry, nd, cookie);
+		inode->i_op->put_link(dentry, nd, cookie);
 	path_put(link);
 }
 
@@ -729,6 +738,10 @@
 
 	/* Allowed if owner and follower match. */
 	inode = link->dentry->d_inode;
+#ifdef CONFIG_UNION_MOUNT
+	if (!inode)
+		inode = link->dentry->d_fallthru->d_inode;
+#endif
 	if (uid_eq(current_cred()->fsuid, inode->i_uid))
 		return 0;
 
@@ -796,21 +809,19 @@
  */
 static int may_linkat(struct path *link)
 {
-	const struct cred *cred;
-	struct inode *inode;
+	struct inode *inode = link->dentry->d_inode;
 
 	if (!sysctl_protected_hardlinks)
 		return 0;
 
-	cred = current_cred();
-	inode = link->dentry->d_inode;
-
 	/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
 	 * otherwise, it must be a safe source.
 	 */
-	if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) ||
-	    capable(CAP_FOWNER))
+	if (uid_eq(current_cred()->fsuid, inode->i_uid) ||
+	    safe_hardlink_source(inode) ||
+	    capable(CAP_FOWNER)) {
 		return 0;
+	}
 
 	audit_log_link_denied("linkat", link);
 	return -EPERM;
@@ -838,7 +849,12 @@
 	touch_atime(link);
 	nd_set_link(nd, NULL);
 
-	error = security_inode_follow_link(link->dentry, nd);
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!dentry->d_inode))
+		dentry = dentry->d_fallthru;
+#endif
+
+	error = security_inode_follow_link(dentry, nd);
 	if (error)
 		goto out_put_nd_path;
 
@@ -1254,8 +1270,204 @@
 	nd->inode = nd->path.dentry->d_inode;
 }
 
-static struct dentry *__lookup_hash(struct qstr *name, struct dentry *base,
-				    unsigned flags);
+/*
+ * Build the lower-layer directory stack for a unioned dir.  Returns:
+ *
+ * (*) 0 if encountered a dir first - the union stack will be filled in.
+ *
+ * (*) 1 if encountered a symlink first - d->u_dirs[0].dentry will be set to it.
+ *
+ * (*) -ENOTDIR if we encountered any other type of file first.
+ *
+ * (*) -ENOENT if we didn't encounter anything.
+ *
+ * The caller must hold i_mutex on the parent dir.
+ */
+static int union_build_dirs(struct path *parent, struct path *path,
+			    struct union_stack *d)
+{
+	struct dentry *lower = NULL;
+	unsigned i, layers = parent->dentry->d_sb->s_union_count;
+	int ret = -ENOENT;
+
+	printk("UNION: -->union_build_dirs(%pq/%pq)\n",
+	       &parent->dentry->d_name, &path->dentry->d_name);
+
+	for (i = 0; i < layers; i++) {
+		/* Get the parent directory for this layer and lookup
+		 * the target in it.
+		 */
+		struct path *lower_parent = union_find_dir(parent->dentry, i);
+		if (!lower_parent->mnt)
+			continue;
+
+		mutex_lock(&lower_parent->dentry->d_inode->i_mutex);
+		lower = __lookup_hash(&path->dentry->d_name, lower_parent->dentry, 0);
+		mutex_unlock(&lower_parent->dentry->d_inode->i_mutex);
+
+		if (IS_ERR(lower)) {
+			ret = PTR_ERR(lower);
+			lower = NULL;
+			break;
+		}
+
+		/* A negative dentry can mean several things: a plain negative
+		 * dentry is ignored and lookup continues to the next layer,
+		 * but a whiteout or a non-fallthru in an opaque dir covers
+		 * everything below it.
+		 */
+		if (!lower->d_inode) {
+			if (d_is_whiteout(lower))
+				break;
+			if (IS_OPAQUE(lower_parent->dentry->d_inode) &&
+			    !d_is_fallthru(lower))
+				break;
+			dput(lower);
+			lower = NULL;
+			continue;
+		}
+
+		/* Files block everything below them.  Special case: If we find
+		 * a file below a directory (which makes no sense), just ignore
+		 * the file and return the directory above it.
+		 */
+		if (!S_ISDIR(lower->d_inode->i_mode)) {
+			if (ret != -ENOENT)
+				break;
+			if (S_ISLNK(lower->d_inode->i_mode)) {
+				d->u_dirs[0].dentry = lower;
+				lower = NULL;
+				ret = 1;
+				break;
+			}
+			ret = -ENOTDIR;
+			break;
+		}
+
+		printk("UNION: layer %u is dir\n", i);
+
+		/* Mountpoints and automount points on a lowerfs just confuse
+		 * everything, so refuse to handle them for the moment.
+		 */
+		if (unlikely(d_mountpoint(lower))) {
+			if (ret == -ENOENT)
+				ret = -EXDEV;
+			break;
+		}
+		if (unlikely(d_managed(lower))) {
+			if (ret == -ENOENT)
+				ret = -EREMOTE;
+			break;
+		}
+
+		d->u_dirs[i].dentry = lower;
+		d->u_dirs[i].mnt = mntget(lower_parent->mnt);
+		lower = NULL;
+		ret = 0;
+	}
+
+	dput(lower);
+	return ret;
+}
+
+/**
+ * union_lookup_parent - Raise directory to upper level
+ * @nd: nameidata for the parent of @path
+ * @path: Path of the target on the upper file system
+ */
+static int union_lookup_parent(struct nameidata *nd, struct path *path)
+{
+	struct union_stack *d;
+	struct dentry *dentry = path->dentry, *lower;
+	struct inode *dir = nd->path.dentry->d_inode;
+	unsigned i, layers = nd->path.dentry->d_sb->s_union_count;
+	int ret;
+
+	printk("UNION: -->union_lookup_parent(%pq/%pq)\n",
+	       &nd->path.dentry->d_name, &path->dentry->d_name);
+
+	if (dentry->d_flags & DCACHE_UNION_LOOKUP_DONE) {
+		printk("UNION: <--union_lookup_parent() = 0 [already 0]\n");
+		return 0;
+	}
+
+	BUG_ON(nd->flags & LOOKUP_RCU); /* lookup_union_rcu() should prevent this */
+	BUG_ON(dentry->d_flags & DCACHE_UNION_PINNING_LOWER);
+
+	d = union_alloc(path);
+	if (!d)
+		return -ENOMEM;
+
+	mutex_lock(&dir->i_mutex);
+	if (dentry->d_flags & DCACHE_UNION_LOOKUP_DONE) {
+		printk("UNION: <--union_lookup_parent() = 0 [already 1]\n");
+		ret = 0;
+		goto out_unlock_mutex;
+	}
+	ret = union_build_dirs(&nd->path, path, d);
+	mutex_unlock(&dir->i_mutex);
+
+	if (ret == 1 || ret == -ENOTDIR) {
+		spin_lock(&dentry->d_lock);
+		if (dentry->d_flags & DCACHE_UNION_LOOKUP_DONE) {
+			/* Lost a race; out: drops any unused lower ref */
+			printk("UNION: <--union_lookup_parent() = 0 [raced]\n");
+		} else if (ret == 1) {
+			dentry->d_flags |= DCACHE_SYMLINK; /* lower symlink */
+			d_pin_lower(dentry, d->u_dirs[0].dentry);
+			d->u_dirs[0].dentry = NULL;
+			printk("UNION: <--union_lookup_parent() = 0 [symlink]\n");
+		} else {
+			printk("UNION: <--union_lookup_parent() = 0 [file]\n");
+		}
+		dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE;
+		spin_unlock(&dentry->d_lock);
+		ret = 0;
+		goto out;
+	}
+	if (ret < 0)
+		goto out;
+
+	/* It's a directory, so it must be raised to the upper level.  However,
+	 * we have to take the locks in the right order.
+	 */
+	printk("UNION: May need to raise dir\n");
+	ret = mnt_want_write(nd->path.mnt);
+	if (ret < 0)
+		goto out;
+
+	mutex_lock(&dir->i_mutex);
+
+	ret = 0;
+	if (!(dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) {
+		printk("UNION: Need to raise dir\n");
+		for (i = 0; i < layers; i++)
+			if ((lower = d->u_dirs[i].dentry))
+				break;
+		ret = union_create_topmost_dir(&nd->path, path, lower);
+		if (ret == 0) {
+			spin_lock(&dentry->d_lock);
+			dentry->d_union_stack = d;
+			dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE;
+			spin_unlock(&dentry->d_lock);
+			d = NULL;
+		}
+	} else {
+		printk("UNION: <--union_lookup_parent() = 0 [already 2]\n");
+	}
+
+	mnt_drop_write(nd->path.mnt);
+out_unlock_mutex:
+	mutex_unlock(&dir->i_mutex);
+out:
+	if (d) {
+		for (i = 0; i < layers; i++)
+			path_put(&d->u_dirs[i]);
+		kfree(d);
+	}
+	printk("UNION: <--union_lookup_parent() = %d\n", ret);
+	return ret;
+}
 
 /**
  * __lookup_union - Lookup and build union stack
@@ -1276,7 +1488,7 @@
 	unsigned i, layers = parent.dentry->d_sb->s_union_count;
 	int err;
 
-	printk("UNION: __lookup %s\n", name->name);
+	printk("UNION: __lookup_union %pq\n", name);
 
 	if (!topmost->dentry->d_inode) {
 		if (d_is_whiteout(topmost->dentry))
@@ -1359,14 +1571,18 @@
 			goto out_err;
 
 		/* Now we know the target is a directory.  Create a matching
-		 * topmost directory if one doesn't already exist, and add this
+		 * topmost directory if one doesn't already exist and add this
 		 * layer's directory to the union stack for the topmost
 		 * directory.
 		 */
 		if (!topmost->dentry->d_inode) {
 			printk("UNION: create top\n");
-			err = union_create_topmost_dir(&parent, name, topmost,
-						       &lower);
+			err = mnt_want_write(parent.mnt);
+			if (err)
+				goto out_err;
+
+			err = union_create_topmost_dir(&parent, topmost, lower.dentry);
+			mnt_drop_write(parent.mnt);
 			if (err)
 				goto out_err;
 		}
@@ -1378,11 +1594,17 @@
 	return 0;
 
 out_found_lower_file:
-	printk("UNION: __lookup found lower file\n");
-	dput(topmost->dentry);
-	*topmost = lower;
-	return 0;
-
+	if (S_ISLNK(lower.dentry->d_inode->i_mode)) {
+		if (nd->flags & (LOOKUP_PARENT | LOOKUP_FOLLOW)) {
+			printk("UNION: __lookup follow symlink\n");
+			dput(topmost->dentry);
+			*topmost = lower;
+			return 0;
+		}
+		printk("UNION: __lookup found lower symlink\n");
+	} else {
+		printk("UNION: __lookup found lower file\n");
+	}
 out_lookup_done:
 	path_put(&lower);
 	return 0;
@@ -1435,6 +1657,7 @@
 	return err;
 }
 
+#if 0 // TODO: REMOVE CONTENTS
 /*
  * lookup_union - union mount-aware part of do_lookup()
  *
@@ -1447,7 +1670,7 @@
 	struct inode *dir = parent->d_inode;
 	int err;
 
-	printk("UNION: lookup %s\n", name->name);
+	printk("UNION: lookup \"%pq\"\n", name);
 
 	mutex_lock(&dir->i_mutex);
 	err = lookup_union_locked(nd, name, topmost);
@@ -1457,24 +1680,17 @@
 
 	return follow_managed(topmost, nd->flags);
 }
+#endif
 
 /*
  * lookup_union_rcu - Handle union mounted dentries in RCU-walk mode
  * @nd: The current pathwalk state (refers to @parent currently)
  * @parent: The parent directory (holds the union stack)
  * @path: The point just looked up in @parent
- * @parent_seq: The d_seq of @parent at the point of lookup
  * @inode: The inode at @dentry (*@inode is NULL if negative dentry)
  *
  * Handle a dentry that represents a non-directory file or a hole/reference in
- * a union mount upperfs.  This involves transiting to the lower file, provided
- * we aren't going to open the lower file for writing - otherwise we have to
- * copy the file up (which we can't do in rcuwalk mode).
- *
- * Directories are handled differently: they're unconditionally and completely
- * mirrored from the lowerfs to the upperfs as soon as we encounter them in a
- * lookup.  However, since we don't create dentries in rcuwalk mode, this will
- * be handled automatically by refwalk mode.
+ * a union mount upperfs.
  *
  * We return true if we don't need to do anything or if we've successfully
  * updated the path.  If we need to drop out of RCU-walk and go to refwalk
@@ -1483,18 +1699,16 @@
 static bool lookup_union_rcu(struct nameidata *nd,
 			     struct dentry *parent,
 			     struct path *path,
-			     unsigned parent_seq,
 			     struct inode **inode)
 {
 	struct dentry *dentry = path->dentry;
-	struct inode *parent_inode = nd->inode;
-	unsigned layer, layers;
+	struct inode *dir = nd->inode;
 
 	/* Handle non-unionmount dentries first.  The union stack will have
 	 * been built during the initial lookup of the parent dir, so if it's
 	 * not there, it's not unioned.
 	 */
-	if (!IS_DIR_UNIONED(parent))
+	if (likely(!IS_DIR_UNIONED(parent)))
 		return true;
 
 	printk("UNION: Dir is unioned (RCU)\n");
@@ -1507,88 +1721,22 @@
 
 	/* If this dentry is a blocker, then stop here. */
 	if (d_is_whiteout(dentry) ||
-	    (IS_OPAQUE(parent_inode) && !d_is_fallthru(dentry)))
+	    (!d_is_fallthru(dentry) && IS_OPAQUE(dir)))
 		return true;
 
-	/* The dentry is a fallthru in an opaque unioned directory.
+	/* If we need to look below, then we should break out of RCU walk mode
+	 * with immediate effect.  There are three cases:
 	 *
-	 * If the caller demands that the terminal dentry be instantiated in
-	 * the top layer of the union (copied up) immediately, that will
-	 * require a mutex.
+	 * (1) We've encountered a lower directory.  This must be copied up.
+	 *
+	 * (2) We've encountered a symlink.  Symlinks are walked in refwalk
+	 *     mode (or (3) applies if NOFOLLOW).
+	 *
+	 * (3) We've encountered some other type of file.  This must terminate
+	 *     the pathwalk immediately, one way or another.
 	 */
-	if (nd->flags & LOOKUP_COPY_UP) {
-		printk("Must copy up %s\n", dentry->d_name.name);
-		return false;
-	}
-
-	/* At this point we have a negative dentry in the unionmount that may
-	 * be overlaying a non-directory file in a lower filesystem, so we loop
-	 * through the union stack of the parent directory to try to find a
-	 * usable dentry further down.
-	 */
-	layers = parent->d_sb->s_union_count;
-	for (layer = 0; layer < layers; layer++) {
-		/* Look for the a matching dentry in this layer, assuming it's
-		 * still valid.  Since the lower fs is hard locked R/O,
-		 * revalidation ought to be unnecessary.
-		 */
-		unsigned ldseq, seq;
-		struct dentry *lower_dir, *lower;
-		struct path *lower_path = union_find_dir(parent, layer);
-		if (!lower_path->mnt)
-			continue;
-
-		lower_dir = lower_path->dentry;
-		ldseq = read_seqcount_begin(&lower_dir->d_seq);
-
-		if (unlikely(lower_dir->d_flags & DCACHE_OP_REVALIDATE)) {
-			if (unlikely(d_revalidate(lower_dir, nd->flags) <= 0) ||
-			    __read_seqcount_retry(&lower_dir->d_seq, ldseq))
-				return false;
-		}
-
-		lower = __d_lookup_rcu(lower_dir, &dentry->d_name, &seq, *inode);
-		if (!lower)
-			return false;
-		*inode = lower->d_inode;
-
-		/* We've got a negative dentry which can mean several things: a
-		 * plain negative dentry is ignored and lookup continues to the
-		 * next layer; but a whiteout or a non-fallthru in an opaque
-		 * dir covers everything below it.
-		 */
-		if (!*inode) {
-			if (d_is_whiteout(lower) ||
-			    (IS_OPAQUE(parent_inode) && !d_is_fallthru(lower))) {
-				if (read_seqcount_retry(&lower_dir->d_seq,
-							ldseq))
-					return false;
-				return true;
-			}
-			continue;
-		}
-
-		/* If the lower dentry is a directory then it will need copying
-		 * up before we can make use of it.
-		 */
-		if (S_ISDIR((*inode)->i_mode)) {
-			printk("UNION: Need copyup\n");
-			return false;
-		}
-
-		/* There is a file in a lower fs that we can use */
-		if (read_seqcount_retry(&lower_dir->d_seq, ldseq) ||
-		    __read_seqcount_retry(&parent->d_seq, parent_seq))
-			return false;
-
-		path->mnt = lower_path->mnt;
-		path->dentry = lower;
-		nd->seq = seq;
-		return true;
-	}
-
-	/* Found nothing, so just use the top negative dentry */
-	return true;
+	printk("UNION: Drop out of RCU\n");
+	return false;
 }
 
 /*
@@ -1656,7 +1804,7 @@
 	return dentry;
 }
 
-static struct dentry *__lookup_hash(struct qstr *name,
+struct dentry *__lookup_hash(struct qstr *name,
 		struct dentry *base, unsigned int flags)
 {
 	bool need_lookup;
@@ -1674,17 +1822,18 @@
  *  small and for now I'd prefer to have fast path as straight as possible.
  *  It _is_ time-critical.
  */
-static int lookup_fast(struct nameidata *nd,
-		       struct path *path, struct inode **inode)
+static noinline int lookup_fast(struct nameidata *nd,
+		       struct path *path, struct inode **_inode)
 {
 	struct vfsmount *mnt = nd->path.mnt;
 	struct dentry *dentry, *parent = nd->path.dentry;
+	struct inode *inode;
 	int need_reval = 1;
 	int status = 1;
 	int err;
 
 	if (IS_DIR_UNIONED(parent))
-		printk("UNION: --> lookup_fast(%s)\n", nd->last.name);
+		printk("UNION: --> lookup_fast(%pq)\n", &nd->last);
 
 	/*
 	 * Rename seqlock is not required here because in the off chance
@@ -1694,14 +1843,20 @@
 	if (nd->flags & LOOKUP_RCU) {
 		unsigned seq, pseq;
 		dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode);
-		if (!dentry)
+		if (!dentry) {
+			if (IS_DIR_UNIONED(parent))
+				printk("UNION: __d_lookup_rcu\n");
 			goto unlazy;
+		}
 
 		/*
 		 * This sequence count validates that the inode matches
 		 * the dentry name information from lookup.
 		 */
-		*inode = dentry->d_inode;
+		inode = dentry->d_inode;
+		if (!inode && d_is_pinning_lower(dentry))
+			inode = dentry->d_fallthru->d_inode;
+		*_inode = inode;
 		if (read_seqcount_retry(&dentry->d_seq, seq))
 			return -ECHILD;
 
@@ -1722,17 +1877,28 @@
 			if (unlikely(status <= 0)) {
 				if (status != -ECHILD)
 					need_reval = 0;
+				if (IS_DIR_UNIONED(parent))
+					printk("UNION: d_revalidate\n");
 				goto unlazy;
 			}
 		}
 		path->mnt = mnt;
 		path->dentry = dentry;
-		if (unlikely(!lookup_union_rcu(nd, parent, path, pseq, inode)))
+		if (unlikely(!lookup_union_rcu(nd, parent, path, _inode))) {
+			if (IS_DIR_UNIONED(parent))
+				printk("UNION: !lookup_union_rcu\n");
 			goto unlazy;
-		if (unlikely(!__follow_mount_rcu(nd, path, inode)))
+		}
+		if (unlikely(!__follow_mount_rcu(nd, path, _inode))) {
+			if (IS_DIR_UNIONED(parent))
+				printk("UNION: !__follow_mount_rcu\n");
 			goto unlazy;
-		if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+		}
+		if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
+			if (IS_DIR_UNIONED(parent))
+				printk("UNION: need_automount\n");
 			goto unlazy;
+		}
 		return 0;
 unlazy:
 		if (IS_DIR_UNIONED(parent))
@@ -1771,6 +1937,7 @@
 	if (err)
 		nd->flags |= LOOKUP_JUMPED;
 
+#if 0
 	if (needs_lookup_union(nd, &nd->path, path)) {
 		int err;
 		printk("UNION: Fast need lookup\n");
@@ -1778,8 +1945,12 @@
 		if (err < 0)
 			return err;
 	}
+#endif
 
-	*inode = path->dentry->d_inode;
+	inode = path->dentry->d_inode;
+	if (!inode && d_is_pinning_lower(path->dentry))
+		inode = path->dentry->d_fallthru->d_inode;
+	*_inode = inode;
 	return 0;
 
 need_lookup:
@@ -1789,7 +1960,7 @@
 }
 
 /* Fast lookup failed, do it the slow way */
-static int lookup_slow(struct nameidata *nd, struct path *path)
+static noinline int lookup_slow(struct nameidata *nd, struct path *path)
 {
 	struct dentry *dentry, *parent;
 	int err;
@@ -1798,14 +1969,14 @@
 	BUG_ON(nd->inode != parent->d_inode);
 
 	if (IS_DIR_UNIONED(parent))
-		printk("UNION: --> lookup_slow(%s)\n", nd->last.name);
+		printk("UNION: --> lookup_slow(%pq)\n", &nd->last);
 
 	mutex_lock(&parent->d_inode->i_mutex);
 	dentry = __lookup_hash(&nd->last, parent, nd->flags);
 	mutex_unlock(&parent->d_inode->i_mutex);
 
 	if (IS_DIR_UNIONED(parent))
-		printk("UNION: --> slow: __lookup_hash() = %p\n", dentry);
+		printk("UNION: slow: __lookup_hash() = %p\n", dentry);
 
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
@@ -1818,6 +1989,7 @@
 	}
 	if (err)
 		nd->flags |= LOOKUP_JUMPED;
+#if 0
 	if (needs_lookup_union(nd, &nd->path, path)) {
 		printk("UNION: Slow need lookup\n");
 		err = lookup_union(nd, &nd->last, path);
@@ -1826,10 +1998,11 @@
 			return err;
 		}
 	}
+#endif
 	return 0;
 }
 
-static inline int may_lookup(struct nameidata *nd)
+static noinline int may_lookup(struct nameidata *nd)
 {
 	if (nd->flags & LOOKUP_RCU) {
 		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
@@ -1871,28 +2044,19 @@
  * so we keep a cache of "no, this doesn't need follow_link"
  * for the common case.
  */
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
 {
-	if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
-		if (likely(inode->i_op->follow_link))
-			return follow;
-
-		/* This gets set once for the inode lifetime */
-		spin_lock(&inode->i_lock);
-		inode->i_opflags |= IOP_NOFOLLOW;
-		spin_unlock(&inode->i_lock);
-	}
-	return 0;
+	return unlikely(d_is_symlink(dentry)) ? follow : 0;
 }
 
-static inline int walk_component(struct nameidata *nd, struct path *path,
+static noinline int walk_component(struct nameidata *nd, struct path *path,
 		int follow)
 {
 	struct inode *inode;
 	int err;
 
 	if (IS_DIR_UNIONED(nd->path.dentry))
-		printk("UNION: --> walk_component(%s)\n", nd->last.name);
+		printk("UNION: --> walk_component(%pq)\n", &nd->last);
 
 	/*
 	 * "." and ".." are special - ".." especially so because it has
@@ -1911,15 +2075,35 @@
 			goto out_err;
 
 		inode = path->dentry->d_inode;
+		if (!inode && d_is_pinning_lower(path->dentry))
+			inode = path->dentry->d_fallthru->d_inode;
 	}
 
-	if (IS_DIR_UNIONED(nd->path.dentry))
-		printk("UNION: --> walk: looked up\n");
-	err = -ENOENT;
-	if (!inode)
-		goto out_path_put;
+	if (IS_MNT_UNION(path->mnt)) {
+		printk("UNION: walk_comp: path->mnt UPPER%s\n",
+		       nd->flags & LOOKUP_RCU ? " RCU" : "");
+		printk("UNION: dentry %pq: %p(%p)\n",
+		       &path->dentry->d_name, path->dentry, path->dentry->d_inode);
+	}
 
-	if (should_follow_link(inode, follow)) {
+	if (!inode) {
+		if (likely(!IS_MNT_UNION(path->mnt)))
+			goto enoent;
+
+		if (!(path->dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) {
+			err = union_lookup_parent(nd, path);
+			if (err < 0)
+				goto out_path_put;
+		}
+		if (should_follow_link(path->dentry, follow))
+			return 1;
+		inode = path->dentry->d_inode;
+		if (!inode && nd->flags & LOOKUP_PARENT)
+			goto enoent;
+		goto success;
+	}
+
+	if (should_follow_link(path->dentry, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path->dentry))) {
 				err = -ECHILD;
@@ -1929,10 +2113,13 @@
 		BUG_ON(inode != path->dentry->d_inode);
 		return 1;
 	}
+success:
 	path_to_nameidata(path, nd);
 	nd->inode = inode;
 	return 0;
 
+enoent:
+	err = -ENOENT;
 out_path_put:
 	path_to_nameidata(path, nd);
 out_err:
@@ -1983,18 +2170,20 @@
  * the inode ->i_opflags field that says "yes, we can
  * do lookup on this inode".
  */
-static inline int can_lookup(struct inode *inode)
+static inline bool can_lookup(struct inode *inode)
 {
+	if (unlikely(!inode))
+		return false; /* Unioned non-dir, non-symlink */
 	if (likely(inode->i_opflags & IOP_LOOKUP))
-		return 1;
+		return true;
 	if (likely(!inode->i_op->lookup))
-		return 0;
+		return false;
 
 	/* We do this once for the lifetime of the inode */
 	spin_lock(&inode->i_lock);
 	inode->i_opflags |= IOP_LOOKUP;
 	spin_unlock(&inode->i_lock);
-	return 1;
+	return true;
 }
 
 /*
@@ -2297,109 +2486,13 @@
 	return 0;
 }
 
-/*
- * Walk the final component of a path, returning it in *path.  The parent is
- * left in nd->path.
- */
-static int walk_last_component(struct nameidata *nd, struct path *path,
-			       int follow)
-{
-	struct inode *inode;
-	int err;
-
-	if (IS_DIR_UNIONED(nd->path.dentry))
-		printk("UNION: --> walk_last_component(%s)\n", nd->last.name);
-
-	/* "." and ".." are special - ".." especially so because it has
-	 * to be able to know about the current root directory and
-	 * parent relationships.
-	 */
-	if (unlikely(nd->last_type != LAST_NORM))
-		return handle_dots(nd, nd->last_type);
-	err = lookup_fast(nd, path, &inode);
-	if (unlikely(err)) {
-		if (err < 0)
-			goto out_err;
-		err = lookup_slow(nd, path);
-		if (err < 0)
-			goto out_err;
-		inode = path->dentry->d_inode;
-	}
-
-	if (IS_MNT_UNION(nd->path.mnt))
-		printk("UNION: walk_last_comp: nd->path.mnt UPPER\n");
-	if (IS_MNT_LOWER(nd->path.mnt))
-		printk("UNION: walk_last_comp: nd->path.mnt LOWER\n");
-	if (IS_MNT_UNION(path->mnt))
-		printk("UNION: walk_last_comp: path->mnt UPPER\n");
-	if (IS_MNT_LOWER(path->mnt))
-		printk("UNION: walk_last_comp: path->mnt LOWER\n");
-
-	err = -ENOENT;
-	if (!inode)
-		goto out_path_put;
-
-	if (should_follow_link(inode, follow)) {
-		if (nd->flags & LOOKUP_RCU) {
-			if (unlikely(unlazy_walk(nd, path->dentry))) {
-				err = -ECHILD;
-				goto out_err;
-			}
-		}
-		BUG_ON(inode != path->dentry->d_inode);
-		return 1;
-	}
-
-	if (nd->flags & LOOKUP_COPY_UP && IS_MNT_LOWER(path->mnt)) {
-		BUG_ON(S_ISDIR(inode->i_mode));
-		printk("UNION: walk_last_comp: Need to copy up\n");
-		if (nd->flags & LOOKUP_RCU &&
-		    unlikely(unlazy_walk(nd, path->dentry))) {
-			err = -ECHILD;
-			goto out_err;
-		}
-
-		err = mnt_want_write(nd->path.mnt);
-		if (err)
-			goto out_path_put;
-
-		mutex_lock(&nd->path.dentry->d_inode->i_mutex);
-		err = union_copyup(&nd->path, path, true, 0);
-		mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
-		mnt_drop_write(nd->path.mnt);
-		if (err)
-			goto out_path_put;
-
-		if (path->mnt != nd->path.mnt)
-			printk("UNION: !!! mnt not changed by copyup\n");
-
-		printk("UNION: walk_last_comp: copied up lower\n");
-		BUG_ON(path->mnt != nd->path.mnt);
-
-		inode = path->dentry->d_inode;
-		err = -ENOENT;
-		if (!inode)
-			goto out_path_put;
-	}
-
-	path_to_nameidata(path, nd);
-	nd->inode = inode;
-	return 0;
-
-out_path_put:
-	path_to_nameidata(path, nd);
-out_err:
-	terminate_walk(nd);
-	return err;
-}
-
 static inline int lookup_last(struct nameidata *nd, struct path *path)
 {
 	if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
 		nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 
 	nd->flags &= ~LOOKUP_PARENT;
-	return walk_last_component(nd, path, nd->flags & LOOKUP_FOLLOW);
+	return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW);
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
@@ -2431,6 +2524,9 @@
 
 	current->total_link_count = 0;
 	err = link_path_walk(name, nd);
+	if (!err && IS_MNT_UNION(nd->path.mnt))
+		printk("UNION: link_path_walk returned nd->path.mnt UPPER%s\n>>>\n",
+		       flags & LOOKUP_PARENT ? " PARENT" : "");
 
 	/* At this point we've processed all the non-terminal parts of the path
 	 * and are ready to tackle the final section.  The final section may
@@ -2442,14 +2538,22 @@
 		while (err > 0) {
 			void *cookie;
 			struct path link = terminal_symlink;
+			if (IS_MNT_UNION(nd->path.mnt))
+				printk("UNION: path_lookupat: may_follow_link\n");
 			err = may_follow_link(&link, nd);
 			if (unlikely(err))
 				break;
 			nd->flags |= LOOKUP_PARENT;
+			if (IS_MNT_UNION(nd->path.mnt))
+				printk("UNION: path_lookupat: follow_link\n");
 			err = follow_link(&link, nd, &cookie);
 			if (err)
 				break;
+			if (IS_MNT_UNION(nd->path.mnt))
+				printk("UNION: path_lookupat: lookup_last\n");
 			err = lookup_last(nd, &terminal_symlink);
+			if (IS_MNT_UNION(nd->path.mnt))
+				printk("UNION: path_lookupat: put_link\n");
 			put_link(nd, &link, cookie);
 		}
 
@@ -2469,7 +2573,7 @@
 		err = complete_walk(nd);
 
 	if (!err && nd->flags & LOOKUP_DIRECTORY) {
-		if (!can_lookup(nd->inode)) {
+		if (!nd->inode || !can_lookup(nd->inode)) {
 			path_put(&nd->path);
 			err = -ENOTDIR;
 		}
@@ -2678,7 +2782,7 @@
 	if (!inode)
 		goto out_path_put;
 
-	if (should_follow_link(inode, follow)) {
+	if (should_follow_link(path.dentry, follow)) {
 		if (nd->flags & LOOKUP_RCU) {
 			if (unlikely(unlazy_walk(nd, path.dentry))) {
 				err = -ECHILD;
@@ -3049,10 +3153,8 @@
 	return error;
 }
 
-static int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(struct path *path, struct inode *inode, int acc_mode, int flag)
 {
-	struct dentry *dentry = path->dentry;
-	struct inode *inode = dentry->d_inode;
 	int error;
 
 	/* O_PATH? */
@@ -3173,7 +3275,7 @@
 	BUG_ON(dentry->d_inode);
 
 	if (IS_DIR_UNIONED(nd->path.dentry))
-		printk("UNION: --> atomic_open(%s)\n", nd->last.name);
+		printk("UNION: --> atomic_open(%pq)\n", &nd->last);
 	if (IS_MNT_UNION(nd->path.mnt))
 		printk("UNION: --> atomic_open(UNIONED)\n");
 
@@ -3269,7 +3371,7 @@
 	 * We didn't have the inode before the open, so check open permission
 	 * here.
 	 */
-	error = may_open(&file->f_path, acc_mode, open_flag);
+	error = may_open(&file->f_path, file->f_inode, acc_mode, open_flag);
 	if (error)
 		fput(file);
 
@@ -3306,6 +3408,53 @@
 }
 
 /*
+ * Look below a top-level dentry to see what lies beneath on a lower fs and
+ * mark the dentry up appropriately.  The caller must hold the parent directory
+ * inode mutex.
+ */
+static noinline int union_assess_lower(struct nameidata *nd, struct path *path)
+{
+	struct path lower_cache, actual;
+	struct inode *inode;
+	struct dentry *dentry;
+	int ret = 0;
+
+	printk("union_assess_lower: %x %p\n",
+	       path->dentry->d_flags, path->dentry->d_fallthru);
+
+	inode = union_get_inode_locked(nd->path.dentry, path,
+				       &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	dentry = path->dentry;
+	spin_lock(&dentry->d_lock);
+
+	if (!inode) {
+		printk("UNION: union_assess_lower: no ino\n");
+		BUG_ON(dentry->d_fallthru != NULL);
+		dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE;
+	} else if (S_ISDIR(inode->i_mode)) {
+		/* A directory will need creating up top regardless. */
+		printk("UNION: union_assess_lower: isdir\n");
+		ret = -ENOANO;
+	} else {
+		if (d_is_symlink(actual.dentry)) {
+			printk("UNION: union_assess_lower: sym\n");
+			dentry->d_flags |= DCACHE_SYMLINK;
+		} else {
+			printk("UNION: union_assess_lower: file\n");
+		}
+		d_pin_lower(dentry, dget_dlock(actual.dentry));
+		dentry->d_flags |= DCACHE_UNION_LOOKUP_DONE;
+	}
+
+	spin_unlock(&path->dentry->d_lock);
+	path_put_maybe(&lower_cache);
+	return ret;
+}
+
+/*
  * Look up and maybe create and open the last component.
  *
  * Must be called with i_mutex held on parent.
@@ -3339,7 +3488,7 @@
 	bool need_lookup;
 
 	if (IS_DIR_UNIONED(dir))
-		printk("UNION: --> lookup_open(%s)\n", nd->last.name);
+		printk("UNION: --> lookup_open(%pq)\n", &nd->last);
 	if (IS_MNT_UNION(nd->path.mnt))
 		printk("UNION: --> lookup_open(UNIONED)\n");
 	if (IS_MNT_LOWER(nd->path.mnt))
@@ -3420,37 +3569,34 @@
 		 * Otherwise we need to copy up the whole file.
 		 */
 		if (IS_MNT_UNION(nd->path.mnt)) {
-			struct path topmost;
-
-			printk("UNION: %p/%p\n", dir->d_sb, dentry->d_sb);
-
 			printk("UNION: deal with O_CREAT\n");
 
-			if (d_is_whiteout(dentry) ||
-			    (IS_OPAQUE(dir_inode) && !d_is_fallthru(dentry)))
-				goto just_create; /* Lower is blocked off */
-
-			/* Look up the lower file.  This checks the lower
-			 * layers to see if we need to copy up, but if the file
-			 * is opened read-only with just O_CREAT then it'll
-			 * return the lower file.
-			 */
-			topmost.mnt = nd->path.mnt;
-			topmost.dentry = dentry;
-			error = __lookup_union(nd, &dentry->d_name, &topmost);
-			if (error)
-				goto out_dput;
-
-			if (topmost.mnt != nd->path.mnt) {
-				/* Moved down. */
-				printk("UNION: found lower (ignore O_CREAT)\n");
-				BUG_ON(!IS_MNT_LOWER(topmost.mnt));
-				BUG_ON(!topmost.dentry->d_inode);
-				*path = topmost;
-				printk("UNION: looked up lower (O_CREAT ignored)\n");
-				return 1;
+			if (!(dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) {
+				struct path tmp = {
+					.mnt = nd->path.mnt,
+					.dentry = dentry,
+				};
+				error = union_assess_lower(nd, &tmp);
+				if (error)
+					goto out_dput;
 			}
-			BUG_ON(topmost.dentry != dentry);
+
+			if (d_is_whiteout(dentry) ||
+			    (!d_is_fallthru(dentry) && IS_OPAQUE(dir_inode))) {
+				printk("UNION: lower blocked\n");
+				goto just_create; /* Lower is blocked off */
+			}
+			if (d_is_symlink(dentry))
+				goto out_no_open;
+
+			printk("UNION: deal with O_CREAT\n");
+			if (d_is_pinning_lower(dentry) &&
+			    dentry->d_fallthru->d_inode) {
+				printk("UNION: lower available (O_CREAT ignored)\n");
+				goto out_no_open;
+			}
+
+			printk("UNION: create over lower\n");
 		}
 
 	just_create:
@@ -3460,27 +3606,11 @@
 			goto out_dput;
 		error = vfs_create(dir->d_inode, dentry, mode,
 				   nd->flags & LOOKUP_EXCL);
+		if (IS_MNT_UNION(nd->path.mnt))
+			printk("UNION: vfs_create() = %d [%pq: %p]\n",
+			       error, &dentry->d_name, dentry);
 		if (error)
 			goto out_dput;
-	} else if (!dentry->d_inode && IS_DIR_UNIONED(dir)) {
-		/* The file does not exist in the top layer of a union - but it
-		 * might exist in a lower layer.
-		 */
-		printk("UNION: %p/%p\n", dir->d_sb, dentry->d_sb);
-
-		if (d_is_whiteout(dentry) ||
-		    (IS_OPAQUE(dir_inode) && !d_is_fallthru(dentry)))
-			goto out_no_open; /* Lower is blocked off */
-
-		/* Check for a lower file */
-		path->mnt = nd->path.mnt;
-		path->dentry = dentry;
-		error = __lookup_union(nd, &dentry->d_name, path);
-		if (error)
-			goto out_dput;
-
-		printk("UNION: looked up lower\n");
-		return 1;
 	}
 
 out_no_open:
@@ -3581,6 +3711,7 @@
 		 * dropping this one anyway.
 		 */
 	}
+
 	mutex_lock(&dir->d_inode->i_mutex);
 	error = lookup_open(nd, path, file, op, got_write, opened);
 	mutex_unlock(&dir->d_inode->i_mutex);
@@ -3667,17 +3798,56 @@
 	else if (IS_MNT_UNION(nd->path.mnt))
 		printk("UNION: do_last: nd->path.mnt at upper\n");
 	/* we _can_ be in RCU mode here */
-	BUG_ON(IS_MNT_LOWER(path->mnt) &&
-	       (d_is_whiteout(path->dentry) ||
-		(IS_OPAQUE(dir->d_inode) && d_is_fallthru(path->dentry))));
 
-	error = -ENOENT;
 	if (!inode) {
-		path_to_nameidata(path, nd);
-		goto out;
+		if (likely(!IS_DIR_UNIONED(dir)))
+			goto noent;
+
+		printk("UNION: %pq: d_flags = %x\n",
+		       &path->dentry->d_name, path->dentry->d_flags);
+
+		if (d_is_whiteout(path->dentry) ||
+		    (!d_is_fallthru(path->dentry) && IS_OPAQUE(dir->d_inode)))
+			goto noent;
+
+		/* The dentry falls through from the upper fs of a union - so
+		 * we need to take a look below it.
+		 */
+		printk("UNION: fell through\n");
+		if (!(path->dentry->d_flags & DCACHE_UNION_LOOKUP_DONE)) {
+			/* Need to have a look below, but step out of RCU-mode
+			 * first.  We use d_fallthru to cache the result for
+			 * subsequent RCU-mode walks.
+			 */
+			if (nd->flags & LOOKUP_RCU &&
+			    unlikely(unlazy_walk(nd, path->dentry))) {
+				error = -ECHILD;
+				goto out;
+			}
+
+			mutex_lock(&dir->d_inode->i_mutex);
+			error = union_assess_lower(nd, path);
+			mutex_unlock(&dir->d_inode->i_mutex);
+			if (error)
+				goto exit_dput;	/* drop path ref */
+		}
+
+		if (!d_is_pinning_lower(path->dentry))
+			goto noent;
+
+		inode = path->dentry->d_fallthru->d_inode;
+		if (!inode)
+			/* Fallthrough to nothing, it would appear. */
+			goto noent;
+
+		printk("UNION: got lower from d_fallthru\n");
 	}
 
-	if (should_follow_link(inode, !symlink_ok)) {
+	if (should_follow_link(path->dentry, !symlink_ok)) {
+		/* The dentry is either a symlink on this fs or it's a
+		 * fallthrough to a symlink in a lower fs (in which case inode
+		 * will be NULL).
+		 */
 		if (IS_DIR_UNIONED(dir))
 			printk("UNION: should_follow_link() -> true\n");
 		if (nd->flags & LOOKUP_RCU) {
@@ -3686,16 +3856,17 @@
 				goto out;
 			}
 		}
-		BUG_ON(inode != path->dentry->d_inode);
 		if (got_write)
 			mnt_drop_write(nd->path.mnt);
 		return 1;
 	}
 
-	if (IS_MNT_LOWER(path->mnt) &&
+	if (IS_DIR_UNIONED(dir) &&
 	    nd->flags & LOOKUP_COPY_UP &&
+	    !path->dentry->d_inode &&
 	    S_ISREG(inode->i_mode)) {
-		printk("UNION: Need to copy up\n");
+		printk("UNION: WWWWW Need to copy up\n");
+
 		if (nd->flags & LOOKUP_RCU &&
 		    unlikely(unlazy_walk(nd, path->dentry))) {
 			path_to_nameidata(path, nd);
@@ -3703,6 +3874,10 @@
 			goto out;
 		}
 
+		/* Like inode_permission(), but inode->i_sb != dentry->d_sb */
+		error = sb_permission(path->dentry->d_sb, inode, MAY_WRITE);
+		if (error < 0)
+			goto exit_dput;
 		error = __inode_permission(inode, MAY_WRITE);
 		if (error < 0)
 			goto exit_dput;
@@ -3711,9 +3886,7 @@
 		if (error)
 			goto exit_dput;
 
-		mutex_lock(&dir->d_inode->i_mutex);
-		error = union_copyup(&nd->path, path, !will_truncate, 0);
-		mutex_unlock(&dir->d_inode->i_mutex);
+		error = union_copy_up_for_do_last(&nd->path, path, will_truncate);
 		mnt_drop_write(nd->path.mnt);
 		if (error)
 			goto exit_dput;
@@ -3759,7 +3932,7 @@
 		printk("UNION: --> complete_walk()\n");
 	error = complete_walk(nd);
 	if (IS_DIR_UNIONED(dir))
-		printk("UNION: <-- complete_walk()\n");
+		printk("UNION: <-- complete_walk() = %d\n", error);
 	if (error) {
 		path_put(&save_parent);
 		return error;
@@ -3770,8 +3943,11 @@
 	if ((open_flag & O_CREAT) && S_ISDIR(inode->i_mode))
 		goto out;
 	error = -ENOTDIR;
-	if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(inode))
+	if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(inode)) {
+		if (IS_DIR_UNIONED(dir))
+			printk("UNION: !can_lookup\n");
 		goto out;
+	}
 	if (!S_ISREG(inode->i_mode))
 		will_truncate = false;
 
@@ -3782,11 +3958,15 @@
 		got_write = true;
 	}
 finish_open_created:
-	error = may_open(&nd->path, acc_mode, open_flag);
+	error = may_open(&nd->path, inode, acc_mode, open_flag);
+	if (IS_DIR_UNIONED(dir))
+		printk("UNION: <-- may_open() = %d\n", error);
 	if (error)
 		goto out;
 	file->f_path.mnt = nd->path.mnt;
 	error = finish_open(file, nd->path.dentry, inode, NULL, opened);
+	if (IS_DIR_UNIONED(dir))
+		printk("UNION: <-- finish_open() = %d\n", error);
 	if (error) {
 		if (error == -EOPENSTALE)
 			goto stale_open;
@@ -3812,6 +3992,13 @@
 	terminate_walk(nd);
 	return error;
 
+noent:
+	if (IS_DIR_UNIONED(dir))
+		printk("UNION: %pq: ENOENT\n", &path->dentry->d_name);
+	error = -ENOENT;
+	path_to_nameidata(path, nd);
+	goto out;
+
 exit_dput:
 	path_put_conditional(path, nd);
 	goto out;
@@ -3876,7 +4063,7 @@
 	if (error)
 		goto out2;
 	audit_inode(pathname, nd->path.dentry, 0);
-	error = may_open(&nd->path, op->acc_mode, op->open_flag);
+	error = may_open(&nd->path, nd->path.dentry->d_inode, op->acc_mode, op->open_flag);
 	if (error)
 		goto out2;
 	file->f_path.mnt = nd->path.mnt;
@@ -4000,7 +4187,7 @@
 	nd.root.mnt = mnt;
 	nd.root.dentry = dentry;
 
-	if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+	if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
 		return ERR_PTR(-ELOOP);
 
 	file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -4015,6 +4202,8 @@
 				struct path *path, unsigned int lookup_flags)
 {
 	struct nameidata nd;
+	struct path lower_cache, actual;
+	struct inode *inode;
 	struct path new_path;
 	int err2;
 	int error;
@@ -4047,14 +4236,18 @@
 	 */
 	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	error = lookup_hash(&nd, &new_path);
-	if (!error && needs_lookup_union(&nd, &nd.path, &new_path))
-		error = lookup_union_locked(&nd, &nd.last, &new_path);
 	if (error)
 		goto unlock;
 
+	inode = union_get_inode_locked(nd.path.dentry, &new_path,
+				       &lower_cache, &actual);
+	error = PTR_ERR(inode);	/* else 'put' would leave error == 0 */
+	if (IS_ERR(inode))
+		goto put;
+	path_put_maybe(&lower_cache);
 	error = -EEXIST;
-	if (new_path.dentry->d_inode)
-		goto fail;
+	if (inode)
+		goto put;
 	/*
 	 * Special case - lookup gave negative, but... we had foo/bar/
 	 * From the vfs_mknod() POV we just have a negative dentry -
@@ -4063,15 +4256,16 @@
 	 */
 	if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
 		error = -ENOENT;
-		goto fail;
+		goto put;
 	}
 	if (unlikely(err2)) {
 		error = err2;
-		goto fail;
+		goto put;
 	}
 	*path = nd.path;
 	return new_path.dentry;
-fail:
+
+put:
 	dput(new_path.dentry);
 unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -4512,10 +4706,6 @@
 		error = lookup_union_locked(&nd, &nd.last, &path);
 	if (error)
 		goto exit2;
-	if (!path.dentry->d_inode) {
-		error = -ENOENT;
-		goto exit3;
-	}
 	error = security_path_rmdir(&nd.path, path.dentry);
 	if (error)
 		goto exit3;
@@ -4780,7 +4970,8 @@
 		int, newdfd, const char __user *, newname, int, flags)
 {
 	struct dentry *new_dentry;
-	struct path old_path, new_path;
+	struct inode *inode;
+	struct path old_path, new_path, lower_cache, actual;
 	int how = 0;
 	int error;
 
@@ -4801,10 +4992,20 @@
 		how |= LOOKUP_FOLLOW;
 
 retry:
-	error = user_path_at(olddfd, oldname, how | LOOKUP_COPY_UP, &old_path);
+	error = user_path_at(olddfd, oldname, how, &old_path);
 	if (error)
 		return error;
 
+	inode = union_get_inode(&old_path, &lower_cache, &actual);
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		goto out;
+	}
+	error = union_copy_up(&old_path, &actual);
+	path_put_maybe(&lower_cache);
+	if (error < 0)
+		goto out;
+
 	new_dentry = user_path_create(newdfd, newname, &new_path,
 					(how & LOOKUP_REVAL));
 	error = PTR_ERR(new_dentry);
@@ -4994,7 +5195,8 @@
 		int, newdfd, const char __user *, newname)
 {
 	struct dentry *old_dir, *new_dir;
-	struct path old, new;
+	struct inode *old_inode;
+	struct path old, new, old_lower_cache, old_actual;
 	struct dentry *trap;
 	struct nameidata oldnd, newnd;
 	struct filename *from;
@@ -5032,23 +5234,26 @@
 		goto exit2;
 
 	oldnd.flags &= ~LOOKUP_PARENT;
-	oldnd.flags |= LOOKUP_COPY_UP;
 	newnd.flags &= ~LOOKUP_PARENT;
 	newnd.flags |= LOOKUP_RENAME_TARGET;
 
 	trap = lock_rename(new_dir, old_dir);
 
 	error = lookup_hash(&oldnd, &old);
-	if (!error && needs_lookup_union(&oldnd, &oldnd.path, &old))
-		error = lookup_union_locked(&oldnd, &oldnd.last, &old);
 	if (error)
 		goto exit3;
 	/* source must exist */
+	old_inode = union_get_inode_locked(oldnd.path.dentry, &old,
+					   &old_lower_cache, &old_actual);
+	if (IS_ERR(old_inode)) {
+		error = PTR_ERR(old_inode);
+		goto exit_err_old_inode;
+	}
 	error = -ENOENT;
-	if (!old.dentry->d_inode)
+	if (!old_inode)
 		goto exit4;
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
-	if (!S_ISDIR(old.dentry->d_inode->i_mode)) {
+	if (!S_ISDIR(old_inode->i_mode)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
 			goto exit4;
@@ -5065,8 +5270,6 @@
 	    IS_DIR_UNIONED(old.dentry))
 		goto exit4;
 	error = lookup_hash(&newnd, &new);
-	if (!error && needs_lookup_union(&newnd, &newnd.path, &new))
-		error = lookup_union_locked(&newnd, &newnd.last, &new);
 	if (error)
 		goto exit4;
 	/* target should not be an ancestor of source */
@@ -5079,42 +5282,15 @@
 	    IS_DIR_UNIONED(new.dentry))
 		goto exit5;
 
-	/* If source should've been copied up by lookup_hash() */
-	if (IS_DIR_UNIONED(oldnd.path.dentry))
-		BUG_ON(old.mnt != oldnd.path.mnt);
-
-	/* If target is on lower layer, get negative dentry for topmost */
-	if (IS_DIR_UNIONED(newnd.path.dentry) &&
-	    new.mnt != newnd.path.mnt) {
-		/* At this point, source and target are both files, the source
-		 * is on the topmost layer and the target is on a lower layer.
-		 * We want the target dentry to disappear from the namespace
-		 * and give vfs_rename a negative dentry from the topmost
-		 * layer.
-		 *
-		 * Note: We already did lookup once, so no need to recheck perm
-		 */
-		struct dentry *dentry =
-			__lookup_hash(&newnd.last, newnd.path.dentry,
-				      newnd.flags);
-		if (IS_ERR(dentry)) {
-			error = PTR_ERR(dentry);
-			goto exit5;
-		}
-
-		/* We no longer need the lower target dentry.  It definitely
-		 * should be removed from the hash table */
-		/* XXX what about failure case? */
-		d_delete(new.dentry);
-		mntput(new.mnt);
-		new.mnt = mntget(newnd.path.mnt);
-		new.dentry = dentry;
-	}
-
 	error = security_path_rename(&oldnd.path, old.dentry,
 				     &newnd.path, new.dentry);
 	if (error)
 		goto exit5;
+
+	error = union_copy_up_locked(&oldnd.path, &old, &old_actual);
+	if (error)
+		goto exit5;
+
 	error = vfs_rename(old_dir->d_inode, old.dentry,
 				   new_dir->d_inode, new.dentry);
 	if (error)
@@ -5135,6 +5311,8 @@
 exit5:
 	path_put_conditional(&new, &newnd);
 exit4:
+	path_put_maybe(&old_lower_cache);
+exit_err_old_inode:
 	path_put_conditional(&old, &oldnd);
 exit3:
 	unlock_rename(new_dir, old_dir);
diff --git a/fs/namespace.c b/fs/namespace.c
index 8915d2a..20673d7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1377,12 +1377,11 @@
 
 #endif
 
-static bool mnt_ns_loop(struct path *path)
+static bool mnt_ns_loop(struct inode *inode)
 {
 	/* Could bind mounting the mount namespace inode cause a
 	 * mount namespace loop?
 	 */
-	struct inode *inode = path->dentry->d_inode;
 	struct proc_ns *ei;
 	struct mnt_namespace *mnt_ns;
 
@@ -1629,8 +1628,8 @@
  */
 static int build_root_union(struct mount *topmost_mnt)
 {
-	struct path lower, topmost_path;
 	struct mount *mnt, *topmost_ro_mnt;
+	struct path lower, topmost_path;
 	unsigned int i, layers = 1;
 	int err = 0;
 
@@ -1932,56 +1931,59 @@
 static int do_loopback(struct path *path, const char *old_name,
 				int recurse)
 {
-	struct path old_parent, old_path;
+	struct path old_path, lower_cache, actual;
 	struct mount *mnt = NULL, *old, *parent;
 	struct mountpoint *mp;
+	struct inode *inode;
 	int err;
 	if (!old_name || !*old_name)
 		return -EINVAL;
 
-	err = __user_path_and_parent(AT_FDCWD, old_name,
-				     LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT,
-				     &old_parent, &old_path);
+	err = user_path_at(AT_FDCWD, old_name,
+			   LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT, &old_path);
 	if (err)
 		return err;
 
-	err = -EINVAL;
-	if (mnt_ns_loop(&old_path))
-		goto out; 
-
-	mp = lock_mount(path);
-	err = PTR_ERR(mp);
-	if (IS_ERR(mp))
+	inode = union_get_inode(&old_path, &lower_cache, &actual);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out;
+	}
+
+	err = -EINVAL;
+	if (mnt_ns_loop(inode))
+		goto out_lower;
 
 	old = real_mount(old_path.mnt);
 
 	err = -EINVAL;
 	if (IS_MNT_UNBINDABLE(old))
-		goto out2;
+		goto out_lower;
 
-	if (IS_MNT_LOWER(old_path.mnt)) {
-		/* If we're bind-mounting a file that's on a lower fs in a
-		 * union then we must first copy the file up as the copied
-		 * mount stack attached to the superblock is independent of any
-		 * namespace and will fail the check_mnt() test.  Directories
-		 * are copied up during the pathwalk, so we need not worry
-		 * about those.
-		 */
-		if (!old_parent.mnt)
-			goto out2;
-		mutex_lock(&old_parent.dentry->d_inode->i_mutex);
-		err = union_copyup(&old_parent, &old_path, true, 0);
-		mutex_unlock(&old_parent.dentry->d_inode->i_mutex);
-		if (err)
-			goto out2;
-		mntget(old_path.mnt);
-		old = real_mount(old_path.mnt);
-	}
+	/* If we're bind-mounting a file that's on a lower fs in a union then
+	 * we must first copy the file up as the copied mount stack attached to
+	 * the superblock is independent of any namespace and will fail the
+	 * check_mnt() test.  Directories are copied up during the pathwalk, so
+	 * we need not worry about those.
+	 */
+	err = union_copy_up(&old_path, &actual);
+	if (err < 0)
+		goto out_lower;
+
+	mp = lock_mount(path);
+	err = PTR_ERR(mp);
+	if (IS_ERR(mp))
+		goto out_lower;
+
+	old = real_mount(old_path.mnt);
+
+	err = -EINVAL;
+	if (IS_MNT_UNBINDABLE(old))
+		goto out_unlock;
 
 	parent = real_mount(path->mnt);
 	if (!check_mnt(parent) || !check_mnt(old))
-		goto out2;
+		goto out_unlock;
 
 	if (recurse)
 		mnt = copy_tree(old, old_path.dentry, 0);
@@ -1990,7 +1992,7 @@
 
 	if (IS_ERR(mnt)) {
 		err = PTR_ERR(mnt);
-		goto out2;
+		goto out_unlock;
 	}
 
 	err = graft_tree(mnt, parent, mp);
@@ -1999,11 +2001,12 @@
 		umount_tree(mnt, 0);
 		br_write_unlock(&vfsmount_lock);
 	}
-out2:
+out_unlock:
 	unlock_mount(mp);
+out_lower:
+	path_put_maybe(&lower_cache);
 out:
 	path_put(&old_path);
-	path_put(&old_parent);
 	return err;
 }
 
@@ -2225,7 +2228,7 @@
 	if (IS_MNT_UNION(&newmnt->mnt)) {
 		err = prepare_mnt_union(newmnt, path);
 		if (err)
-			return err;
+			goto unlock;
 		unioned = true;
 	}
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a6bc8a7..aafed3c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -467,7 +467,7 @@
 
 #if defined(CONFIG_NFSD_V4)
 static int
-set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
+set_nfsv4_acl_one(struct path *path, struct posix_acl *pacl, char *key)
 {
 	int len;
 	size_t buflen;
@@ -486,7 +486,7 @@
 		goto out;
 	}
 
-	error = vfs_setxattr(dentry, key, buf, len, 0);
+	error = vfs_setxattr(path, key, buf, len, 0);
 out:
 	kfree(buf);
 	return error;
@@ -496,9 +496,9 @@
 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
     struct nfs4_acl *acl)
 {
+	struct path path;
 	__be32 error;
 	int host_error;
-	struct dentry *dentry;
 	struct inode *inode;
 	struct posix_acl *pacl = NULL, *dpacl = NULL;
 	unsigned int flags = 0;
@@ -508,8 +508,9 @@
 	if (error)
 		return error;
 
-	dentry = fhp->fh_dentry;
-	inode = dentry->d_inode;
+	path.mnt = fhp->fh_export->ex_path.mnt;
+	path.dentry = fhp->fh_dentry;
+	inode = path.dentry->d_inode;
 	if (S_ISDIR(inode->i_mode))
 		flags = NFS4_ACL_DIR;
 
@@ -519,12 +520,12 @@
 	} else if (host_error < 0)
 		goto out_nfserr;
 
-	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
+	host_error = set_nfsv4_acl_one(&path, pacl, POSIX_ACL_XATTR_ACCESS);
 	if (host_error < 0)
 		goto out_release;
 
 	if (S_ISDIR(inode->i_mode))
-		host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
+		host_error = set_nfsv4_acl_one(&path, dpacl, POSIX_ACL_XATTR_DEFAULT);
 
 out_release:
 	posix_acl_release(pacl);
@@ -2264,12 +2265,17 @@
 int
 nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 {
-	struct inode *inode = fhp->fh_dentry->d_inode;
+	struct path path = { .mnt = fhp->fh_export->ex_path.mnt,
+			     .dentry = fhp->fh_dentry };
+	struct inode *inode = path.dentry->d_inode;
 	char *name;
 	void *value = NULL;
 	size_t size;
 	int error;
 
+	if (!inode)
+		return -EOPNOTSUPP; /* Appears to be unionmounted */
+
 	if (!IS_POSIXACL(inode) ||
 	    !inode->i_op->setxattr || !inode->i_op->removexattr)
 		return -EOPNOTSUPP;
@@ -2300,12 +2306,12 @@
 	if (error)
 		goto getout;
 	if (size)
-		error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
+		error = vfs_setxattr(&path, name, value, size, 0);
 	else {
 		if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
 			error = 0;
 		else {
-			error = vfs_removexattr(fhp->fh_dentry, name);
+			error = vfs_removexattr(&path, name);
 			if (error == -ENODATA)
 				error = 0;
 		}
diff --git a/fs/open.c b/fs/open.c
index c03de97..c6a7e08 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -63,44 +63,37 @@
 	return ret;
 }
 
-long vfs_truncate(struct path *parent, struct path *path, loff_t length)
+long vfs_truncate(struct path *path, loff_t length)
 {
-	struct vfsmount *mnt;
+	struct path lower_cache, actual;
 	struct inode *inode;
 	long error;
 
-	inode = path->dentry->d_inode;
-
 	if (IS_MNT_LOWER(path->mnt))
 		printk("UNION: truncate: path.mnt: at lower\n");
 	if (IS_MNT_UNION(path->mnt))
 		printk("UNION: truncate: path.mnt: at upper\n");
-	if (parent && IS_MNT_LOWER(parent->mnt))
-		printk("UNION: truncate: parent.mnt at lower\n");
-	if (parent && IS_MNT_UNION(parent->mnt))
-		printk("UNION: truncate: parent.mnt at upper\n");
+
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
+	error = -EISDIR;
 	if (S_ISDIR(inode->i_mode))
-		return -EISDIR;
+		goto out;
+	error = -EINVAL;
 	if (!S_ISREG(inode->i_mode))
-		return -EINVAL;
+		goto out;
 
-	/* If we're looking at the lower layer of a union mount, then we need
-	 * to create the file on the upperfs and truncate that.
-	 */
-	mnt = path->mnt;
-	if (IS_MNT_LOWER(path->mnt) && parent->mnt)
-		mnt = parent->mnt;
-
-	error = mnt_want_write(mnt);
+	error = mnt_want_write(path->mnt);
 	if (error)
 		goto out;
 
-	if (unlikely(IS_MNT_UNION(mnt))) {
+	if (unlikely(d_is_unioned(path->dentry, &actual))) {
 		/* We have to be able to write to the upperfs. */
 		error = -EROFS;
-		if (mnt->mnt_sb->s_flags & MS_RDONLY)
+		if (path->dentry->d_sb->s_flags & MS_RDONLY)
 			goto mnt_drop_write_and_out;
 
 		/* But the lowerfs inode must offer write permission - if the
@@ -118,16 +111,10 @@
 	if (IS_APPEND(inode))
 		goto mnt_drop_write_and_out;
 
-	if (IS_MNT_LOWER(path->mnt)) {
-		mutex_lock(&parent->dentry->d_inode->i_mutex);
-		error = union_copyup(parent, path, false, length);
-		mutex_unlock(&parent->dentry->d_inode->i_mutex);
-		if (error)
-			goto mnt_drop_write_and_out;
-		mntget(path->mnt);
-	}
+	error = union_truncated_copy_up(path, &actual, &length);
+	if (error)
+		goto mnt_drop_write_and_out;
 
-	/* path may have changed after copyup */
 	inode = path->dentry->d_inode;
 	error = get_write_access(inode);
 	if (error)
@@ -150,8 +137,9 @@
 put_write_and_out:
 	put_write_access(inode);
 mnt_drop_write_and_out:
-	mnt_drop_write(mnt);
+	mnt_drop_write(path->mnt);
 out:
+	path_put_maybe(&lower_cache);
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_truncate);
@@ -159,18 +147,16 @@
 static long do_sys_truncate(const char __user *pathname, loff_t length)
 {
 	unsigned int lookup_flags = LOOKUP_FOLLOW;
-	struct path parent, path;
+	struct path path;
 	int error;
 
 	if (length < 0)	/* sorry, but loff_t says... */
 		return -EINVAL;
 
 retry:
-	error = user_path_and_parent(AT_FDCWD, pathname, lookup_flags,
-				     &parent, &path);
+	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (!error) {
-		error = vfs_truncate(&parent, &path, length);
-		path_put(&parent);
+		error = vfs_truncate(&path, length);
 		path_put(&path);
 	}
 	if (retry_estale(error, lookup_flags)) {
@@ -532,23 +518,40 @@
 
 static int chmod_common(struct path *path, umode_t mode)
 {
-	struct inode *inode = path->dentry->d_inode;
+	struct path lower_cache, actual;
+	struct inode *inode;
 	struct iattr newattrs;
 	int error;
 
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
 	error = mnt_want_write(path->mnt);
 	if (error)
-		return error;
+		goto out_lower;
+again:
 	mutex_lock(&inode->i_mutex);
-	error = security_path_chmod(path, mode);
+	error = security_path_chmod(&actual, mode);
 	if (error)
 		goto out_unlock;
+	if (d_is_unioned(path->dentry, &actual)) {
+		mutex_unlock(&inode->i_mutex);
+		error = union_copy_up(path, &actual);
+		if (error < 0)
+			goto out_drop_write;
+		inode = actual.dentry->d_inode;
+		goto again;
+	}
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(path->dentry, &newattrs);
+	error = notify_change(actual.dentry, &newattrs);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
+out_drop_write:
 	mnt_drop_write(path->mnt);
+out_lower:
+	path_put_maybe(&lower_cache);
 	return error;
 }
 
@@ -560,7 +563,10 @@
 	file = fget(fd);
 	if (file) {
 		audit_inode(NULL, file->f_path.dentry, 0);
-		err = chmod_common(&file->f_path, mode);
+		if (file->f_inode != file->f_path.dentry->d_inode)
+			err = -EACCES; /* Unioned, but can't copy up. */
+		else
+			err = chmod_common(&file->f_path, mode);
 		fput(file);
 	}
 	return err;
@@ -570,7 +576,7 @@
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_COPY_UP;
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
 
 retry:
 	error = user_path_at(dfd, filename, lookup_flags, &path);
@@ -592,7 +598,8 @@
 
 static int chown_common(struct path *path, uid_t user, gid_t group)
 {
-	struct inode *inode = path->dentry->d_inode;
+	struct path lower_cache, actual;
+	struct inode *inode;
 	int error;
 	struct iattr newattrs;
 	kuid_t uid;
@@ -604,26 +611,47 @@
 	newattrs.ia_valid =  ATTR_CTIME;
 	if (user != (uid_t) -1) {
 		if (!uid_valid(uid))
 			return -EINVAL;
 		newattrs.ia_valid |= ATTR_UID;
 		newattrs.ia_uid = uid;
 	}
 	if (group != (gid_t) -1) {
 		if (!gid_valid(gid))
 			return -EINVAL;
 		newattrs.ia_valid |= ATTR_GID;
 		newattrs.ia_gid = gid;
 	}
+
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+again:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chown(path, uid, gid);
-	if (!error)
-		error = notify_change(path->dentry, &newattrs);
-	mutex_unlock(&inode->i_mutex);
+	if (error < 0)
+		goto out_unlock;
 
+	if (d_is_unioned(path->dentry, &actual)) {
+		/* Copy up runs without the inode lock held, so a failure
+		 * here must skip the unlock below.
+		 */
+		mutex_unlock(&inode->i_mutex);
+		error = union_copy_up(path, &actual);
+		if (error < 0)
+			goto out_lower;
+		inode = actual.dentry->d_inode;
+		goto again;
+	}
+
+	error = notify_change(actual.dentry, &newattrs);
+out_unlock:
+	mutex_unlock(&inode->i_mutex);
+out_lower:
+	path_put_maybe(&lower_cache);
 	return error;
 }
 
 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
@@ -640,7 +668,7 @@
 	if (flag & AT_EMPTY_PATH)
 		lookup_flags |= LOOKUP_EMPTY;
 retry:
-	error = user_path_at(dfd, filename, lookup_flags | LOOKUP_COPY_UP, &path);
+	error = user_path_at(dfd, filename, lookup_flags, &path);
 	if (error)
 		goto out;
 	error = mnt_want_write(path.mnt);
@@ -677,6 +705,11 @@
 	if (!f.file)
 		goto out;
 
+	if (f.file->f_inode != f.file->f_path.dentry->d_inode) {
+		error = -EACCES; /* Unioned, but can't copy up. */
+		goto out_fput;
+	}
+
 	error = mnt_want_write_file(f.file);
 	if (error)
 		goto out_fput;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 43de6d6..b21c7a4 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -65,6 +65,8 @@
 		{ MNT_NOATIME, ",noatime" },
 		{ MNT_NODIRATIME, ",nodiratime" },
 		{ MNT_RELATIME, ",relatime" },
+		{ MNT_SHARED, ",shared" },
+		{ MNT_UNBINDABLE, ",nobind" },
 		{ MNT_UNION, ",union" },
 		{ 0, NULL }
 	};
diff --git a/fs/readdir.c b/fs/readdir.c
index fe0f42a..a186f38 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -33,18 +33,30 @@
 	if (res)
 		goto out;
 
-	res = mutex_lock_killable(&inode->i_mutex);
-	if (res)
-		goto out;
+	if (unlikely(IS_DIR_UNIONED(file->f_path.dentry)) &&
+	    !IS_OPAQUE(file->f_path.dentry->d_inode)) {
+		res = mnt_want_write(file->f_path.mnt);
+		if (res < 0)
+			goto out;
+
+		res = mutex_lock_killable(&inode->i_mutex);
+		if (res < 0) {
+			mnt_drop_write(file->f_path.mnt);
+			goto out;
+		}
+
+		res = union_copyup_one_dir(&file->f_path);
+		mnt_drop_write(file->f_path.mnt);
+		if (res < 0)
+			goto out_unlock;
+	} else {
+		res = mutex_lock_killable(&inode->i_mutex);
+		if (res)
+			goto out;
+	}
 
 	res = -ENOENT;
 	if (!IS_DEADDIR(inode)) {
-		if (IS_DIR_UNIONED(file->f_path.dentry)) {
-			res = union_copyup_one_dir(&file->f_path);
-			if (res)
-				goto out_unlock;
-		}
-
 		ctx->pos = file->f_pos;
 		res = file->f_op->iterate(file, ctx);
 		file->f_pos = ctx->pos;
diff --git a/fs/stat.c b/fs/stat.c
index 04ce1ac..8fcf26d 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -17,6 +17,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include "union.h"
 
 void generic_fillattr(struct inode *inode, struct kstat *stat)
 {
@@ -39,18 +40,27 @@
 
 int vfs_getattr(struct path *path, struct kstat *stat)
 {
-	struct inode *inode = path->dentry->d_inode;
+	struct path lower_cache, actual;
+	struct inode *inode;
 	int retval;
 
-	retval = security_inode_getattr(path->mnt, path->dentry);
-	if (retval)
-		return retval;
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
-	if (inode->i_op->getattr)
-		return inode->i_op->getattr(path->mnt, path->dentry, stat);
+	retval = security_inode_getattr(actual.mnt, actual.dentry);
+	if (retval)
+		goto out;
+
+	if (inode->i_op->getattr) {
+		retval = inode->i_op->getattr(actual.mnt, actual.dentry, stat);
+		goto out;
+	}
 
 	generic_fillattr(inode, stat);
-	return 0;
+out:
+	path_put_maybe(&lower_cache);
+	return retval;
 }
 
 EXPORT_SYMBOL(vfs_getattr);
diff --git a/fs/union.c b/fs/union.c
index cfeb65f..7f94fee 100644
--- a/fs/union.c
+++ b/fs/union.c
@@ -13,7 +13,7 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
-
+#define DEBUG
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -24,8 +24,8 @@
 #include <linux/file.h>
 #include <linux/security.h>
 #include <linux/splice.h>
-#include <linux/xattr.h>
 
+#include "internal.h"
 #include "union.h"
 
 /**
@@ -35,11 +35,9 @@
  * Allocate a union_stack large enough to contain the maximum number
  * of layers in this union mount.
  */
-static struct union_stack *union_alloc(struct path *topmost)
+struct union_stack *union_alloc(struct path *topmost)
 {
 	unsigned int layers = topmost->dentry->d_sb->s_union_count;
-	BUG_ON(!S_ISDIR(topmost->dentry->d_inode->i_mode));
-
 	return kcalloc(sizeof(struct path), layers, GFP_KERNEL);
 }
 
@@ -47,23 +45,37 @@
  * d_free_unions - free all unions for this dentry
  * @dentry: topmost dentry in the union stack to remove
  *
- * This must be called when freeing a dentry.
+ * This must be called when freeing a dentry.  d_inode may point to a defunct
+ * inode or may have been cleared by the time we get here.
  */
 void d_free_unions(struct dentry *topmost)
 {
 	struct path *path;
 	unsigned int i, layers = topmost->d_sb->s_union_count;
 
-	if (!IS_DIR_UNIONED(topmost))
-		return;
+	if (topmost->d_union_stack) {
+		if (topmost->d_flags & DCACHE_UNION_PINNING_LOWER) {
+			/* A negative non-dir upper dentry is pinning
+			 * a single lower dentry so that f_inode
+			 * doesn't have to.
+			 */
+			pr_devel("free pin: %pq\n", &topmost->d_name);
+			dput(topmost->d_fallthru);
+		} else {
+			/* A positive directory dentry is pinning a
+			 * stack of lower dirs.
+			 */
+			pr_devel("free dirstack: %pq\n", &topmost->d_name);
 
-	for (i = 0; i < layers; i++) {
-		path = union_find_dir(topmost, i);
-		if (path->mnt)
-			path_put(path);
+			for (i = 0; i < layers; i++) {
+				path = union_find_dir(topmost, i);
+				if (path->mnt)
+					path_put(path);
+			}
+			kfree(topmost->d_union_stack);
+		}
+		topmost->d_union_stack = NULL;
 	}
-	kfree(topmost->d_union_stack);
-	topmost->d_union_stack = NULL;
 }
 
 /**
@@ -83,6 +95,7 @@
 	struct path *path;
 
 	BUG_ON(layer >= dentry->d_sb->s_union_count);
+	BUG_ON(d_is_fallthru(dentry));
 
 	if (!dentry->d_union_stack)
 		dentry->d_union_stack = union_alloc(topmost);
@@ -96,14 +109,14 @@
 
 /**
  * union_copyup_xattr
- * @old: dentry of original file
  * @new: dentry of new copy
+ * @old: dentry of original file
  *
  * Copy up extended attributes from the original file to the new one.
  *
  * XXX - Permissions?  For now, copying up every xattr.
  */
-static int union_copyup_xattr(struct dentry *old, struct dentry *new)
+static int union_copyup_xattr(struct path *new, struct dentry *old)
 {
 	ssize_t list_size, size;
 	char *buf, *name, *value;
@@ -111,7 +124,7 @@
 
 	/* Check for xattr support */
 	if (!old->d_inode->i_op->getxattr ||
-	    !new->d_inode->i_op->getxattr)
+	    !new->dentry->d_inode->i_op->getxattr)
 		return 0;
 
 	/* Find out how big the list of xattrs is */
@@ -161,7 +174,6 @@
 /**
  * union_create_topmost_dir - Create a matching dir in the topmost file system
  * @parent - parent of target on topmost layer
- * @name - name of target
  * @topmost - path of target on topmost layer
  * @lower - path of source on lower layer
  *
@@ -171,15 +183,17 @@
  * We don't use vfs_mkdir() for a few reasons: don't want to do the security
  * check, don't want to make the dir opaque, don't need to sanitize the mode.
  *
+ * The caller must hold the parent i_mutex lock and the mnt_want_write lock.
+ *
  * XXX - owner is wrong, set credentials properly
  * XXX - rmdir() directory on failure of xattr copyup
  * XXX - not atomic w/ respect to crash
  */
-int union_create_topmost_dir(struct path *parent, struct qstr *name,
-			     struct path *topmost, struct path *lower)
+int union_create_topmost_dir(struct path *parent,
+			     struct path *topmost, struct dentry *lower)
 {
 	struct inode *dir = parent->dentry->d_inode;
-	int mode = lower->dentry->d_inode->i_mode;
+	int mode = lower->d_inode->i_mode;
 	int error;
 
 	BUG_ON(topmost->dentry->d_inode);
@@ -188,32 +202,25 @@
 	if (!dir->i_op->mkdir)
 		return -EPERM;
 
-	error = mnt_want_write(parent->mnt);
+	error = dir->i_op->mkdir(dir, topmost->dentry, mode);
 	if (error)
 		return error;
 
-	error = dir->i_op->mkdir(dir, topmost->dentry, mode);
-	if (error)
-		goto out;
-
-	error = union_copyup_xattr(lower->dentry, topmost->dentry);
+	error = union_copyup_xattr(topmost, lower);
 	if (error)
 		goto out_rmdir;
 
 	fsnotify_mkdir(dir, topmost->dentry);
-
-	mnt_drop_write(parent->mnt);
-
 	return 0;
+
 out_rmdir:
 	/* XXX rm created dir */
 	dput(topmost->dentry);
-out:
-	mnt_drop_write(parent->mnt);
 	return error;
 }
 
-struct union_filldir_info {
+struct union_iterate_context {
+	struct dir_context ctx;
 	struct dentry *topmost_dentry;
 	int error;
 };
@@ -227,8 +234,8 @@
 static int union_copyup_one_dirent(void *buf, const char *name, int namelen,
 				   loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct union_filldir_info *ufi = (struct union_filldir_info *) buf;
-	struct dentry *topmost_dentry = ufi->topmost_dentry;
+	struct union_iterate_context *uic = (struct union_iterate_context *)buf;
+	struct dentry *topmost_dentry = uic->topmost_dentry;
 	struct dentry *dentry;
 	int err = 0;
 
@@ -281,7 +288,7 @@
 	dput(dentry);
 out:
 	if (err)
-		ufi->error = err;
+		uic->error = err;
 	return err;
 }
 
@@ -329,24 +336,26 @@
  */
 int __union_copyup_one_dir(struct path *topmost_path)
 {
-	struct union_filldir_info ufi;
 	struct dentry *topmost_dentry = topmost_path->dentry;
 	unsigned int i, layers = topmost_dentry->d_sb->s_union_count;
 	int error = 0;
 
-	BUG_ON(IS_OPAQUE(topmost_dentry->d_inode));
+	struct union_iterate_context uic = {
+		.ctx.actor = union_copyup_one_dirent,
+		.topmost_dentry = topmost_dentry,
+	};
+
+
+	if (IS_OPAQUE(topmost_dentry->d_inode))
+		return 0;
 
 	if (!topmost_dentry->d_inode->i_op ||
 	    !topmost_dentry->d_inode->i_op->fallthru)
 		return -EOPNOTSUPP;
 
-	error = mnt_want_write(topmost_path->mnt);
-	if (error)
-		return error;
-
 	for (i = 0; i < layers; i++) {
-		struct file * ftmp;
-		struct inode * inode;
+		struct inode *inode;
+		struct file *ftmp;
 		struct path *path;
 
 		path = union_find_dir(topmost_dentry, i);
@@ -356,14 +365,14 @@
 		ftmp = dentry_open(path, O_RDONLY | O_DIRECTORY | O_NOATIME,
 				   current_cred());
 		if (IS_ERR(ftmp)) {
-			printk(KERN_ERR "unable to open dir %s for "
+			printk(KERN_ERR "unable to open dir %pq for "
 			       "directory copyup: %ld\n",
-			       path->dentry->d_name.name, PTR_ERR(ftmp));
+			       &path->dentry->d_name, PTR_ERR(ftmp));
 			error = PTR_ERR(ftmp);
 			break;
 		}
 
-		inode = path->dentry->d_inode;
+		inode = file_inode(ftmp);
 		mutex_lock(&inode->i_mutex);
 
 		error = -ENOENT;
@@ -373,15 +382,15 @@
 		/* Read the whole directory, calling our directory entry copyup
 		 * function on each entry.
 		 */
-		ufi.topmost_dentry = topmost_dentry;
-		ufi.error = 0;
-		error = ftmp->f_op->readdir(ftmp, &ufi, union_copyup_one_dirent);
+		uic.ctx.pos = 0;
+		uic.error = 0;
+		error = ftmp->f_op->iterate(ftmp, &uic.ctx);
 out_fput:
 		mutex_unlock(&inode->i_mutex);
 		fput(ftmp);
 
-		if (ufi.error)
-			error = ufi.error;
+		if (uic.error)
+			error = uic.error;
 		if (error)
 			break;
 
@@ -407,7 +416,6 @@
 		mark_inode_dirty(topmost_dentry->d_inode);
 	}
 
-	mnt_drop_write(topmost_path->mnt);
 	return error;
 }
 
@@ -464,33 +472,213 @@
 }
 EXPORT_SYMBOL(generic_readdir_fallthru);
 
+/*
+ * Get the inode and path for a dentry where that inode may exist on a lower
+ * layer in a union.
+ *
+ * The caller must preclear the elements of *_lower_cache and prime *_actual
+ * with the contents of *upper (as is done by wrappers in union.h) and must
+ * also hold parent->i_mutex.
+ *
+ * Note that we don't get a ref on the inode or the lower vfsmount (if
+ * returned).  We leave it to the caller to iget/mntget them if appropriate.
+ * This should be safe as the caller holds parent->i_mutex.  The lower dentry
+ * (if returned) is dget'd, however.
+ *
+ * The pointers returned in *_actual are not dget'd/mntget'd as it is assumed
+ * they're pinned by the caller's ref on upper->mnt (if set), upper->dentry; or
+ * by the fact that parent->i_mutex is locked and _lower_cache->dentry is
+ * dget'd.
+ */
+struct inode *__union_get_inode_locked(struct dentry *parent,
+				       struct path *upper,
+				       struct path *_lower_cache,
+				       struct path *_actual)
+{
+	const struct union_stack *d;
+	struct dentry *dentry = upper->dentry;
+	struct path lower;
+	unsigned i, layers = parent->d_sb->s_union_count;
+	int ret;
+
+	pr_devel("-->%s(%pq,)\n", __func__, &dentry->d_name);
+
+	BUG_ON(d_is_whiteout(dentry));
+
+	/* Check for a race with copy up. */
+	if (likely(dentry->d_inode)) {
+		pr_devel("<--%s() = upper\n", __func__);
+		*_actual = *upper;
+		return dentry->d_inode;
+	}
+
+	if (dentry->d_flags & DCACHE_UNION_PINNING_LOWER) {
+		pr_devel("<--%s() = fall\n", __func__);
+		smp_rmb();
+		_actual->dentry = dentry->d_fallthru;
+		d = parent->d_union_stack;
+		for (i = 0; i < layers; i++) {
+			if (d->u_dirs[i].dentry == dentry->d_fallthru->d_parent) {
+				_lower_cache->mnt = d->u_dirs[i].mnt;
+				break;
+			}
+		}
+		if (unlikely(!_lower_cache->mnt))
+			goto out_badcache;
+		_actual->mnt = mntget(_lower_cache->mnt);
+		return dentry->d_fallthru->d_inode;
+	}
+
+	/* Search down through the union stack of the parent of the target for
+	 * the lower dentry we're going to use.
+	 */
+	for (i = 0; i < layers; i++) {
+		/* Get the parent directory for this layer and look the target
+		 * up in it.
+		 */
+		const struct path *lower_parent = union_find_dir(parent, i);
+		if (!lower_parent->mnt)
+			continue;
+
+		mutex_lock(&lower_parent->dentry->d_inode->i_mutex);
+		lower.dentry = __lookup_hash(&dentry->d_name,
+					     lower_parent->dentry, 0);
+		mutex_unlock(&lower_parent->dentry->d_inode->i_mutex);
+		if (IS_ERR(lower.dentry)) {
+			ret = PTR_ERR(lower.dentry);
+			goto out_err;
+		}
+
+		/* A negative dentry can mean several things: a plain negative
+		 * dentry is ignored and lookup continues to the next layer,
+		 * but a whiteout or a non-fallthru in an opaque dir covers
+		 * everything below it.
+		 */
+		if (!lower.dentry->d_inode) {
+			if (d_is_whiteout(lower.dentry))
+				goto out_hit_barrier;
+			if (IS_OPAQUE(lower_parent->dentry->d_inode) &&
+			    !d_is_fallthru(lower.dentry))
+				goto out_hit_barrier;
+			dput(lower.dentry);
+			continue;
+		}
+
+		/* TODO: Deal with mountpoints and suchlike */
+		lower.mnt = mntget(lower_parent->mnt);
+		goto out_found_file;
+	}
+
+out_enoent:
+	if (d_is_fallthru(dentry)) {
+		pr_devel("<--%s() = -ENOENT\n", __func__);
+		return ERR_PTR(-ENOENT);
+	}
+	pr_devel("<--%s() = NULL\n", __func__);
+	return NULL;
+
+out_hit_barrier:
+	dput(lower.dentry);
+	goto out_enoent;
+
+out_found_file:
+	*_actual = *_lower_cache = lower;
+	pr_devel("<--%s() = lower\n", __func__);
+	return lower.dentry->d_inode;
+
+out_err:
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ERR_PTR(ret);
+
+out_badcache:
+	printk_ratelimited(KERN_WARNING "UNION: Bad cached fallthru (%pq/%pq)\n",
+			   &parent->d_name, &upper->dentry->d_name);
+	return ERR_PTR(-EIO);
+}
+
+/*
+ * Get the inode for a dentry where that inode may exist on a lower layer in a
+ * union.
+ *
+ * Note that we don't get a ref on the inode, so we may need to pin it by
+ * getting a ref on a dentry pointing to it - in which case, a pointer to that
+ * dentry will be returned in *_lower_cache and the caller is expected to
+ * dput() the ref on it.
+ */
+struct inode *__union_get_inode(struct path *upper, struct path *_lower_cache,
+				struct path *_actual)
+{
+	struct dentry *parent, *dentry = upper->dentry;
+	struct inode *inode;
+	int ret;
+
+	pr_devel("-->%s(%pq,)\n", __func__, &dentry->d_name);
+
+	/* We need the parent directory so that we can find the stack of lower
+	 * directories in which to do lookups.  Use the rename mutex to prevent
+	 * rename from getting underfoot whilst we get the parent.
+	 */
+	if (mutex_lock_interruptible(&dentry->d_sb->s_vfs_rename_mutex) < 0)
+		return ERR_PTR(-EINTR);
+
+	parent = dget_parent(dentry);
+	if (IS_OPAQUE(parent->d_inode) && !d_is_fallthru(dentry)) {
+		mutex_unlock(&dentry->d_sb->s_vfs_rename_mutex);
+		inode = NULL;
+	} else {
+		ret = mutex_lock_interruptible(&parent->d_inode->i_mutex);
+		mutex_unlock(&dentry->d_sb->s_vfs_rename_mutex);
+		if (ret < 0) {
+			inode = ERR_PTR(ret);
+		} else {
+			inode = __union_get_inode_locked(parent, upper,
+							 _lower_cache, _actual);
+			mutex_unlock(&parent->d_inode->i_mutex);
+		}
+	}
+	dput(parent);
+	return inode;
+}
+
 /**
  * union_create_file
  * @parent: path of the upper parent directory
+ * @upper: path of the negative dentry to become new file
  * @lower: path of the source file
- * @new: path of the new file, negative dentry
  *
  * Must already have mnt_want_write() on the mnt and the parent's i_mutex.
  */
-static int union_create_file(struct path *parent, struct path *lower,
-			     struct dentry *new)
+static int union_create_file(struct path *parent, struct path *upper,
+			     struct path *lower)
 {
+	struct inode *dir = parent->dentry->d_inode;
+	int ret;
+
 	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));
 
-	return vfs_create(parent->dentry->d_inode, new,
-			  lower->dentry->d_inode->i_mode, true);
+	if (!dir->i_op->tmpfile)
+		return -EPERM;
+
+	ret = dir->i_op->tmpfile(dir, upper->dentry,
+				 lower->dentry->d_inode->i_mode);
+	if (ret == 0) {
+		spin_lock(&upper->dentry->d_inode->i_lock);
+		upper->dentry->d_inode->i_state |= I_LINKABLE;
+		spin_unlock(&upper->dentry->d_inode->i_lock);
+	}
+	return ret;
 }
 
 /**
  * union_create_symlink
  * @parent: Upper parent of the symlink
+ * @upper: Path of the negative dentry to become new symlink.
  * @lower: Path of the source symlink
- * @new: Path of the new symlink, negative dentry
  *
  * Must already have mnt_want_write() on the mnt and the parent's i_mutex.
  */
-static int union_create_symlink(struct path *parent, struct path *lower,
-				struct dentry *new)
+static int union_create_symlink(struct path *parent, struct path *upper,
+				struct path *lower)
 {
 	struct inode *inode = lower->dentry->d_inode;
 	char *content;
@@ -507,37 +695,46 @@
 		goto error;
 	content[error] = 0;
 
-	error = vfs_symlink(parent->dentry->d_inode, new, content);
+	error = vfs_symlink(parent->dentry->d_inode, upper->dentry, content);
 error:
 	kfree(content);
 	return error;
 }
 
 /**
- * union_copyup_data - Copy up len bytes of old's data to new
- * @lower: path of source file in lower layer
- * @new_mnt: vfsmount of target file
- * @new_dentry: dentry of target file
- * @len: number of bytes to copy
+ * union_copy_up_data - Copy up file data from @actual to @path
+ * @path: path of target file
+ * @actual: path of source file in lower layer
+ * @truncate_to: pointer to max number of bytes to copy (or NULL if all)
  */
-static int union_copyup_data(struct path *lower, struct path *new_path,
-			     size_t len)
+static int union_copy_up_data(struct path *path, struct path *actual,
+			      const loff_t *truncate_to)
 {
 	const struct cred *cred = current_cred();
 	struct file *lower_file;
 	struct file *new_file;
-	loff_t offset = 0;
+	loff_t filesize, offset = 0;
+	size_t len;
 	long bytes;
 	int error = 0;
 
+	filesize = i_size_read(actual->dentry->d_inode);
+	if (truncate_to && *truncate_to < filesize)
+		filesize = *truncate_to;
+
+	/* Check for overflow of file size */
+	len = filesize;
+	if (len != filesize)
+		return -EFBIG;
+
 	if (len == 0)
 		return 0;
 
-	lower_file = dentry_open(lower, O_RDONLY, cred);
+	lower_file = dentry_open(actual, O_RDONLY, cred);
 	if (IS_ERR(lower_file))
 		return PTR_ERR(lower_file);
 
-	new_file = dentry_open(new_path, O_WRONLY, cred);
+	new_file = dentry_open(path, O_WRONLY, cred);
 	if (IS_ERR(new_file)) {
 		error = PTR_ERR(new_file);
 		goto out_fput;
@@ -554,139 +751,311 @@
 	return error;
 }
 
-/**
- * union_copyup_file - Copy up a regular file, symlink or special file
- * @parent: Parent dir on upper fs
- * @lower: path of file to be copied up
- * @dentry: dentry to copy up to
- * @len: number of bytes of file data to copy up
+/*
+ * Create a temporary file.  We don't want to inline this as it uses quite a
+ * lot of stack space.
+ *
+ * The caller should make sure _tmpfile->mnt is set to the upper vfsmount and
+ * that ->dentry is NULL.
+ *
+ * Note: we don't return with a ref on _tmpfile->mnt as path is holding a ref.
+ * Further, we may return with a dentry in _tmpfile->dentry that needs
+ * dput'ing, even if an error occurred.
  */
-static int union_copyup_file(struct path *parent, struct path *lower,
-			     struct dentry *dentry, size_t len)
+static int union_create_tmpfile(struct path *parent, struct path *path,
+				struct path *actual, struct path *_tmpfile)
+{
+	static const struct qstr nameless = { .name = "", .len = 0, .hash = 0 };
+	struct dentry *dentry;
+	int ret;
+
+	pr_devel("-->%s(%pq)\n",
+		 __func__, &path->dentry->d_name);
+
+	/* Create a nameless file not directly attached to the parent
+	 * directory, but still associated with it for layout optimisation
+	 * reasons.  The upperfs should check for the file being of zero
+	 * length.
+	 *
+	 * We will then hard link the file into place when we're done copying
+	 * up - and mount/fsck will clean it up in the event of a crash and
+	 * dput() will clean it up in the event of an error.
+	 */
+	mutex_lock(&parent->dentry->d_inode->i_mutex);
+
+	dentry = d_alloc(parent->dentry, &nameless);
+	if (dentry) {
+		_tmpfile->dentry = dentry;
+		if (S_ISREG(actual->dentry->d_inode->i_mode))
+			ret = union_create_file(parent, _tmpfile, actual);
+		else if (S_ISLNK(actual->dentry->d_inode->i_mode))
+			ret = union_create_symlink(parent, _tmpfile, actual);
+		else
+			BUG();
+	} else {
+		ret = -ENOMEM; /* d_alloc() returns NULL, not an ERR_PTR */
+	}
+
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Copy up a file or symlink to a temporary file in the specially prepared
+ * directory and return the dentry of that in _tmpfile->dentry.
+ */
+static int union_copy_up_to_tmpfile(struct path *parent, struct path *path,
+				    struct path *actual, struct path *_tmpfile,
+				    const loff_t *truncate_to)
+{
+	struct dentry *dentry = actual->dentry;
+	int ret;
+
+	ret = union_create_tmpfile(parent, path, actual, _tmpfile);
+
+	if (ret == 0 && S_ISREG(dentry->d_inode->i_mode))
+		ret = union_copy_up_data(_tmpfile, actual, truncate_to);
+	if (ret == 0)
+		ret = union_copyup_xattr(_tmpfile, actual->dentry);
+	return ret;
+}
+
+/*
+ * Create a hardlink from the temporary file to the actual location.
+ */
+static int union_hard_link_to_tmpfile(struct path *parent, struct path *path,
+				      struct path *tmpfile)
+{
+	int ret;
+
+	pr_devel("-->%s(%pq,%pq,%pq)\n",
+		 __func__, &parent->dentry->d_name, &path->dentry->d_name,
+		 &tmpfile->dentry->d_name);
+
+	mutex_lock(&parent->dentry->d_inode->i_mutex);
+	ret = vfs_link(tmpfile->dentry, parent->dentry->d_inode, path->dentry);
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+	return ret;
+}
+
+/**
+ * union_copy_up_via_tmpfile - Copy up lower file via temporary file
+ *
+ * Copy up a file or symlink to a temporary file in the specially prepared
+ * directory, then hard link across and unlink the temp file.
+ */
+static int union_copy_up_via_tmpfile(struct path *parent, struct path *path,
+				     struct path *actual, const loff_t *truncate_to)
 {
 	const struct cred *saved_cred;
 	struct cred *override_cred;
-	struct path to;
-	int error;
+	struct path tmpfile = { .mnt = path->mnt, .dentry = NULL };
+	int ret;
 
-	printk("-->union_copyup_file(,%s,%s,%zu)\n",
-	       lower->dentry->d_name.name, dentry->d_name.name, len);
-
-	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));
+	pr_devel("-->%s(,%pq,%pq,%pq,,%lld)\n",
+		 __func__, &parent->dentry->d_name, &path->dentry->d_name,
+		 &actual->dentry->d_name, truncate_to ? *truncate_to : -1);
 
 	override_cred = prepare_kernel_cred(NULL);
 	if (!override_cred)
 		return -ENOMEM;
 
-	override_cred->fsuid = lower->dentry->d_inode->i_uid;
-	override_cred->fsgid = lower->dentry->d_inode->i_gid;
+	override_cred->fsuid = actual->dentry->d_inode->i_uid;
+	override_cred->fsgid = actual->dentry->d_inode->i_gid;
 
 	saved_cred = override_creds(override_cred);
 
-	if (S_ISREG(lower->dentry->d_inode->i_mode)) {
-		error = union_create_file(parent, lower, dentry);
-		if (error)
-			goto out;
-		to.mnt = parent->mnt;
-		to.dentry = dentry;
-		error = union_copyup_data(lower, &to, len);
-	} else if (S_ISLNK(lower->dentry->d_inode->i_mode)) {
-		error = union_create_symlink(parent, lower, dentry);
-		goto out;
-	} else {
-		/* Don't currently support copyup of special files, though in
-		 * theory there's no reason we couldn't at least copy up
-		 * blockdev, chrdev and FIFO files
-		 */
-		error = -EXDEV;
-		goto out;
-	}
-	if (error)
-		/* Most likely error: ENOSPC */
-		vfs_unlink(parent->dentry->d_inode, dentry);
+	ret = union_copy_up_to_tmpfile(parent, path, actual, &tmpfile,
+				       truncate_to);
 
-out:
+	if (ret == 0)
+		ret = union_hard_link_to_tmpfile(parent, path, &tmpfile);
+
+	/* Discard the temporary dentry */
+	dput(tmpfile.dentry);
+
 	revert_creds(saved_cred);
+
 	put_cred(override_cred);
-	printk("<--union_copyup_file() = %d\n", error);
-	return error;
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Make copy-up an exclusive operation on a file.  The caller must have the
+ * parent i_mutex locked - which we will unlock during this function.
+ */
+static int __union_copy_up_exclusive(struct path *parent, struct path *path,
+				     struct path *actual, const loff_t *truncate_to)
+	__releases(parent->dentry->d_inode->i_mutex)
+{
+	struct dentry *upper = path->dentry;
+	int ret;
+
+	spin_lock(&upper->d_lock);
+	if (upper->d_flags & DCACHE_UNION_COPYING_UP) {
+		/* Copy up already in progress */
+		spin_unlock(&upper->d_lock);
+		mutex_unlock(&parent->dentry->d_inode->i_mutex);
+		pr_devel("UNION: wait on copyup\n");
+
+		/* Abuse the bit-wait system to get hold of a waitqueue we can
+		 * use (d_flags may be smaller than an unsigned long).
+		 */
+		do {
+			wait_queue_head_t *wq =
+				bit_waitqueue(&upper->d_flags, ilog2(DCACHE_UNION_COPYING_UP));
+			DEFINE_WAIT(__wait);
+
+			ret = -EAGAIN;
+			for (;;) {
+				prepare_to_wait(wq, &__wait, TASK_INTERRUPTIBLE);
+				if (!(upper->d_flags & DCACHE_UNION_COPYING_UP))
+					break;
+				if (!signal_pending(current)) {
+					schedule();
+					continue;
+				}
+				ret = -ERESTARTSYS;
+				break;
+			}
+			finish_wait(wq, &__wait);
+		} while (0);
+		return ret; /* There might have been an error or a signal */
+	}
+
+	/* Commence copying up.
+	 *
+	 * Mark the dentry so that other potential copy-uppers will wait for us
+	 * and drop the locks so that we can use splice.
+	 */
+	upper->d_flags |= DCACHE_UNION_COPYING_UP;
+	spin_unlock(&upper->d_lock);
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+
+	pr_devel("UNION: copyup begin\n");
+	ret = union_copy_up_via_tmpfile(parent, path, actual, truncate_to);
+	pr_devel("UNION: copyup done\n");
+
+	spin_lock(&upper->d_lock);
+	upper->d_flags &= ~DCACHE_UNION_COPYING_UP;
+	spin_unlock(&upper->d_lock);
+
+	wake_up_bit(&upper->d_flags, ilog2(DCACHE_UNION_COPYING_UP));
+	return ret; /* Propagate the copy-up result rather than masking it */
 }
 
 /**
- * union_copyup - Copy up a file and len bytes of data
- * @parent: Parent dir on upper fs
- * @path: Path of file to be copied up from
- * @copy_all: Copy all the file (if true) or just @len bytes of it
- * @len: Amount of file data to copy up
- *
- * Parent's i_mutex must be held by caller.  Newly copied up path is
- * returned in @path and original is path_put().
- *
- * NOTE!  If a copy up takes place, path->mnt will be changed to the same as
- * the topmost dir, but won't have a ref taken on it.
+ * __union_copy_up - Copy a non-directory file up to the upper layer.
  */
-int union_copyup(struct path *parent, struct path *path,
-		 bool copy_all, size_t len)
+int __union_copy_up(struct path *path, struct path *actual, const loff_t *truncate_to)
 {
-	struct dentry *top_dentry;
-	int error;
+	struct dentry *upper = path->dentry;
+	struct path parent;
+	int ret;
 
-	pr_devel("-->%s(%s,%s)\n", __func__,
-		 parent->dentry->d_name.name,
-		 path->dentry->d_name.name);
+	pr_devel("-->%s(%pq)\n", __func__, &path->dentry->d_name);
 
-	BUG_ON(!mutex_is_locked(&parent->dentry->d_inode->i_mutex));
+	/* We don't currently support copyup of special files, though in theory
+	 * there's no reason we couldn't at least copy up blockdev and chrdev
+	 * files.  FIFO files are problematic if open.  Socket files are
+	 * managed by AF_UNIX and would need help from there.  Directories are
+	 * handled by pathwalk.
+	 */
+	if (!S_ISREG(actual->dentry->d_inode->i_mode) &&
+	    !S_ISLNK(actual->dentry->d_inode->i_mode))
+		return -EACCES;
 
-	if (!IS_DIR_UNIONED(parent->dentry) || parent->mnt == path->mnt)
-		return 0;
-
-	BUG_ON(!S_ISDIR(parent->dentry->d_inode->i_mode));
-	if (IS_DEADDIR(parent->dentry->d_inode))
-		return -ENOENT;
-
-	if (copy_all && S_ISREG(path->dentry->d_inode->i_mode)) {
-		loff_t filesize = i_size_read(path->dentry->d_inode);
-		/* Check for overflow of file size */
-		if ((ssize_t)filesize != filesize)
-			return -EFBIG;
-		len = filesize;
-	}
-
-	top_dentry = lookup_one_len(path->dentry->d_name.name, parent->dentry,
-				    path->dentry->d_name.len);
-	if (IS_ERR(top_dentry))
-		return PTR_ERR(top_dentry);
-
-	if (top_dentry->d_inode) {
-		/* We raced with someone else and "lost".  That's okay, they
-		 * did all the work of copying up the file.
-		 *
-		 * Note that currently data copyup happens under the parent
-		 * dir's i_mutex.  If we move it outside that, we'll need some
-		 * way of waiting for the data copyup to complete here.
+	parent.mnt = path->mnt;
+	do {
+		/* We need to get the parent directory and then we need to lock
+		 * it.  Use the rename mutex to prevent rename from getting
+		 * underfoot whilst we do this.
 		 */
-		pr_devel("<--%s() = 0 [lost]\n", __func__);
-		return 0;
-	}
+		if (mutex_lock_interruptible(&upper->d_sb->s_vfs_rename_mutex) < 0)
+			return -EINTR;
 
-	error = 0;
-	if (!S_ISREG(path->dentry->d_inode->i_mode) &&
-	    !S_ISLNK(path->dentry->d_inode->i_mode))
-		goto out_dput;
+		if (upper->d_inode) {
+			mutex_unlock(&upper->d_sb->s_vfs_rename_mutex);
+			goto already_copied_up;
+		}
 
-	pr_devel("- copy!\n");
-	error = union_copyup_file(parent, path, top_dentry, len);
-	if (error < 0)
-		goto out_dput;
-	pr_devel("- copied\n");
+		parent.dentry = dget_parent(upper);
+		BUG_ON(IS_OPAQUE(parent.dentry->d_inode) && !d_is_fallthru(upper));
+		BUG_ON(d_is_whiteout(upper));
 
-	path_put(path);
-	path->mnt = parent->mnt;
-	path->dentry = top_dentry;
+		ret = mutex_lock_interruptible(&parent.dentry->d_inode->i_mutex);
+		mutex_unlock(&upper->d_sb->s_vfs_rename_mutex);
+		if (ret < 0) {
+			dput(parent.dentry);
+			goto out;
+		}
+
+		if (upper->d_inode)
+			goto already_copied_up_unlock;
+
+		/* Do the copy up (unlocks the parent) */
+		ret = __union_copy_up_exclusive(&parent, path, actual, truncate_to);
+		dput(parent.dentry);
+	} while (ret == -EAGAIN);
+
+out:
+	pr_devel("<--%s() = %d\n", __func__, ret);
+	return ret;
+
+already_copied_up_unlock:
+	mutex_unlock(&parent.dentry->d_inode->i_mutex);
+	dput(parent.dentry);
+already_copied_up:
+	pr_devel("<--%s() = 0 [already done]\n", __func__);
+	*actual = *path;
 	return 0;
+}
 
-out_dput:
-	dput(top_dentry);
-	pr_devel("<--%s() = %d\n", __func__, error);
-	return error;
+/*
+ * Copy up a file for do_last().  This gives us the parent, but we still
+ * need to work out the lower dentry.
+ */
+int __union_copy_up_for_do_last(struct path *parent, struct path *path,
+				bool will_truncate)
+{
+	struct path lower_cache, actual;
+	struct inode *inode;
+	loff_t zero = 0;
+	int ret;
+
+	pr_devel("-->%s(%pq)\n", __func__, &path->dentry->d_name);
+
+	do {
+		ret = mutex_lock_interruptible(&parent->dentry->d_inode->i_mutex);
+		if (ret < 0)
+			return ret;
+
+		/* Check to see if we raced with another copy-up or an unlink */
+		ret = 0;
+		if (path->dentry->d_parent != parent->dentry ||
+		    path->dentry->d_inode)
+			goto unlock_out;
+
+		inode = union_get_inode_locked(parent->dentry, path,
+					       &lower_cache, &actual);
+		if (IS_ERR_OR_NULL(inode)) {
+			ret = PTR_ERR(inode); /* 0 if nothing to copy up */
+			goto unlock_out;
+		}
+
+		/* Do the copy up (unlocks the parent). */
+		ret = __union_copy_up_exclusive(parent, path, &actual,
+						will_truncate ? &zero : NULL);
+		path_put_maybe(&lower_cache);
+	} while (ret == -EAGAIN);
+
+	pr_devel("<--%s() = %d [post]\n", __func__, ret);
+	return ret;
+
+unlock_out:
+	mutex_unlock(&parent->dentry->d_inode->i_mutex);
+	pr_devel("<--%s() = %d [pre]\n", __func__, ret);
+	return ret;
 }
diff --git a/fs/union.h b/fs/union.h
index 3ff82ae..7413cb0 100644
--- a/fs/union.h
+++ b/fs/union.h
@@ -14,9 +14,10 @@
  * of the License.
  */
 
+#include <linux/mount.h>
+
 #ifdef CONFIG_UNION_MOUNT
 
-#include <linux/mount.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
 #include <linux/path.h>
@@ -76,8 +77,7 @@
 }
 
 
-extern int union_create_topmost_dir(struct path *, struct qstr *, struct path *,
-				    struct path *);
+extern int union_create_topmost_dir(struct path *, struct path *, struct dentry *);
 
 /*
  * Determine whether we need to perform unionmount traversal or the copyup of a
@@ -109,8 +109,6 @@
 }
 
 extern int __union_copyup_one_dir(struct path *);
-extern int union_copyup(struct path *parent, struct path *path,
-			bool copy_all, size_t len);
 
 #else /* CONFIG_UNION_MOUNT */
 
@@ -151,12 +149,6 @@
 	return 0;
 }
 
-static inline int union_copyup(struct path *parent, struct path *path,
-			       bool copy_all, size_t len)
-{
-	return 0;
-}
-
 #endif	/* CONFIG_UNION_MOUNT */
 
 /*
@@ -169,3 +161,213 @@
 		return 0;
 	return __union_copyup_one_dir(path);
 }
+
+extern struct inode *__union_get_inode_locked(struct dentry *parent,
+					      struct path *upper,
+					      struct path *_lower_cache,
+					      struct path *_actual);
+extern struct inode *__union_get_inode(struct path *upper,
+				       struct path *_lower_cache,
+				       struct path *_actual);
+extern int __union_copy_up(struct path *path, struct path *actual,
+			   const loff_t *truncate_to);
+
+extern int __union_copy_up_locked(struct path *parent, struct path *path,
+				  struct path *actual,
+				  const loff_t *truncate_to);
+
+static inline void path_put_maybe(struct path *path)
+{
+	/* These optimise away if CONFIG_UNION_MOUNT=n */
+	if (unlikely(path->dentry))
+		dput(path->dentry);
+	if (unlikely(path->mnt))
+		mntput(path->mnt);
+}
+
+/**
+ * union_get_inode_locked - Get the actual inode and dentry for a dentry
+ * @parent: The locked parent of the object we're interested in.
+ * @path: The object we're interested in.
+ * @_lower_cache: Cache for lower dentry pinning.
+ * @_actual: The point actually corresponding to the returned inode.
+ *
+ * Gets the inode to be used for a dentry where that inode may exist on a lower
+ * layer in a union.  Note that we don't get a ref on the inode, so to pin it
+ * temporarily, we may point *_lower_cache at the lower dentry.
+ *
+ * The caller must hold i_mutex on the parent.
+ *
+ * Returns a pointer to the inode to use if a positive dentry is found, NULL if
+ * a negative dentry is found and an ERR_PTR() if lookup in the lower layers
+ * failed.
+ *
+ * On a successful return (positive or negative dentry), *_actual will be set
+ * to point to the dentry that we determined was the one of interest.  This
+ * does not hold any refs of its own.
+ *
+ * The caller should call path_put_maybe() on *_lower_cache to clear any pins
+ * it may contain.
+ */
+static inline struct inode *union_get_inode_locked(struct dentry *parent,
+						   struct path *path,
+						   struct path *_lower_cache,
+						   struct path *_actual)
+{
+	/* Optimise for the non-unionmount case. */
+	_lower_cache->dentry = NULL;
+	_lower_cache->mnt = NULL;
+	*_actual = *path;
+
+#ifndef CONFIG_UNION_MOUNT
+	return path->dentry->d_inode;
+#else
+	/* The normal case is that the inode is right where we expect... */
+	if (likely(path->dentry->d_inode))
+		return path->dentry->d_inode;
+
+	/* ... or the dentry is ordinarily negative. */
+	if (likely(!path->dentry->d_sb->s_union_lower_mnts))
+		return NULL;
+
+	if (d_is_whiteout(path->dentry) ||
+	    (!d_is_fallthru(path->dentry) && IS_OPAQUE(parent->d_inode)))
+		return NULL;
+
+	/* The caller already holds the parent lock, so just do the lookup. */
+	return __union_get_inode_locked(parent, path, _lower_cache, _actual);
+#endif
+}
+
+/**
+ * union_get_inode - Get the actual inode and dentry for an object
+ * @path: The object we're interested in.
+ * @_lower_cache: Cache for lower dentry pinning.
+ * @_actual: The point actually corresponding to the returned inode.
+ *
+ * Gets the inode to be used for a dentry where that inode may exist on a lower
+ * layer in a union.  Note that we don't get a ref on the inode, so to pin it
+ * temporarily, we may return a dentry in *_lower_cache.
+ *
+ * Returns a pointer to the inode to use if a positive dentry is found, NULL if
+ * a negative dentry is found and an ERR_PTR() if lookup in the lower layers
+ * failed.
+ *
+ * On a successful return (positive or negative dentry), *_actual will be set
+ * to point to the dentry of interest; it holds no ref of its own.  The caller
+ * should call path_put_maybe() on *_lower_cache to drop any pin taken.
+ */
+static inline struct inode *union_get_inode(struct path *path,
+					    struct path *_lower_cache,
+					    struct path *_actual)
+{
+	_lower_cache->mnt = NULL;
+	_lower_cache->dentry = NULL;
+	*_actual = *path;
+
+#ifndef CONFIG_UNION_MOUNT
+	return path->dentry->d_inode;
+#else
+	/* The normal case is that the inode is right where we expect... */
+	if (likely(path->dentry->d_inode))
+		return path->dentry->d_inode;
+
+	/* ... or the dentry is ordinarily negative. */
+	if (likely(!path->dentry->d_sb->s_union_lower_mnts))
+		return NULL;
+
+	if (d_is_whiteout(path->dentry))
+		return NULL;
+
+	/* We have to lock the parent and do a lookup. */
+	return __union_get_inode(path, _lower_cache, _actual);
+#endif
+}
+
+/**
+ * union_truncated_copy_up - If needed, partially copy up a file (truncate)
+ * @path: The target object.
+ * @actual: The actual location of the object, as set by union_get_inode().
+ * @truncate_to: The amount to copy up.
+ */
+static inline int union_truncated_copy_up(struct path *path, struct path *actual,
+					  const loff_t *truncate_to)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		return __union_copy_up(path, actual, truncate_to);
+#endif
+	return 0;
+}
+
+/**
+ * union_copy_up - If needed, copy up a file in its entirety
+ * @path: The target object.
+ * @actual: The actual location of the object, as set by union_get_inode().
+ */
+static inline int union_copy_up(struct path *path, struct path *actual)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		return __union_copy_up(path, actual, NULL);
+#endif
+	return 0;
+}
+
+/**
+ * union_copy_up_locked - If needed, copy up a file, caller holds parent lock
+ * @parent: The parent directory of the target object.
+ * @path: The target object.
+ * @actual: The actual location (see union_get_inode_locked()); unused so far.
+ *
+ * The caller must hold i_mutex on the parent directory.
+ */
+static inline int union_copy_up_locked(struct path *parent, struct path *path,
+				       struct path *actual)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		/* FIXME: stub; should call __union_copy_up_locked(). */
+		return -ENOANO;
+#endif
+	return 0;
+	
+}
+
+extern int __union_copy_up_for_do_last(struct path *, struct path *, bool);
+
+/**
+ * union_copy_up_for_do_last - If needed, copy up a file (maybe truncated)
+ * @parent: The parent directory of the target object.
+ * @path: The target object.
+ * @will_truncate: Whether to honour O_TRUNC or not.
+ */
+static inline int union_copy_up_for_do_last(struct path *parent, struct path *path,
+					    bool will_truncate)
+{
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!path->dentry->d_inode))
+		return __union_copy_up_for_do_last(parent, path, will_truncate);
+#endif
+	return 0;
+}
+
+static inline bool d_is_unioned(const struct dentry *dentry, const struct path *actual)
+{
+#ifndef CONFIG_UNION_MOUNT
+	return false;
+#else
+	return unlikely(dentry != actual->dentry);
+#endif
+}
+
+static inline bool is_unioned(const struct dentry *dentry, const struct inode *inode)
+{
+#ifndef CONFIG_UNION_MOUNT
+	return false;
+#else
+	return unlikely(dentry->d_inode != inode);
+#endif
+}
+
+extern struct union_stack *union_alloc(struct path *topmost);
diff --git a/fs/utimes.c b/fs/utimes.c
index 51800b3..17eb48a 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -10,6 +10,8 @@
 #include <linux/syscalls.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include "internal.h"
+#include "union.h"
 
 #ifdef __ARCH_WANT_SYS_UTIME
 
@@ -52,11 +54,18 @@
 {
 	int error;
 	struct iattr newattrs;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode;
+	struct path lower_cache, actual;
+
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode)) {
+		error = PTR_ERR(inode);
+		goto out;
+	}
 
 	error = mnt_want_write(path->mnt);
 	if (error)
-		goto out;
+		goto out_put_cache;
 
 	if (times && times[0].tv_nsec == UTIME_NOW &&
 		     times[1].tv_nsec == UTIME_NOW)
@@ -96,17 +105,34 @@
 			goto mnt_drop_write_and_out;
 
 		if (!inode_owner_or_capable(inode)) {
-			error = inode_permission(inode, MAY_WRITE);
-			if (error)
-				goto mnt_drop_write_and_out;
+			/* We have to be able to write to the upperfs. */
+			if (d_is_unioned(path->dentry, &actual)) {
+				error = -EROFS;
+				if (path->dentry->d_sb->s_flags & MS_RDONLY)
+					goto mnt_drop_write_and_out;
+				error = __inode_permission(inode, MAY_WRITE);
+				if (error)
+					goto mnt_drop_write_and_out;
+			} else {
+				error = inode_permission(inode, MAY_WRITE);
+				if (error)
+					goto mnt_drop_write_and_out;
+			}
 		}
 	}
+
+	error = union_copy_up(path, &actual);
+	if (error)
+		goto mnt_drop_write_and_out;
+	inode = path->dentry->d_inode;	/* copy-up may have replaced the inode */
 	mutex_lock(&inode->i_mutex);
 	error = notify_change(path->dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
 
 mnt_drop_write_and_out:
 	mnt_drop_write(path->mnt);
+out_put_cache:
+	path_put_maybe(&lower_cache);
 out:
 	return error;
 }
@@ -154,7 +180,7 @@
 		fdput(f);
 	} else {
 		struct path path;
-		int lookup_flags = LOOKUP_COPY_UP;
+		int lookup_flags = 0;
 
 		if (!(flags & AT_SYMLINK_NOFOLLOW))
 			lookup_flags |= LOOKUP_FOLLOW;
diff --git a/fs/xattr.c b/fs/xattr.c
index 0fe9e14..d81ece1 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -23,13 +23,19 @@
 #include <linux/posix_acl_xattr.h>
 
 #include <asm/uaccess.h>
+#include "internal.h"
+#include "union.h"
 
 /*
  * Check permissions for extended attribute access.  This is a bit complicated
  * because different namespaces have very different rules.
+ *
+ * Note: in unionmount conditions, dentry must be on the _upper_ layer whilst
+ * inode may be on the lower.
  */
 static int
-xattr_permission(struct inode *inode, const char *name, int mask)
+xattr_permission(struct dentry *dentry, struct inode *inode, const char *name,
+		 int mask)
 {
 	/*
 	 * We can never set or remove an extended attribute on a read-only
@@ -70,7 +76,13 @@
 			return -EPERM;
 	}
 
-	return inode_permission(inode, mask);
+	if (is_unioned(dentry, inode)) {
+		if (mask & MAY_WRITE && dentry->d_sb->s_flags & MS_RDONLY)
+			return -EROFS;
+		return __inode_permission(inode, mask);
+	} else {
+		return inode_permission(inode, mask);
+	}
 }
 
 /**
@@ -87,7 +99,7 @@
  *
  *  This function requires the caller to lock the inode's i_mutex before it
  *  is executed. It also assumes that the caller will make the appropriate
- *  permission checks.
+ *  permission checks.  The caller must also have copied up for unionmount.
  */
 int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
 		const void *value, size_t size, int flags)
@@ -97,6 +109,8 @@
 	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
 				   XATTR_SECURITY_PREFIX_LEN);
 
+	if (!inode)
+		return -ENOENT;
 	if (issec)
 		inode->i_flags &= ~S_NOSEC;
 	if (inode->i_op->setxattr) {
@@ -119,25 +133,43 @@
 
 
 int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+vfs_setxattr(struct path *path, const char *name, const void *value,
 		size_t size, int flags)
 {
-	struct inode *inode = dentry->d_inode;
+	struct path lower_cache, actual;
+	struct inode *inode;
 	int error;
 
-	error = xattr_permission(inode, name, MAY_WRITE);
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+again:
+	error = xattr_permission(path->dentry, inode, name, MAY_WRITE);
 	if (error)
-		return error;
+		goto out_lower;
 
-	mutex_lock(&inode->i_mutex);
-	error = security_inode_setxattr(dentry, name, value, size, flags);
+	mutex_lock_nested(&inode->i_mutex, 1);
+	error = security_inode_setxattr(actual.dentry, name, value, size, flags);
 	if (error)
-		goto out;
+		goto out_unlock;
 
-	error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+	if (d_is_unioned(path->dentry, &actual)) {
+		/* Unionmounted */
+		mutex_unlock(&inode->i_mutex);
+		error = union_copy_up(path, &actual);
+		if (error)
+			goto out_lower;
+		inode = actual.dentry->d_inode;
+		goto again;
+	}
 
-out:
+	error = __vfs_setxattr_noperm(actual.dentry, name, value, size, flags);
+
+out_unlock:
 	mutex_unlock(&inode->i_mutex);
+out_lower:
+	path_put_maybe(&lower_cache);
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -185,7 +217,7 @@
 	char *value = *xattr_value;
 	int error;
 
-	error = xattr_permission(inode, name, MAY_READ);
+	error = xattr_permission(dentry, inode, name, MAY_READ);
 	if (error)
 		return error;
 
@@ -230,86 +262,121 @@
 ssize_t
 vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
 {
-	struct inode *inode = dentry->d_inode;
-	int error;
+	struct inode *inode;
+	struct path lower_cache, actual;
+	struct path path = { .dentry = dentry };
+	ssize_t error;
 
-	error = xattr_permission(inode, name, MAY_READ);
-	if (error)
-		return error;
+	inode = union_get_inode(&path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
-	error = security_inode_getxattr(dentry, name);
+	error = xattr_permission(dentry, inode, name, MAY_READ);
 	if (error)
-		return error;
+		goto out_dput;
+
+	error = security_inode_getxattr(actual.dentry, name);
+	if (error)
+		goto out_dput;
 
 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
 				XATTR_SECURITY_PREFIX_LEN)) {
 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-		int ret = xattr_getsecurity(inode, suffix, value, size);
+		ssize_t ret = xattr_getsecurity(inode, suffix, value, size);
 		/*
 		 * Only overwrite the return value if a security module
 		 * is actually active.
 		 */
 		if (ret == -EOPNOTSUPP)
 			goto nolsm;
-		return ret;
+		error = ret;
+		goto out_dput;
 	}
 nolsm:
 	if (inode->i_op->getxattr)
-		error = inode->i_op->getxattr(dentry, name, value, size);
+		error = inode->i_op->getxattr(actual.dentry, name, value, size);
 	else
 		error = -EOPNOTSUPP;
 
+out_dput:
+	path_put_maybe(&lower_cache);
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_getxattr);
 
 ssize_t
-vfs_listxattr(struct dentry *d, char *list, size_t size)
+vfs_listxattr(struct dentry *dentry, char *list, size_t size)
 {
+	struct inode *inode;
+	struct path lower_cache, actual;
+	struct path path = { .dentry = dentry };
 	ssize_t error;
 
-	error = security_inode_listxattr(d);
-	if (error)
-		return error;
-	error = -EOPNOTSUPP;
-	if (d->d_inode->i_op->listxattr) {
-		error = d->d_inode->i_op->listxattr(d, list, size);
-	} else {
-		error = security_inode_listsecurity(d->d_inode, list, size);
-		if (size && error > size)
-			error = -ERANGE;
+	inode = union_get_inode(&path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	error = security_inode_listxattr(actual.dentry);
+	if (!error) {
+		error = -EOPNOTSUPP;
+		if (inode->i_op->listxattr) {
+			error = inode->i_op->listxattr(actual.dentry, list, size);
+		} else {
+			error = security_inode_listsecurity(inode, list, size);
+			if (size && error > size)
+				error = -ERANGE;
+		}
 	}
+	path_put_maybe(&lower_cache);
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_listxattr);
 
 int
-vfs_removexattr(struct dentry *dentry, const char *name)
+vfs_removexattr(struct path *path, const char *name)
 {
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode;
+	struct path lower_cache, actual;
 	int error;
 
-	if (!inode->i_op->removexattr)
-		return -EOPNOTSUPP;
+	inode = union_get_inode(path, &lower_cache, &actual);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
-	error = xattr_permission(inode, name, MAY_WRITE);
+again:
+	error = -EOPNOTSUPP;
+	if (!inode->i_op->removexattr)
+		goto out;
+
+	error = xattr_permission(path->dentry, inode, name, MAY_WRITE);
 	if (error)
-		return error;
+		goto out;
 
 	mutex_lock(&inode->i_mutex);
-	error = security_inode_removexattr(dentry, name);
+	error = security_inode_removexattr(actual.dentry, name);
 	if (error) {
 		mutex_unlock(&inode->i_mutex);
-		return error;
+		goto out;
 	}
 
-	error = inode->i_op->removexattr(dentry, name);
+	if (d_is_unioned(path->dentry, &actual)) {
+		mutex_unlock(&inode->i_mutex);
+		error = union_copy_up(path, &actual);
+		if (error)
+			goto out;
+		inode = actual.dentry->d_inode;
+		goto again;
+	}
+
+	error = inode->i_op->removexattr(actual.dentry, name);
 	mutex_unlock(&inode->i_mutex);
 
 	if (!error) {
-		fsnotify_xattr(dentry);
-		evm_inode_post_removexattr(dentry, name);
+		fsnotify_xattr(actual.dentry);
+		evm_inode_post_removexattr(actual.dentry, name);
 	}
+out:
+	path_put_maybe(&lower_cache);
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_removexattr);
@@ -319,7 +386,7 @@
  * Extended attribute SET operations
  */
 static long
-setxattr(struct dentry *d, const char __user *name, const void __user *value,
+setxattr(struct path *path, const char __user *name, const void __user *value,
 	 size_t size, int flags)
 {
 	int error;
@@ -355,7 +422,7 @@
 			posix_acl_fix_xattr_from_user(kvalue, size);
 	}
 
-	error = vfs_setxattr(d, kname, kvalue, size, flags);
+	error = vfs_setxattr(path, kname, kvalue, size, flags);
 out:
 	if (vvalue)
 		vfree(vvalue);
@@ -370,7 +437,7 @@
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_COPY_UP;
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
 
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
@@ -378,7 +445,7 @@
 		return error;
 	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = setxattr(path.dentry, name, value, size, flags);
+		error = setxattr(&path, name, value, size, flags);
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
@@ -395,7 +462,7 @@
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_COPY_UP;
+	unsigned int lookup_flags = 0;
 
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
@@ -403,7 +470,7 @@
 		return error;
 	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = setxattr(path.dentry, name, value, size, flags);
+		error = setxattr(&path, name, value, size, flags);
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
@@ -424,12 +491,16 @@
 	if (!f.file)
 		return error;
 	dentry = f.file->f_path.dentry;
+	error = -EACCES;
+	if (f.file->f_inode != dentry->d_inode)
+		goto error; /* Can't alter an open lower union file this way */
 	audit_inode(NULL, dentry, 0);
 	error = mnt_want_write_file(f.file);
 	if (!error) {
-		error = setxattr(dentry, name, value, size, flags);
+		error = setxattr(&f.file->f_path, name, value, size, flags);
 		mnt_drop_write_file(f.file);
 	}
+error:
 	fdput(f);
 	return error;
 }
@@ -524,13 +595,17 @@
 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
 		void __user *, value, size_t, size)
 {
+	struct dentry *dentry;
 	struct fd f = fdget(fd);
 	ssize_t error = -EBADF;
 
 	if (!f.file)
 		return error;
-	audit_inode(NULL, f.file->f_path.dentry, 0);
-	error = getxattr(f.file->f_path.dentry, name, value, size);
+	dentry = f.file->f_path.dentry;
+	if (f.file->f_inode != dentry->d_inode)
+		dentry = dentry->d_fallthru;
+	audit_inode(NULL, dentry, 0);
+	error = getxattr(dentry, name, value, size);
 	fdput(f);
 	return error;
 }
@@ -613,13 +688,17 @@
 
 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
 {
+	struct dentry *dentry;
 	struct fd f = fdget(fd);
 	ssize_t error = -EBADF;
 
 	if (!f.file)
 		return error;
-	audit_inode(NULL, f.file->f_path.dentry, 0);
-	error = listxattr(f.file->f_path.dentry, list, size);
+	dentry = f.file->f_path.dentry;
+	if (f.file->f_inode != dentry->d_inode)
+		dentry = dentry->d_fallthru;
+	audit_inode(NULL, dentry, 0);
+	error = listxattr(dentry, list, size);
 	fdput(f);
 	return error;
 }
@@ -628,7 +707,7 @@
  * Extended attribute REMOVE operations
  */
 static long
-removexattr(struct dentry *d, const char __user *name)
+removexattr(struct path *path, const char __user *name)
 {
 	int error;
 	char kname[XATTR_NAME_MAX + 1];
@@ -639,7 +718,7 @@
 	if (error < 0)
 		return error;
 
-	return vfs_removexattr(d, kname);
+	return vfs_removexattr(path, kname);
 }
 
 SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
@@ -647,7 +726,7 @@
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_COPY_UP;
+	unsigned int lookup_flags = LOOKUP_FOLLOW;
 
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
@@ -655,7 +734,7 @@
 		return error;
 	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = removexattr(path.dentry, name);
+		error = removexattr(&path, name);
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
@@ -671,7 +750,7 @@
 {
 	struct path path;
 	int error;
-	unsigned int lookup_flags = LOOKUP_COPY_UP;
+	unsigned int lookup_flags = 0;
 
 retry:
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
@@ -679,7 +758,7 @@
 		return error;
 	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = removexattr(path.dentry, name);
+		error = removexattr(&path, name);
 		mnt_drop_write(path.mnt);
 	}
 	path_put(&path);
@@ -699,12 +778,16 @@
 	if (!f.file)
 		return error;
 	dentry = f.file->f_path.dentry;
+	error = -EACCES;
+	if (f.file->f_inode != dentry->d_inode)
+		goto error; /* Can't alter an open lower union file this way */
 	audit_inode(NULL, dentry, 0);
 	error = mnt_want_write_file(f.file);
 	if (!error) {
-		error = removexattr(dentry, name);
+		error = removexattr(&f.file->f_path, name);
 		mnt_drop_write_file(f.file);
 	}
+error:
 	fdput(f);
 	return error;
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1e1a350..f53d445 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -140,7 +140,10 @@
 	void *d_fsdata;			/* fs-specific data */
 
 #ifdef CONFIG_UNION_MOUNT
-	struct union_stack *d_union_stack;	/* dirs in union stack */
+	union {
+		struct union_stack *d_union_stack; /* Dirs in union stack */
+		struct dentry *d_fallthru; /* Lower dentry pinned by fallthru */
+	};
 #endif
 	struct list_head d_lru;		/* LRU list */
 	/*
@@ -215,7 +218,6 @@
 #define DCACHE_CANT_MOUNT	0x0100
 #define DCACHE_GENOCIDE		0x0200
 #define DCACHE_SHRINK_LIST	0x0400
-#define DCACHE_WHITEOUT		0x0800	/* Stop lookup in a unioned file system */
 
 #define DCACHE_OP_WEAK_REVALIDATE	0x0800
 
@@ -226,8 +228,6 @@
 #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000
      /* Parent inode is watched by some fsnotify listener */
 
-#define DCACHE_FALLTHRU		0x8000	/* Continue lookup below an opaque dir */
-
 #define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
 #define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
 #define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
@@ -235,7 +235,14 @@
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
 #define DCACHE_DENTRY_KILLED	0x100000
-#define DCACHE_UNION_LOOKUP_DONE 0x200000 /* Union lookup was called on this dentry */
+
+#define DCACHE_SYMLINK			0x00200000 /* Symlink-type (or fallthru to such) */
+#define DCACHE_WHITEOUT			0x00400000 /* Whiteout-type (stop pathwalk) */
+#define DCACHE_FALLTHRU			0x00800000 /* Fallthru-type (jump union layer) */
+
+#define DCACHE_UNION_LOOKUP_DONE	0x01000000 /* Union lookup was called on this dentry */
+#define DCACHE_UNION_PINNING_LOWER	0x02000000 /* Union upper dentry is pinning lower */
+#define DCACHE_UNION_COPYING_UP		0x04000000 /* This dentry is being copied up */
 
 extern seqlock_t rename_lock;
 
@@ -433,16 +440,46 @@
 	return dentry->d_flags & DCACHE_MANAGED_DENTRY;
 }
 
-static inline int d_is_whiteout(struct dentry *dentry)
+static inline bool d_is_symlink(struct dentry *dentry)
+{
+	return dentry->d_flags & DCACHE_SYMLINK;
+}
+
+static inline bool d_is_whiteout(struct dentry *dentry)
 {
 	return dentry->d_flags & DCACHE_WHITEOUT;
 }
 
-static inline int d_is_fallthru(struct dentry *dentry)
+static inline bool d_is_fallthru(struct dentry *dentry)
 {
 	return dentry->d_flags & DCACHE_FALLTHRU;
 }
 
+static inline bool d_has_lower(struct dentry *dentry)
+{
+	return unlikely(!dentry->d_inode &&
+			dentry->d_flags & (DCACHE_FALLTHRU |
+					   DCACHE_UNION_PINNING_LOWER));
+}
+
+static inline void d_pin_lower(struct dentry *dentry, struct dentry *lower)
+{
+	BUG_ON(dentry->d_fallthru != NULL);
+	dentry->d_fallthru = lower;
+	smp_wmb();
+	dentry->d_flags |= DCACHE_UNION_PINNING_LOWER;
+}
+
+static inline bool d_is_pinning_lower(struct dentry *dentry)
+{
+	if (unlikely(dentry->d_flags & DCACHE_UNION_PINNING_LOWER)) {
+		smp_rmb(); /* d_fallthru must be read only after this flag is
+			    * checked. */
+		return true;
+	}
+	return false;
+}
+
 static inline bool d_mountpoint(struct dentry *dentry)
 {
 	return dentry->d_flags & DCACHE_MOUNTED;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9295c17..7433a97 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2025,7 +2025,7 @@
 	bool			separate; /* should "name" be freed? */
 };
 
-extern long vfs_truncate(struct path *, struct path *, loff_t);
+extern long vfs_truncate(struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
 extern int do_fallocate(struct file *file, int mode, loff_t offset,
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index fdbafc6..bb3e0c2 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -18,6 +18,7 @@
 
 struct inode;
 struct dentry;
+struct path;
 
 struct xattr_handler {
 	const char *prefix;
@@ -40,8 +41,8 @@
 ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
 int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
-int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
-int vfs_removexattr(struct dentry *, const char *);
+int vfs_setxattr(struct path *, const char *, const void *, size_t, int);
+int vfs_removexattr(struct path *, const char *);
 
 ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size);
 ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
diff --git a/kernel/audit.c b/kernel/audit.c
index 21c7fa6..3914639 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1627,6 +1627,7 @@
 {
 	struct audit_buffer *ab;
 	struct audit_names *name;
+	struct inode *inode;
 
 	name = kzalloc(sizeof(*name), GFP_NOFS);
 	if (!name)
@@ -1644,7 +1645,12 @@
 
 	/* Generate AUDIT_PATH record with object. */
 	name->type = AUDIT_TYPE_NORMAL;
-	audit_copy_inode(name, link->dentry, link->dentry->d_inode);
+	inode = link->dentry->d_inode;
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!inode) && link->dentry->d_fallthru)
+		inode = link->dentry->d_fallthru->d_inode;
+#endif
+	audit_copy_inode(name, link->dentry, inode);
 	audit_log_name(current->audit_context, name, link, 0, NULL);
 out:
 	kfree(name);
diff --git a/mm/shmem.c b/mm/shmem.c
index b2a1bb8..6f2ac85 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2161,9 +2161,11 @@
 	 * but each new link needs a new dentry, pinning lowmem, and
 	 * tmpfs dentries cannot be pruned until they are unlinked.
 	 */
-	ret = shmem_reserve_inode(inode->i_sb);
-	if (ret)
-		goto out;
+	if (inode->i_nlink > 0) {
+		ret = shmem_reserve_inode(inode->i_sb);
+		if (ret)
+			goto out;
+	}
 
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 	inc_nlink(inode);
@@ -2203,7 +2205,7 @@
 
 		spin_lock(&dentry->d_lock);
 		list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
-			spin_lock(&child->d_lock);
+			spin_lock_nested(&child->d_lock, 1);
 			if (d_is_whiteout(child)) {
 				__d_drop(child);
 				if (!list_empty(&child->d_lru)) {
diff --git a/security/commoncap.c b/security/commoncap.c
index c44b6fe..cd0fc37 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -381,6 +381,11 @@
 	int size;
 	struct vfs_cap_data caps;
 
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!inode) && dentry->d_fallthru)
+		inode = dentry->d_fallthru->d_inode;
+#endif
+
 	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
 
 	if (!inode || !inode->i_op->getxattr)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index db1fca9..30c51b8 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1527,6 +1527,11 @@
 	struct inode *inode = dentry->d_inode;
 	struct common_audit_data ad;
 
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!inode) && dentry->d_fallthru)
+		inode = dentry->d_fallthru->d_inode;
+#endif
+
 	ad.type = LSM_AUDIT_DATA_DENTRY;
 	ad.u.dentry = dentry;
 	return inode_has_perm(cred, inode, av, &ad, 0);
@@ -1542,6 +1547,11 @@
 	struct inode *inode = path->dentry->d_inode;
 	struct common_audit_data ad;
 
+#ifdef CONFIG_UNION_MOUNT
+	if (unlikely(!inode) && path->dentry->d_fallthru)
+		inode = path->dentry->d_fallthru->d_inode;
+#endif
+
 	ad.type = LSM_AUDIT_DATA_PATH;
 	ad.u.path = *path;
 	return inode_has_perm(cred, inode, av, &ad, 0);