Merge tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper updates from Alasdair G Kergon:
 "Improve multipath's retry mechanism in certain defined circumstances,
  and provide a simple reserve/release mechanism that lets userspace
  tools access thin provisioning metadata while the pool is in use."

* tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm thin: provide userspace access to pool metadata
  dm thin: use slab mempools
  dm mpath: allow ioctls to trigger pg init
  dm mpath: delay retry of bypassed pg
  dm mpath: reduce size of struct multipath
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 3370bc4..f5cfc62 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -287,6 +287,17 @@
 	the current transaction id is when you change it with this
 	compare-and-swap message.
 
+    reserve_metadata_snap
+
+        Reserve a copy of the data mapping btree for use by userland.
+        This allows userland to inspect the mappings as they were when
+        this message was executed.  Use the pool's status command to
+        get the root block associated with the metadata snapshot.
+
+    release_metadata_snap
+
+        Release a previously reserved copy of the data mapping btree.
+
 'thin' target
 -------------
 
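A typical round trip from userspace, using a hypothetical pool device
name (the held metadata root is reported as a field of the pool status
line, or '-' while no snapshot is reserved):

	dmsetup message /dev/mapper/pool 0 reserve_metadata_snap
	dmsetup status /dev/mapper/pool
	# ... inspect the mappings with a userspace tool ...
	dmsetup message /dev/mapper/pool 0 release_metadata_snap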
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 754f38f..638dae0 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/workqueue.h>
+#include <linux/delay.h>
 #include <scsi/scsi_dh.h>
 #include <linux/atomic.h>
 
@@ -61,11 +62,11 @@
 	struct list_head list;
 	struct dm_target *ti;
 
-	spinlock_t lock;
-
 	const char *hw_handler_name;
 	char *hw_handler_params;
 
+	spinlock_t lock;
+
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
 
@@ -81,16 +82,17 @@
 	struct priority_group *next_pg;	/* Switch to this PG if set */
 	unsigned repeat_count;		/* I/Os left before calling PS again */
 
-	unsigned queue_io;		/* Must we queue all I/O? */
-	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
-	unsigned saved_queue_if_no_path;/* Saved state during suspension */
+	unsigned queue_io:1;		/* Must we queue all I/O? */
+	unsigned queue_if_no_path:1;	/* Queue I/O if last path fails? */
+	unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
+
 	unsigned pg_init_retries;	/* Number of times to retry pg_init */
 	unsigned pg_init_count;		/* Number of times pg_init called */
 	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */
 
+	unsigned queue_size;
 	struct work_struct process_queued_ios;
 	struct list_head queued_ios;
-	unsigned queue_size;
 
 	struct work_struct trigger_event;
 
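Most of the saving behind "dm mpath: reduce size of struct multipath"
comes from turning the three flags above into single-bit fields.  A
standalone sketch of the effect (illustrative userspace C, not kernel
code):

	#include <stdio.h>

	/* The same three flags, unpacked versus packed. */
	struct flags_unpacked {
		unsigned queue_io;
		unsigned queue_if_no_path;
		unsigned saved_queue_if_no_path;
	};

	struct flags_packed {
		unsigned queue_io:1;
		unsigned queue_if_no_path:1;
		unsigned saved_queue_if_no_path:1;
	};

	int main(void)
	{
		/* Typically prints "12 vs 4" with 4-byte unsigned ints. */
		printf("%zu vs %zu\n", sizeof(struct flags_unpacked),
		       sizeof(struct flags_packed));
		return 0;
	}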
@@ -328,14 +330,18 @@
 	/*
 	 * Loop through priority groups until we find a valid path.
 	 * First time we skip PGs marked 'bypassed'.
-	 * Second time we only try the ones we skipped.
+	 * Second time we only try the ones we skipped, but set
+	 * pg_init_delay_retry so we do not hammer controllers.
 	 */
 	do {
 		list_for_each_entry(pg, &m->priority_groups, list) {
 			if (pg->bypassed == bypassed)
 				continue;
-			if (!__choose_path_in_pg(m, pg, nr_bytes))
+			if (!__choose_path_in_pg(m, pg, nr_bytes)) {
+				if (!bypassed)
+					m->pg_init_delay_retry = 1;
 				return;
+			}
 		}
 	} while (bypassed--);
 
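The pg_init_delay_retry flag set above is consumed the next time path
activation is kicked off.  A sketch of that consumer, modelled on the
driver's existing __pg_init_all_paths() (not part of this hunk; details
may differ slightly):

	static void __pg_init_all_paths(struct multipath *m)
	{
		struct pgpath *pgpath;
		unsigned long pg_init_delay = 0;

		m->pg_init_count++;
		m->pg_init_required = 0;

		/* Delay retries of a bypassed PG so a struggling
		 * controller is not hammered with back-to-back
		 * pg_init requests. */
		if (m->pg_init_delay_retry)
			pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs);

		list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
			/* Skip failed paths */
			if (!pgpath->is_active)
				continue;
			if (queue_delayed_work(kmpath_handlerd,
					       &pgpath->activate_path,
					       pg_init_delay))
				m->pg_init_in_progress++;
		}
	}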
@@ -481,9 +487,6 @@
 
 	spin_lock_irqsave(&m->lock, flags);
 
-	if (!m->queue_size)
-		goto out;
-
 	if (!m->current_pgpath)
 		__choose_pgpath(m, 0);
 
@@ -496,7 +499,6 @@
 	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
 		__pg_init_all_paths(m);
 
-out:
 	spin_unlock_irqrestore(&m->lock, flags);
 	if (!must_queue)
 		dispatch_queued_ios(m);
@@ -1517,11 +1519,16 @@
 static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
 			   unsigned long arg)
 {
-	struct multipath *m = (struct multipath *) ti->private;
-	struct block_device *bdev = NULL;
-	fmode_t mode = 0;
+	struct multipath *m = ti->private;
+	struct block_device *bdev;
+	fmode_t mode;
 	unsigned long flags;
-	int r = 0;
+	int r;
+
+again:
+	bdev = NULL;
+	mode = 0;
+	r = 0;
 
 	spin_lock_irqsave(&m->lock, flags);
 
@@ -1546,6 +1553,12 @@
 	if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
 		r = scsi_verify_blk_ioctl(NULL, cmd);
 
+	if (r == -EAGAIN && !fatal_signal_pending(current)) {
+		queue_work(kmultipathd, &m->process_queued_ios);
+		msleep(10);
+		goto again;
+	}
+
 	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 }
 
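The -EAGAIN branch above follows a common kernel idiom (and is why the
<linux/delay.h> include was added earlier for msleep()): kick the worker
that can clear the transient condition, sleep briefly, and retry unless
the caller has a fatal signal pending.  The shape of the idiom, with
hypothetical names (example_dev, example_try, example_wq):

	static int retry_until_ready(struct example_dev *ed)
	{
		int r;

		for (;;) {
			r = example_try(ed);	/* hypothetical helper */
			if (r != -EAGAIN || fatal_signal_pending(current))
				return r;
			/* Let the worker make progress, then retry. */
			queue_work(example_wq, &ed->resolve_work);
			msleep(10);
		}
	}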
@@ -1643,7 +1656,7 @@
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 737d388..3e2907f 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1082,15 +1082,139 @@
 	return 0;
 }
 
-static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
-				    dm_block_t *result)
+static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
+{
+	int r, inc;
+	struct thin_disk_superblock *disk_super;
+	struct dm_block *copy, *sblock;
+	dm_block_t held_root;
+
+	/*
+	 * Copy the superblock.
+	 */
+	dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
+	r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
+			       &sb_validator, &copy, &inc);
+	if (r)
+		return r;
+
+	BUG_ON(!inc);
+
+	held_root = dm_block_location(copy);
+	disk_super = dm_block_data(copy);
+
+	if (le64_to_cpu(disk_super->held_root)) {
+		DMWARN("Pool metadata snapshot already exists: release this before taking another.");
+
+		dm_tm_dec(pmd->tm, held_root);
+		dm_tm_unlock(pmd->tm, copy);
+		pmd->need_commit = 1;
+
+		return -EBUSY;
+	}
+
+	/*
+	 * Wipe the space map roots since we're not publishing the space maps.
+	 */
+	memset(&disk_super->data_space_map_root, 0,
+	       sizeof(disk_super->data_space_map_root));
+	memset(&disk_super->metadata_space_map_root, 0,
+	       sizeof(disk_super->metadata_space_map_root));
+
+	/*
+	 * Increment the reference counts of the data structures to be preserved.
+	 */
+	dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
+	dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
+	dm_tm_unlock(pmd->tm, copy);
+
+	/*
+	 * Write the held root into the superblock.
+	 */
+	r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+			     &sb_validator, &sblock);
+	if (r) {
+		dm_tm_dec(pmd->tm, held_root);
+		pmd->need_commit = 1;
+		return r;
+	}
+
+	disk_super = dm_block_data(sblock);
+	disk_super->held_root = cpu_to_le64(held_root);
+	dm_bm_unlock(sblock);
+
+	pmd->need_commit = 1;
+
+	return 0;
+}
+
+int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
+{
+	int r;
+
+	down_write(&pmd->root_lock);
+	r = __reserve_metadata_snap(pmd);
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
+static int __release_metadata_snap(struct dm_pool_metadata *pmd)
+{
+	int r;
+	struct thin_disk_superblock *disk_super;
+	struct dm_block *sblock, *copy;
+	dm_block_t held_root;
+
+	r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+			     &sb_validator, &sblock);
+	if (r)
+		return r;
+
+	disk_super = dm_block_data(sblock);
+	held_root = le64_to_cpu(disk_super->held_root);
+	disk_super->held_root = cpu_to_le64(0);
+	pmd->need_commit = 1;
+
+	dm_bm_unlock(sblock);
+
+	if (!held_root) {
+		DMWARN("No pool metadata snapshot found: nothing to release.");
+		return -EINVAL;
+	}
+
+	r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
+	if (r)
+		return r;
+
+	disk_super = dm_block_data(copy);
+	dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
+	dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
+	dm_sm_dec_block(pmd->metadata_sm, held_root);
+
+	return dm_tm_unlock(pmd->tm, copy);
+}
+
+int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
+{
+	int r;
+
+	down_write(&pmd->root_lock);
+	r = __release_metadata_snap(pmd);
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
+static int __get_metadata_snap(struct dm_pool_metadata *pmd,
+			       dm_block_t *result)
 {
 	int r;
 	struct thin_disk_superblock *disk_super;
 	struct dm_block *sblock;
 
-	r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
-			     &sb_validator, &sblock);
+	r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
+			    &sb_validator, &sblock);
 	if (r)
 		return r;
 
@@ -1100,13 +1224,13 @@
 	return dm_bm_unlock(sblock);
 }
 
-int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
-				   dm_block_t *result)
+int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
+			      dm_block_t *result)
 {
 	int r;
 
 	down_read(&pmd->root_lock);
-	r = __get_held_metadata_root(pmd, result);
+	r = __get_metadata_snap(pmd, result);
 	up_read(&pmd->root_lock);
 
 	return r;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index ed4725e..b88918c 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -90,11 +90,18 @@
 
 /*
  * Hold/get root for userspace transaction.
+ *
+ * The metadata snapshot is a copy of the current superblock (minus the
+ * space maps).  Userland can access its data structures for READ
+ * operations only.  While the snapshot is held, metadata updates incur a
+ * small performance hit from the extra copy-on-write operations on the
+ * metadata nodes, so release it as soon as you have finished with it.
  */
-int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
+int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
+int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
 
-int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
-				   dm_block_t *result);
+int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
+			      dm_block_t *result);
 
 /*
  * Actions on a single virtual device.
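How the three calls pair up, sketched back to back (illustrative only;
in the driver, reserve and release arrive as separate pool target
messages and the root is reported through the status line):

	static int example_metadata_snap_cycle(struct dm_pool_metadata *pmd)
	{
		dm_block_t root;
		int r;

		r = dm_pool_reserve_metadata_snap(pmd);	/* "reserve_metadata_snap" */
		if (r)
			return r;

		r = dm_pool_get_metadata_snap(pmd, &root);	/* surfaced via status */
		if (!r)
			DMINFO("metadata snap root: %llu", (unsigned long long)root);

		return dm_pool_release_metadata_snap(pmd);	/* "release_metadata_snap" */
	}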
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index eb3d138..37fdaf8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -111,7 +111,7 @@
 	dm_block_t block;
 };
 
-struct cell {
+struct dm_bio_prison_cell {
 	struct hlist_node list;
 	struct bio_prison *prison;
 	struct cell_key key;
@@ -141,6 +141,8 @@
 	return n;
 }
 
+static struct kmem_cache *_cell_cache;
+
 /*
  * @nr_cells should be the number of cells you want in use _concurrently_.
  * Don't confuse it with the number of distinct keys.
@@ -157,8 +159,7 @@
 		return NULL;
 
 	spin_lock_init(&prison->lock);
-	prison->cell_pool = mempool_create_kmalloc_pool(nr_cells,
-							sizeof(struct cell));
+	prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
 	if (!prison->cell_pool) {
 		kfree(prison);
 		return NULL;
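The same kmalloc-pool to slab-pool conversion is applied to each of the
three object types in "dm thin: use slab mempools".  The general
pattern, with an illustrative creation helper:

	static struct kmem_cache *_example_cache;

	static mempool_t *example_pool_create(unsigned nr_objs)
	{
		mempool_t *pool;

		/* A dedicated slab per object type gives exact-size
		 * allocations and lets slab debugging (poisoning, leak
		 * detection) attribute objects to their type. */
		_example_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
		if (!_example_cache)
			return NULL;

		pool = mempool_create_slab_pool(nr_objs, _example_cache);
		if (!pool)
			kmem_cache_destroy(_example_cache);

		return pool;
	}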
@@ -194,10 +195,10 @@
 		       (lhs->block == rhs->block);
 }
 
-static struct cell *__search_bucket(struct hlist_head *bucket,
-				    struct cell_key *key)
+static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
+						  struct cell_key *key)
 {
-	struct cell *cell;
+	struct dm_bio_prison_cell *cell;
 	struct hlist_node *tmp;
 
 	hlist_for_each_entry(cell, tmp, bucket, list)
@@ -214,12 +215,12 @@
  * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
  */
 static int bio_detain(struct bio_prison *prison, struct cell_key *key,
-		      struct bio *inmate, struct cell **ref)
+		      struct bio *inmate, struct dm_bio_prison_cell **ref)
 {
 	int r = 1;
 	unsigned long flags;
 	uint32_t hash = hash_key(prison, key);
-	struct cell *cell, *cell2;
+	struct dm_bio_prison_cell *cell, *cell2;
 
 	BUG_ON(hash > prison->nr_buckets);
 
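For context, the detain/release idiom these renamed structures support,
condensed from process_bio() later in this file:

	static void example_claim_block(struct thin_c *tc, struct bio *bio,
					dm_block_t block)
	{
		struct dm_bio_prison_cell *cell;
		struct cell_key key;

		build_virtual_key(tc->td, block, &key);
		if (bio_detain(tc->pool->prison, &key, bio, &cell))
			return;	/* cell already held; bio queued as an inmate */

		/* ... per-block processing goes here ... */

		cell_release_singleton(cell, bio);	/* bio was the sole detainee */
	}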
@@ -273,7 +274,7 @@
 /*
  * @inmates must have been initialised prior to this call
  */
-static void __cell_release(struct cell *cell, struct bio_list *inmates)
+static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
 {
 	struct bio_prison *prison = cell->prison;
 
@@ -287,7 +288,7 @@
 	mempool_free(cell, prison->cell_pool);
 }
 
-static void cell_release(struct cell *cell, struct bio_list *bios)
+static void cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
 {
 	unsigned long flags;
 	struct bio_prison *prison = cell->prison;
@@ -303,7 +304,7 @@
  * bio may be in the cell.  This function releases the cell, and also does
  * a sanity check.
  */
-static void __cell_release_singleton(struct cell *cell, struct bio *bio)
+static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	BUG_ON(cell->holder != bio);
 	BUG_ON(!bio_list_empty(&cell->bios));
@@ -311,7 +312,7 @@
 	__cell_release(cell, NULL);
 }
 
-static void cell_release_singleton(struct cell *cell, struct bio *bio)
+static void cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	unsigned long flags;
 	struct bio_prison *prison = cell->prison;
@@ -324,7 +325,8 @@
 /*
  * Sometimes we don't want the holder, just the additional bios.
  */
-static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
+static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
+				     struct bio_list *inmates)
 {
 	struct bio_prison *prison = cell->prison;
 
@@ -334,7 +336,8 @@
 	mempool_free(cell, prison->cell_pool);
 }
 
-static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
+static void cell_release_no_holder(struct dm_bio_prison_cell *cell,
+				   struct bio_list *inmates)
 {
 	unsigned long flags;
 	struct bio_prison *prison = cell->prison;
@@ -344,7 +347,7 @@
 	spin_unlock_irqrestore(&prison->lock, flags);
 }
 
-static void cell_error(struct cell *cell)
+static void cell_error(struct dm_bio_prison_cell *cell)
 {
 	struct bio_prison *prison = cell->prison;
 	struct bio_list bios;
@@ -491,7 +494,7 @@
  * also provides the interface for creating and destroying internal
  * devices.
  */
-struct new_mapping;
+struct dm_thin_new_mapping;
 
 struct pool_features {
 	unsigned zero_new_blocks:1;
@@ -537,7 +540,7 @@
 	struct deferred_set shared_read_ds;
 	struct deferred_set all_io_ds;
 
-	struct new_mapping *next_mapping;
+	struct dm_thin_new_mapping *next_mapping;
 	mempool_t *mapping_pool;
 	mempool_t *endio_hook_pool;
 };
@@ -630,11 +633,11 @@
 
 /*----------------------------------------------------------------*/
 
-struct endio_hook {
+struct dm_thin_endio_hook {
 	struct thin_c *tc;
 	struct deferred_entry *shared_read_entry;
 	struct deferred_entry *all_io_entry;
-	struct new_mapping *overwrite_mapping;
+	struct dm_thin_new_mapping *overwrite_mapping;
 };
 
 static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
@@ -647,7 +650,8 @@
 	bio_list_init(master);
 
 	while ((bio = bio_list_pop(&bios))) {
-		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+		struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
+
 		if (h->tc == tc)
 			bio_endio(bio, DM_ENDIO_REQUEUE);
 		else
@@ -736,7 +740,7 @@
 /*
  * Bio endio functions.
  */
-struct new_mapping {
+struct dm_thin_new_mapping {
 	struct list_head list;
 
 	unsigned quiesced:1;
@@ -746,7 +750,7 @@
 	struct thin_c *tc;
 	dm_block_t virt_block;
 	dm_block_t data_block;
-	struct cell *cell, *cell2;
+	struct dm_bio_prison_cell *cell, *cell2;
 	int err;
 
 	/*
@@ -759,7 +763,7 @@
 	bio_end_io_t *saved_bi_end_io;
 };
 
-static void __maybe_add_mapping(struct new_mapping *m)
+static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
 {
 	struct pool *pool = m->tc->pool;
 
@@ -772,7 +776,7 @@
 static void copy_complete(int read_err, unsigned long write_err, void *context)
 {
 	unsigned long flags;
-	struct new_mapping *m = context;
+	struct dm_thin_new_mapping *m = context;
 	struct pool *pool = m->tc->pool;
 
 	m->err = read_err || write_err ? -EIO : 0;
@@ -786,8 +790,8 @@
 static void overwrite_endio(struct bio *bio, int err)
 {
 	unsigned long flags;
-	struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
-	struct new_mapping *m = h->overwrite_mapping;
+	struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
+	struct dm_thin_new_mapping *m = h->overwrite_mapping;
 	struct pool *pool = m->tc->pool;
 
 	m->err = err;
@@ -811,7 +815,7 @@
 /*
  * This sends the bios in the cell back to the deferred_bios list.
  */
-static void cell_defer(struct thin_c *tc, struct cell *cell,
+static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell,
 		       dm_block_t data_block)
 {
 	struct pool *pool = tc->pool;
@@ -828,7 +832,7 @@
  * Same as cell_defer above, except it omits one particular detainee,
  * a write bio that covers the block and has already been processed.
  */
-static void cell_defer_except(struct thin_c *tc, struct cell *cell)
+static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
 	struct bio_list bios;
 	struct pool *pool = tc->pool;
@@ -843,7 +847,7 @@
 	wake_worker(pool);
 }
 
-static void process_prepared_mapping(struct new_mapping *m)
+static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 {
 	struct thin_c *tc = m->tc;
 	struct bio *bio;
@@ -886,7 +890,7 @@
 	mempool_free(m, tc->pool->mapping_pool);
 }
 
-static void process_prepared_discard(struct new_mapping *m)
+static void process_prepared_discard(struct dm_thin_new_mapping *m)
 {
 	int r;
 	struct thin_c *tc = m->tc;
@@ -909,11 +913,11 @@
 }
 
 static void process_prepared(struct pool *pool, struct list_head *head,
-			     void (*fn)(struct new_mapping *))
+			     void (*fn)(struct dm_thin_new_mapping *))
 {
 	unsigned long flags;
 	struct list_head maps;
-	struct new_mapping *m, *tmp;
+	struct dm_thin_new_mapping *m, *tmp;
 
 	INIT_LIST_HEAD(&maps);
 	spin_lock_irqsave(&pool->lock, flags);
@@ -957,9 +961,9 @@
 	return pool->next_mapping ? 0 : -ENOMEM;
 }
 
-static struct new_mapping *get_next_mapping(struct pool *pool)
+static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
 {
-	struct new_mapping *r = pool->next_mapping;
+	struct dm_thin_new_mapping *r = pool->next_mapping;
 
 	BUG_ON(!pool->next_mapping);
 
@@ -971,11 +975,11 @@
 static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 			  struct dm_dev *origin, dm_block_t data_origin,
 			  dm_block_t data_dest,
-			  struct cell *cell, struct bio *bio)
+			  struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	int r;
 	struct pool *pool = tc->pool;
-	struct new_mapping *m = get_next_mapping(pool);
+	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
 	INIT_LIST_HEAD(&m->list);
 	m->quiesced = 0;
@@ -997,7 +1001,8 @@
 	 * bio immediately. Otherwise we use kcopyd to clone the data first.
 	 */
 	if (io_overwrites_block(pool, bio)) {
-		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+		struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
+
 		h->overwrite_mapping = m;
 		m->bio = bio;
 		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
@@ -1025,7 +1030,7 @@
 
 static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
 				   dm_block_t data_origin, dm_block_t data_dest,
-				   struct cell *cell, struct bio *bio)
+				   struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	schedule_copy(tc, virt_block, tc->pool_dev,
 		      data_origin, data_dest, cell, bio);
@@ -1033,18 +1038,18 @@
 
 static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
 				   dm_block_t data_dest,
-				   struct cell *cell, struct bio *bio)
+				   struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	schedule_copy(tc, virt_block, tc->origin_dev,
 		      virt_block, data_dest, cell, bio);
 }
 
 static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
-			  dm_block_t data_block, struct cell *cell,
+			  dm_block_t data_block, struct dm_bio_prison_cell *cell,
 			  struct bio *bio)
 {
 	struct pool *pool = tc->pool;
-	struct new_mapping *m = get_next_mapping(pool);
+	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
 	INIT_LIST_HEAD(&m->list);
 	m->quiesced = 1;
@@ -1065,12 +1070,12 @@
 		process_prepared_mapping(m);
 
 	else if (io_overwrites_block(pool, bio)) {
-		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+		struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
+
 		h->overwrite_mapping = m;
 		m->bio = bio;
 		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
 		remap_and_issue(tc, bio, data_block);
-
 	} else {
 		int r;
 		struct dm_io_region to;
@@ -1155,7 +1160,7 @@
  */
 static void retry_on_resume(struct bio *bio)
 {
-	struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+	struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
 	struct thin_c *tc = h->tc;
 	struct pool *pool = tc->pool;
 	unsigned long flags;
@@ -1165,7 +1170,7 @@
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
-static void no_space(struct cell *cell)
+static void no_space(struct dm_bio_prison_cell *cell)
 {
 	struct bio *bio;
 	struct bio_list bios;
@@ -1182,11 +1187,11 @@
 	int r;
 	unsigned long flags;
 	struct pool *pool = tc->pool;
-	struct cell *cell, *cell2;
+	struct dm_bio_prison_cell *cell, *cell2;
 	struct cell_key key, key2;
 	dm_block_t block = get_bio_block(tc, bio);
 	struct dm_thin_lookup_result lookup_result;
-	struct new_mapping *m;
+	struct dm_thin_new_mapping *m;
 
 	build_virtual_key(tc->td, block, &key);
 	if (bio_detain(tc->pool->prison, &key, bio, &cell))
@@ -1263,7 +1268,7 @@
 static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 			  struct cell_key *key,
 			  struct dm_thin_lookup_result *lookup_result,
-			  struct cell *cell)
+			  struct dm_bio_prison_cell *cell)
 {
 	int r;
 	dm_block_t data_block;
@@ -1290,7 +1295,7 @@
 			       dm_block_t block,
 			       struct dm_thin_lookup_result *lookup_result)
 {
-	struct cell *cell;
+	struct dm_bio_prison_cell *cell;
 	struct pool *pool = tc->pool;
 	struct cell_key key;
 
@@ -1305,7 +1310,7 @@
 	if (bio_data_dir(bio) == WRITE)
 		break_sharing(tc, bio, block, &key, lookup_result, cell);
 	else {
-		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+		struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
 
 		h->shared_read_entry = ds_inc(&pool->shared_read_ds);
 
@@ -1315,7 +1320,7 @@
 }
 
 static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
-			    struct cell *cell)
+			    struct dm_bio_prison_cell *cell)
 {
 	int r;
 	dm_block_t data_block;
@@ -1363,7 +1368,7 @@
 {
 	int r;
 	dm_block_t block = get_bio_block(tc, bio);
-	struct cell *cell;
+	struct dm_bio_prison_cell *cell;
 	struct cell_key key;
 	struct dm_thin_lookup_result lookup_result;
 
@@ -1432,7 +1437,7 @@
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	while ((bio = bio_list_pop(&bios))) {
-		struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
+		struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
 		struct thin_c *tc = h->tc;
 
 		/*
@@ -1522,10 +1527,10 @@
 	wake_worker(pool);
 }
 
-static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
+static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
 {
 	struct pool *pool = tc->pool;
-	struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
+	struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
 
 	h->tc = tc;
 	h->shared_read_entry = NULL;
@@ -1687,6 +1692,9 @@
 	kfree(pool);
 }
 
+static struct kmem_cache *_new_mapping_cache;
+static struct kmem_cache *_endio_hook_cache;
+
 static struct pool *pool_create(struct mapped_device *pool_md,
 				struct block_device *metadata_dev,
 				unsigned long block_size, char **error)
@@ -1755,16 +1763,16 @@
 	ds_init(&pool->all_io_ds);
 
 	pool->next_mapping = NULL;
-	pool->mapping_pool =
-		mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping));
+	pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE,
+						      _new_mapping_cache);
 	if (!pool->mapping_pool) {
 		*error = "Error creating pool's mapping mempool";
 		err_p = ERR_PTR(-ENOMEM);
 		goto bad_mapping_pool;
 	}
 
-	pool->endio_hook_pool =
-		mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook));
+	pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE,
+							 _endio_hook_cache);
 	if (!pool->endio_hook_pool) {
 		*error = "Error creating pool's endio_hook mempool";
 		err_p = ERR_PTR(-ENOMEM);
@@ -2276,6 +2284,36 @@
 	return 0;
 }
 
+static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+	int r;
+
+	r = check_arg_count(argc, 1);
+	if (r)
+		return r;
+
+	r = dm_pool_reserve_metadata_snap(pool->pmd);
+	if (r)
+		DMWARN("reserve_metadata_snap message failed.");
+
+	return r;
+}
+
+static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
+{
+	int r;
+
+	r = check_arg_count(argc, 1);
+	if (r)
+		return r;
+
+	r = dm_pool_release_metadata_snap(pool->pmd);
+	if (r)
+		DMWARN("release_metadata_snap message failed.");
+
+	return r;
+}
+
 /*
  * Messages supported:
  *   create_thin	<dev_id>
@@ -2283,6 +2321,8 @@
  *   delete		<dev_id>
  *   trim		<dev_id> <new_size_in_sectors>
  *   set_transaction_id <current_trans_id> <new_trans_id>
+ *   reserve_metadata_snap
+ *   release_metadata_snap
  */
 static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
 {
@@ -2302,6 +2342,12 @@
 	else if (!strcasecmp(argv[0], "set_transaction_id"))
 		r = process_set_transaction_id_mesg(argc, argv, pool);
 
+	else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
+		r = process_reserve_metadata_snap_mesg(argc, argv, pool);
+
+	else if (!strcasecmp(argv[0], "release_metadata_snap"))
+		r = process_release_metadata_snap_mesg(argc, argv, pool);
+
 	else
 		DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
 
@@ -2361,7 +2407,7 @@
 		if (r)
 			return r;
 
-		r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
+		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
 		if (r)
 			return r;
 
@@ -2457,7 +2503,7 @@
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2613,9 +2659,9 @@
 		      union map_info *map_context)
 {
 	unsigned long flags;
-	struct endio_hook *h = map_context->ptr;
+	struct dm_thin_endio_hook *h = map_context->ptr;
 	struct list_head work;
-	struct new_mapping *m, *tmp;
+	struct dm_thin_new_mapping *m, *tmp;
 	struct pool *pool = h->tc->pool;
 
 	if (h->shared_read_entry) {
@@ -2755,7 +2801,32 @@
 
 	r = dm_register_target(&pool_target);
 	if (r)
-		dm_unregister_target(&thin_target);
+		goto bad_pool_target;
+
+	r = -ENOMEM;
+
+	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
+	if (!_cell_cache)
+		goto bad_cell_cache;
+
+	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
+	if (!_new_mapping_cache)
+		goto bad_new_mapping_cache;
+
+	_endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0);
+	if (!_endio_hook_cache)
+		goto bad_endio_hook_cache;
+
+	return 0;
+
+bad_endio_hook_cache:
+	kmem_cache_destroy(_new_mapping_cache);
+bad_new_mapping_cache:
+	kmem_cache_destroy(_cell_cache);
+bad_cell_cache:
+	dm_unregister_target(&pool_target);
+bad_pool_target:
+	dm_unregister_target(&thin_target);
 
 	return r;
 }
@@ -2764,6 +2835,10 @@
 {
 	dm_unregister_target(&thin_target);
 	dm_unregister_target(&pool_target);
+
+	kmem_cache_destroy(_cell_cache);
+	kmem_cache_destroy(_new_mapping_cache);
+	kmem_cache_destroy(_endio_hook_cache);
 }
 
 module_init(dm_thin_init);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 6f8d387..400fe14 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -249,6 +249,7 @@
 
 	return r;
 }
+EXPORT_SYMBOL_GPL(dm_tm_shadow_block);
 
 int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
 		    struct dm_block_validator *v,
@@ -259,6 +260,7 @@
 
 	return dm_bm_read_lock(tm->bm, b, v, blk);
 }
+EXPORT_SYMBOL_GPL(dm_tm_read_lock);
 
 int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
 {