|  | // SPDX-License-Identifier: GPL-2.0-only | 
|  | /* | 
|  | * Copyright (C) 2002 Sistina Software (UK) Limited. | 
|  | * Copyright (C) 2006 Red Hat GmbH | 
|  | * | 
|  | * This file is released under the GPL. | 
|  | * | 
|  | * Kcopyd provides a simple interface for copying an area of one | 
|  | * block-device to one or more other block-devices, with an asynchronous | 
|  | * completion notification. | 
|  | */ | 
|  |  | 
|  | #include <linux/types.h> | 
|  | #include <linux/atomic.h> | 
|  | #include <linux/blkdev.h> | 
|  | #include <linux/fs.h> | 
|  | #include <linux/init.h> | 
|  | #include <linux/list.h> | 
|  | #include <linux/mempool.h> | 
|  | #include <linux/module.h> | 
|  | #include <linux/pagemap.h> | 
|  | #include <linux/slab.h> | 
|  | #include <linux/vmalloc.h> | 
|  | #include <linux/workqueue.h> | 
|  | #include <linux/mutex.h> | 
|  | #include <linux/delay.h> | 
|  | #include <linux/device-mapper.h> | 
|  | #include <linux/dm-kcopyd.h> | 
|  |  | 
|  | #include "dm-core.h" | 
|  |  | 
|  | #define SPLIT_COUNT	8 | 
|  | #define MIN_JOBS	8 | 
|  |  | 
|  | #define DEFAULT_SUB_JOB_SIZE_KB 512 | 
|  | #define MAX_SUB_JOB_SIZE_KB     1024 | 
|  |  | 
|  | static unsigned int kcopyd_subjob_size_kb = DEFAULT_SUB_JOB_SIZE_KB; | 
|  |  | 
|  | module_param(kcopyd_subjob_size_kb, uint, 0644); | 
|  | MODULE_PARM_DESC(kcopyd_subjob_size_kb, "Sub-job size for dm-kcopyd clients"); | 
|  |  | 
|  | static unsigned int dm_get_kcopyd_subjob_size(void) | 
|  | { | 
|  | unsigned int sub_job_size_kb; | 
|  |  | 
|  | sub_job_size_kb = __dm_get_module_param(&kcopyd_subjob_size_kb, | 
|  | DEFAULT_SUB_JOB_SIZE_KB, | 
|  | MAX_SUB_JOB_SIZE_KB); | 
|  |  | 
|  | return sub_job_size_kb << 1; | 
|  | } | 
|  |  | 
|  | /* | 
|  | *---------------------------------------------------------------- | 
|  | * Each kcopyd client has its own little pool of preallocated | 
|  | * pages for kcopyd io. | 
|  | *--------------------------------------------------------------- | 
|  | */ | 
|  | struct dm_kcopyd_client { | 
|  | struct page_list *pages; | 
|  | unsigned int nr_reserved_pages; | 
|  | unsigned int nr_free_pages; | 
|  | unsigned int sub_job_size; | 
|  |  | 
|  | struct dm_io_client *io_client; | 
|  |  | 
|  | wait_queue_head_t destroyq; | 
|  |  | 
|  | mempool_t job_pool; | 
|  |  | 
|  | struct workqueue_struct *kcopyd_wq; | 
|  | struct work_struct kcopyd_work; | 
|  |  | 
|  | struct dm_kcopyd_throttle *throttle; | 
|  |  | 
|  | atomic_t nr_jobs; | 
|  |  | 
|  | /* | 
|  | * We maintain four lists of jobs: | 
|  | * | 
|  | * i)   jobs waiting for pages | 
|  | * ii)  jobs that have pages, and are waiting for the io to be issued. | 
|  | * iii) jobs that don't need to do any IO and just run a callback | 
|  | * iv) jobs that have completed. | 
|  | * | 
|  | * All four of these are protected by job_lock. | 
|  | */ | 
|  | spinlock_t job_lock; | 
|  | struct list_head callback_jobs; | 
|  | struct list_head complete_jobs; | 
|  | struct list_head io_jobs; | 
|  | struct list_head pages_jobs; | 
|  | }; | 
|  |  | 
|  | static struct page_list zero_page_list; | 
|  |  | 
|  | static DEFINE_SPINLOCK(throttle_spinlock); | 
|  |  | 
|  | /* | 
|  | * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. | 
|  | * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided | 
|  | * by 2. | 
|  | */ | 
|  | #define ACCOUNT_INTERVAL_SHIFT		SHIFT_HZ | 
|  |  | 
|  | /* | 
|  | * Sleep this number of milliseconds. | 
|  | * | 
|  | * The value was decided experimentally. | 
|  | * Smaller values seem to cause an increased copy rate above the limit. | 
|  | * The reason for this is unknown but possibly due to jiffies rounding errors | 
|  | * or read/write cache inside the disk. | 
|  | */ | 
|  | #define SLEEP_USEC			100000 | 
|  |  | 
|  | /* | 
|  | * Maximum number of sleep events. There is a theoretical livelock if more | 
|  | * kcopyd clients do work simultaneously which this limit avoids. | 
|  | */ | 
|  | #define MAX_SLEEPS			10 | 
|  |  | 
|  | static void io_job_start(struct dm_kcopyd_throttle *t) | 
|  | { | 
|  | unsigned int throttle, now, difference; | 
|  | int slept = 0, skew; | 
|  |  | 
|  | if (unlikely(!t)) | 
|  | return; | 
|  |  | 
|  | try_again: | 
|  | spin_lock_irq(&throttle_spinlock); | 
|  |  | 
|  | throttle = READ_ONCE(t->throttle); | 
|  |  | 
|  | if (likely(throttle >= 100)) | 
|  | goto skip_limit; | 
|  |  | 
|  | now = jiffies; | 
|  | difference = now - t->last_jiffies; | 
|  | t->last_jiffies = now; | 
|  | if (t->num_io_jobs) | 
|  | t->io_period += difference; | 
|  | t->total_period += difference; | 
|  |  | 
|  | /* | 
|  | * Maintain sane values if we got a temporary overflow. | 
|  | */ | 
|  | if (unlikely(t->io_period > t->total_period)) | 
|  | t->io_period = t->total_period; | 
|  |  | 
|  | if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { | 
|  | int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); | 
|  |  | 
|  | t->total_period >>= shift; | 
|  | t->io_period >>= shift; | 
|  | } | 
|  |  | 
|  | skew = t->io_period - throttle * t->total_period / 100; | 
|  |  | 
|  | if (unlikely(skew > 0) && slept < MAX_SLEEPS) { | 
|  | slept++; | 
|  | spin_unlock_irq(&throttle_spinlock); | 
|  | fsleep(SLEEP_USEC); | 
|  | goto try_again; | 
|  | } | 
|  |  | 
|  | skip_limit: | 
|  | t->num_io_jobs++; | 
|  |  | 
|  | spin_unlock_irq(&throttle_spinlock); | 
|  | } | 
|  |  | 
|  | static void io_job_finish(struct dm_kcopyd_throttle *t) | 
|  | { | 
|  | unsigned long flags; | 
|  |  | 
|  | if (unlikely(!t)) | 
|  | return; | 
|  |  | 
|  | spin_lock_irqsave(&throttle_spinlock, flags); | 
|  |  | 
|  | t->num_io_jobs--; | 
|  |  | 
|  | if (likely(READ_ONCE(t->throttle) >= 100)) | 
|  | goto skip_limit; | 
|  |  | 
|  | if (!t->num_io_jobs) { | 
|  | unsigned int now, difference; | 
|  |  | 
|  | now = jiffies; | 
|  | difference = now - t->last_jiffies; | 
|  | t->last_jiffies = now; | 
|  |  | 
|  | t->io_period += difference; | 
|  | t->total_period += difference; | 
|  |  | 
|  | /* | 
|  | * Maintain sane values if we got a temporary overflow. | 
|  | */ | 
|  | if (unlikely(t->io_period > t->total_period)) | 
|  | t->io_period = t->total_period; | 
|  | } | 
|  |  | 
|  | skip_limit: | 
|  | spin_unlock_irqrestore(&throttle_spinlock, flags); | 
|  | } | 
|  |  | 
|  |  | 
|  | static void wake(struct dm_kcopyd_client *kc) | 
|  | { | 
|  | queue_work(kc->kcopyd_wq, &kc->kcopyd_work); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Obtain one page for the use of kcopyd. | 
|  | */ | 
|  | static struct page_list *alloc_pl(gfp_t gfp) | 
|  | { | 
|  | struct page_list *pl; | 
|  |  | 
|  | pl = kmalloc(sizeof(*pl), gfp); | 
|  | if (!pl) | 
|  | return NULL; | 
|  |  | 
|  | pl->page = alloc_page(gfp | __GFP_HIGHMEM); | 
|  | if (!pl->page) { | 
|  | kfree(pl); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | return pl; | 
|  | } | 
|  |  | 
|  | static void free_pl(struct page_list *pl) | 
|  | { | 
|  | __free_page(pl->page); | 
|  | kfree(pl); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Add the provided pages to a client's free page list, releasing | 
|  | * back to the system any beyond the reserved_pages limit. | 
|  | */ | 
|  | static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) | 
|  | { | 
|  | struct page_list *next; | 
|  |  | 
|  | do { | 
|  | next = pl->next; | 
|  |  | 
|  | if (kc->nr_free_pages >= kc->nr_reserved_pages) | 
|  | free_pl(pl); | 
|  | else { | 
|  | pl->next = kc->pages; | 
|  | kc->pages = pl; | 
|  | kc->nr_free_pages++; | 
|  | } | 
|  |  | 
|  | pl = next; | 
|  | } while (pl); | 
|  | } | 
|  |  | 
|  | static int kcopyd_get_pages(struct dm_kcopyd_client *kc, | 
|  | unsigned int nr, struct page_list **pages) | 
|  | { | 
|  | struct page_list *pl; | 
|  |  | 
|  | *pages = NULL; | 
|  |  | 
|  | do { | 
|  | pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM); | 
|  | if (unlikely(!pl)) { | 
|  | /* Use reserved pages */ | 
|  | pl = kc->pages; | 
|  | if (unlikely(!pl)) | 
|  | goto out_of_memory; | 
|  | kc->pages = pl->next; | 
|  | kc->nr_free_pages--; | 
|  | } | 
|  | pl->next = *pages; | 
|  | *pages = pl; | 
|  | } while (--nr); | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | out_of_memory: | 
|  | if (*pages) | 
|  | kcopyd_put_pages(kc, *pages); | 
|  | return -ENOMEM; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * These three functions resize the page pool. | 
|  | */ | 
|  | static void drop_pages(struct page_list *pl) | 
|  | { | 
|  | struct page_list *next; | 
|  |  | 
|  | while (pl) { | 
|  | next = pl->next; | 
|  | free_pl(pl); | 
|  | pl = next; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Allocate and reserve nr_pages for the use of a specific client. | 
|  | */ | 
|  | static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned int nr_pages) | 
|  | { | 
|  | unsigned int i; | 
|  | struct page_list *pl = NULL, *next; | 
|  |  | 
|  | for (i = 0; i < nr_pages; i++) { | 
|  | next = alloc_pl(GFP_KERNEL); | 
|  | if (!next) { | 
|  | if (pl) | 
|  | drop_pages(pl); | 
|  | return -ENOMEM; | 
|  | } | 
|  | next->next = pl; | 
|  | pl = next; | 
|  | } | 
|  |  | 
|  | kc->nr_reserved_pages += nr_pages; | 
|  | kcopyd_put_pages(kc, pl); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void client_free_pages(struct dm_kcopyd_client *kc) | 
|  | { | 
|  | BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); | 
|  | drop_pages(kc->pages); | 
|  | kc->pages = NULL; | 
|  | kc->nr_free_pages = kc->nr_reserved_pages = 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | *--------------------------------------------------------------- | 
|  | * kcopyd_jobs need to be allocated by the *clients* of kcopyd, | 
|  | * for this reason we use a mempool to prevent the client from | 
|  | * ever having to do io (which could cause a deadlock). | 
|  | *--------------------------------------------------------------- | 
|  | */ | 
|  | struct kcopyd_job { | 
|  | struct dm_kcopyd_client *kc; | 
|  | struct list_head list; | 
|  | unsigned int flags; | 
|  |  | 
|  | /* | 
|  | * Error state of the job. | 
|  | */ | 
|  | int read_err; | 
|  | unsigned long write_err; | 
|  |  | 
|  | /* | 
|  | * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES. | 
|  | */ | 
|  | enum req_op op; | 
|  | struct dm_io_region source; | 
|  |  | 
|  | /* | 
|  | * The destinations for the transfer. | 
|  | */ | 
|  | unsigned int num_dests; | 
|  | struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; | 
|  |  | 
|  | struct page_list *pages; | 
|  |  | 
|  | /* | 
|  | * Set this to ensure you are notified when the job has | 
|  | * completed.  'context' is for callback to use. | 
|  | */ | 
|  | dm_kcopyd_notify_fn fn; | 
|  | void *context; | 
|  |  | 
|  | /* | 
|  | * These fields are only used if the job has been split | 
|  | * into more manageable parts. | 
|  | */ | 
|  | struct mutex lock; | 
|  | atomic_t sub_jobs; | 
|  | sector_t progress; | 
|  | sector_t write_offset; | 
|  |  | 
|  | struct kcopyd_job *master_job; | 
|  | }; | 
|  |  | 
|  | static struct kmem_cache *_job_cache; | 
|  |  | 
|  | int __init dm_kcopyd_init(void) | 
|  | { | 
|  | _job_cache = kmem_cache_create("kcopyd_job", | 
|  | sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), | 
|  | __alignof__(struct kcopyd_job), 0, NULL); | 
|  | if (!_job_cache) | 
|  | return -ENOMEM; | 
|  |  | 
|  | zero_page_list.next = &zero_page_list; | 
|  | zero_page_list.page = ZERO_PAGE(0); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | void dm_kcopyd_exit(void) | 
|  | { | 
|  | kmem_cache_destroy(_job_cache); | 
|  | _job_cache = NULL; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Functions to push and pop a job onto the head of a given job | 
|  | * list. | 
|  | */ | 
|  | static struct kcopyd_job *pop_io_job(struct list_head *jobs, | 
|  | struct dm_kcopyd_client *kc) | 
|  | { | 
|  | struct kcopyd_job *job; | 
|  |  | 
|  | /* | 
|  | * For I/O jobs, pop any read, any write without sequential write | 
|  | * constraint and sequential writes that are at the right position. | 
|  | */ | 
|  | list_for_each_entry(job, jobs, list) { | 
|  | if (job->op == REQ_OP_READ || | 
|  | !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { | 
|  | list_del(&job->list); | 
|  | return job; | 
|  | } | 
|  |  | 
|  | if (job->write_offset == job->master_job->write_offset) { | 
|  | job->master_job->write_offset += job->source.count; | 
|  | list_del(&job->list); | 
|  | return job; | 
|  | } | 
|  | } | 
|  |  | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static struct kcopyd_job *pop(struct list_head *jobs, | 
|  | struct dm_kcopyd_client *kc) | 
|  | { | 
|  | struct kcopyd_job *job = NULL; | 
|  |  | 
|  | spin_lock_irq(&kc->job_lock); | 
|  |  | 
|  | if (!list_empty(jobs)) { | 
|  | if (jobs == &kc->io_jobs) | 
|  | job = pop_io_job(jobs, kc); | 
|  | else { | 
|  | job = list_entry(jobs->next, struct kcopyd_job, list); | 
|  | list_del(&job->list); | 
|  | } | 
|  | } | 
|  | spin_unlock_irq(&kc->job_lock); | 
|  |  | 
|  | return job; | 
|  | } | 
|  |  | 
|  | static void push(struct list_head *jobs, struct kcopyd_job *job) | 
|  | { | 
|  | unsigned long flags; | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | spin_lock_irqsave(&kc->job_lock, flags); | 
|  | list_add_tail(&job->list, jobs); | 
|  | spin_unlock_irqrestore(&kc->job_lock, flags); | 
|  | } | 
|  |  | 
|  |  | 
|  | static void push_head(struct list_head *jobs, struct kcopyd_job *job) | 
|  | { | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | spin_lock_irq(&kc->job_lock); | 
|  | list_add(&job->list, jobs); | 
|  | spin_unlock_irq(&kc->job_lock); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * These three functions process 1 item from the corresponding | 
|  | * job list. | 
|  | * | 
|  | * They return: | 
|  | * < 0: error | 
|  | *   0: success | 
|  | * > 0: can't process yet. | 
|  | */ | 
|  | static int run_complete_job(struct kcopyd_job *job) | 
|  | { | 
|  | void *context = job->context; | 
|  | int read_err = job->read_err; | 
|  | unsigned long write_err = job->write_err; | 
|  | dm_kcopyd_notify_fn fn = job->fn; | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | if (job->pages && job->pages != &zero_page_list) | 
|  | kcopyd_put_pages(kc, job->pages); | 
|  | /* | 
|  | * If this is the master job, the sub jobs have already | 
|  | * completed so we can free everything. | 
|  | */ | 
|  | if (job->master_job == job) { | 
|  | mutex_destroy(&job->lock); | 
|  | mempool_free(job, &kc->job_pool); | 
|  | } | 
|  | fn(read_err, write_err, context); | 
|  |  | 
|  | if (atomic_dec_and_test(&kc->nr_jobs)) | 
|  | wake_up(&kc->destroyq); | 
|  |  | 
|  | cond_resched(); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void complete_io(unsigned long error, void *context) | 
|  | { | 
|  | struct kcopyd_job *job = context; | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | io_job_finish(kc->throttle); | 
|  |  | 
|  | if (error) { | 
|  | if (op_is_write(job->op)) | 
|  | job->write_err |= error; | 
|  | else | 
|  | job->read_err = 1; | 
|  |  | 
|  | if (!(job->flags & BIT(DM_KCOPYD_IGNORE_ERROR))) { | 
|  | push(&kc->complete_jobs, job); | 
|  | wake(kc); | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (op_is_write(job->op)) | 
|  | push(&kc->complete_jobs, job); | 
|  |  | 
|  | else { | 
|  | job->op = REQ_OP_WRITE; | 
|  | push(&kc->io_jobs, job); | 
|  | } | 
|  |  | 
|  | wake(kc); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Request io on as many buffer heads as we can currently get for | 
|  | * a particular job. | 
|  | */ | 
|  | static int run_io_job(struct kcopyd_job *job) | 
|  | { | 
|  | int r; | 
|  | struct dm_io_request io_req = { | 
|  | .bi_opf = job->op, | 
|  | .mem.type = DM_IO_PAGE_LIST, | 
|  | .mem.ptr.pl = job->pages, | 
|  | .mem.offset = 0, | 
|  | .notify.fn = complete_io, | 
|  | .notify.context = job, | 
|  | .client = job->kc->io_client, | 
|  | }; | 
|  |  | 
|  | /* | 
|  | * If we need to write sequentially and some reads or writes failed, | 
|  | * no point in continuing. | 
|  | */ | 
|  | if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) && | 
|  | job->master_job->write_err) { | 
|  | job->write_err = job->master_job->write_err; | 
|  | return -EIO; | 
|  | } | 
|  |  | 
|  | io_job_start(job->kc->throttle); | 
|  |  | 
|  | if (job->op == REQ_OP_READ) | 
|  | r = dm_io(&io_req, 1, &job->source, NULL); | 
|  | else | 
|  | r = dm_io(&io_req, job->num_dests, job->dests, NULL); | 
|  |  | 
|  | return r; | 
|  | } | 
|  |  | 
|  | static int run_pages_job(struct kcopyd_job *job) | 
|  | { | 
|  | int r; | 
|  | unsigned int nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); | 
|  |  | 
|  | r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); | 
|  | if (!r) { | 
|  | /* this job is ready for io */ | 
|  | push(&job->kc->io_jobs, job); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if (r == -ENOMEM) | 
|  | /* can't complete now */ | 
|  | return 1; | 
|  |  | 
|  | return r; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Run through a list for as long as possible.  Returns the count | 
|  | * of successful jobs. | 
|  | */ | 
|  | static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, | 
|  | int (*fn)(struct kcopyd_job *)) | 
|  | { | 
|  | struct kcopyd_job *job; | 
|  | int r, count = 0; | 
|  |  | 
|  | while ((job = pop(jobs, kc))) { | 
|  |  | 
|  | r = fn(job); | 
|  |  | 
|  | if (r < 0) { | 
|  | /* error this rogue job */ | 
|  | if (op_is_write(job->op)) | 
|  | job->write_err = (unsigned long) -1L; | 
|  | else | 
|  | job->read_err = 1; | 
|  | push(&kc->complete_jobs, job); | 
|  | wake(kc); | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (r > 0) { | 
|  | /* | 
|  | * We couldn't service this job ATM, so | 
|  | * push this job back onto the list. | 
|  | */ | 
|  | push_head(jobs, job); | 
|  | break; | 
|  | } | 
|  |  | 
|  | count++; | 
|  | } | 
|  |  | 
|  | return count; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * kcopyd does this every time it's woken up. | 
|  | */ | 
|  | static void do_work(struct work_struct *work) | 
|  | { | 
|  | struct dm_kcopyd_client *kc = container_of(work, | 
|  | struct dm_kcopyd_client, kcopyd_work); | 
|  | struct blk_plug plug; | 
|  |  | 
|  | /* | 
|  | * The order that these are called is *very* important. | 
|  | * complete jobs can free some pages for pages jobs. | 
|  | * Pages jobs when successful will jump onto the io jobs | 
|  | * list.  io jobs call wake when they complete and it all | 
|  | * starts again. | 
|  | */ | 
|  | spin_lock_irq(&kc->job_lock); | 
|  | list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs); | 
|  | spin_unlock_irq(&kc->job_lock); | 
|  |  | 
|  | blk_start_plug(&plug); | 
|  | process_jobs(&kc->complete_jobs, kc, run_complete_job); | 
|  | process_jobs(&kc->pages_jobs, kc, run_pages_job); | 
|  | process_jobs(&kc->io_jobs, kc, run_io_job); | 
|  | blk_finish_plug(&plug); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If we are copying a small region we just dispatch a single job | 
|  | * to do the copy, otherwise the io has to be split up into many | 
|  | * jobs. | 
|  | */ | 
|  | static void dispatch_job(struct kcopyd_job *job) | 
|  | { | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | atomic_inc(&kc->nr_jobs); | 
|  | if (unlikely(!job->source.count)) | 
|  | push(&kc->callback_jobs, job); | 
|  | else if (job->pages == &zero_page_list) | 
|  | push(&kc->io_jobs, job); | 
|  | else | 
|  | push(&kc->pages_jobs, job); | 
|  | wake(kc); | 
|  | } | 
|  |  | 
|  | static void segment_complete(int read_err, unsigned long write_err, | 
|  | void *context) | 
|  | { | 
|  | /* FIXME: tidy this function */ | 
|  | sector_t progress = 0; | 
|  | sector_t count = 0; | 
|  | struct kcopyd_job *sub_job = context; | 
|  | struct kcopyd_job *job = sub_job->master_job; | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | mutex_lock(&job->lock); | 
|  |  | 
|  | /* update the error */ | 
|  | if (read_err) | 
|  | job->read_err = 1; | 
|  |  | 
|  | if (write_err) | 
|  | job->write_err |= write_err; | 
|  |  | 
|  | /* | 
|  | * Only dispatch more work if there hasn't been an error. | 
|  | */ | 
|  | if ((!job->read_err && !job->write_err) || | 
|  | job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) { | 
|  | /* get the next chunk of work */ | 
|  | progress = job->progress; | 
|  | count = job->source.count - progress; | 
|  | if (count) { | 
|  | if (count > kc->sub_job_size) | 
|  | count = kc->sub_job_size; | 
|  |  | 
|  | job->progress += count; | 
|  | } | 
|  | } | 
|  | mutex_unlock(&job->lock); | 
|  |  | 
|  | if (count) { | 
|  | int i; | 
|  |  | 
|  | *sub_job = *job; | 
|  | sub_job->write_offset = progress; | 
|  | sub_job->source.sector += progress; | 
|  | sub_job->source.count = count; | 
|  |  | 
|  | for (i = 0; i < job->num_dests; i++) { | 
|  | sub_job->dests[i].sector += progress; | 
|  | sub_job->dests[i].count = count; | 
|  | } | 
|  |  | 
|  | sub_job->fn = segment_complete; | 
|  | sub_job->context = sub_job; | 
|  | dispatch_job(sub_job); | 
|  |  | 
|  | } else if (atomic_dec_and_test(&job->sub_jobs)) { | 
|  |  | 
|  | /* | 
|  | * Queue the completion callback to the kcopyd thread. | 
|  | * | 
|  | * Some callers assume that all the completions are called | 
|  | * from a single thread and don't race with each other. | 
|  | * | 
|  | * We must not call the callback directly here because this | 
|  | * code may not be executing in the thread. | 
|  | */ | 
|  | push(&kc->complete_jobs, job); | 
|  | wake(kc); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Create some sub jobs to share the work between them. | 
|  | */ | 
|  | static void split_job(struct kcopyd_job *master_job) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | atomic_inc(&master_job->kc->nr_jobs); | 
|  |  | 
|  | atomic_set(&master_job->sub_jobs, SPLIT_COUNT); | 
|  | for (i = 0; i < SPLIT_COUNT; i++) { | 
|  | master_job[i + 1].master_job = master_job; | 
|  | segment_complete(0, 0u, &master_job[i + 1]); | 
|  | } | 
|  | } | 
|  |  | 
|  | void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, | 
|  | unsigned int num_dests, struct dm_io_region *dests, | 
|  | unsigned int flags, dm_kcopyd_notify_fn fn, void *context) | 
|  | { | 
|  | struct kcopyd_job *job; | 
|  | int i; | 
|  |  | 
|  | /* | 
|  | * Allocate an array of jobs consisting of one master job | 
|  | * followed by SPLIT_COUNT sub jobs. | 
|  | */ | 
|  | job = mempool_alloc(&kc->job_pool, GFP_NOIO); | 
|  | mutex_init(&job->lock); | 
|  |  | 
|  | /* | 
|  | * set up for the read. | 
|  | */ | 
|  | job->kc = kc; | 
|  | job->flags = flags; | 
|  | job->read_err = 0; | 
|  | job->write_err = 0; | 
|  |  | 
|  | job->num_dests = num_dests; | 
|  | memcpy(&job->dests, dests, sizeof(*dests) * num_dests); | 
|  |  | 
|  | /* | 
|  | * If one of the destination is a host-managed zoned block device, | 
|  | * we need to write sequentially. If one of the destination is a | 
|  | * host-aware device, then leave it to the caller to choose what to do. | 
|  | */ | 
|  | if (!(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { | 
|  | for (i = 0; i < job->num_dests; i++) { | 
|  | if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { | 
|  | job->flags |= BIT(DM_KCOPYD_WRITE_SEQ); | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * If we need to write sequentially, errors cannot be ignored. | 
|  | */ | 
|  | if (job->flags & BIT(DM_KCOPYD_WRITE_SEQ) && | 
|  | job->flags & BIT(DM_KCOPYD_IGNORE_ERROR)) | 
|  | job->flags &= ~BIT(DM_KCOPYD_IGNORE_ERROR); | 
|  |  | 
|  | if (from) { | 
|  | job->source = *from; | 
|  | job->pages = NULL; | 
|  | job->op = REQ_OP_READ; | 
|  | } else { | 
|  | memset(&job->source, 0, sizeof(job->source)); | 
|  | job->source.count = job->dests[0].count; | 
|  | job->pages = &zero_page_list; | 
|  |  | 
|  | /* | 
|  | * Use WRITE ZEROES to optimize zeroing if all dests support it. | 
|  | */ | 
|  | job->op = REQ_OP_WRITE_ZEROES; | 
|  | for (i = 0; i < job->num_dests; i++) | 
|  | if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { | 
|  | job->op = REQ_OP_WRITE; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | job->fn = fn; | 
|  | job->context = context; | 
|  | job->master_job = job; | 
|  | job->write_offset = 0; | 
|  |  | 
|  | if (job->source.count <= kc->sub_job_size) | 
|  | dispatch_job(job); | 
|  | else { | 
|  | job->progress = 0; | 
|  | split_job(job); | 
|  | } | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_copy); | 
|  |  | 
|  | void dm_kcopyd_zero(struct dm_kcopyd_client *kc, | 
|  | unsigned int num_dests, struct dm_io_region *dests, | 
|  | unsigned int flags, dm_kcopyd_notify_fn fn, void *context) | 
|  | { | 
|  | dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_zero); | 
|  |  | 
|  | void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, | 
|  | dm_kcopyd_notify_fn fn, void *context) | 
|  | { | 
|  | struct kcopyd_job *job; | 
|  |  | 
|  | job = mempool_alloc(&kc->job_pool, GFP_NOIO); | 
|  |  | 
|  | memset(job, 0, sizeof(struct kcopyd_job)); | 
|  | job->kc = kc; | 
|  | job->fn = fn; | 
|  | job->context = context; | 
|  | job->master_job = job; | 
|  |  | 
|  | atomic_inc(&kc->nr_jobs); | 
|  |  | 
|  | return job; | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_prepare_callback); | 
|  |  | 
|  | void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) | 
|  | { | 
|  | struct kcopyd_job *job = j; | 
|  | struct dm_kcopyd_client *kc = job->kc; | 
|  |  | 
|  | job->read_err = read_err; | 
|  | job->write_err = write_err; | 
|  |  | 
|  | push(&kc->callback_jobs, job); | 
|  | wake(kc); | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_do_callback); | 
|  |  | 
|  | /* | 
|  | * Cancels a kcopyd job, eg. someone might be deactivating a | 
|  | * mirror. | 
|  | */ | 
|  | #if 0 | 
|  | int kcopyd_cancel(struct kcopyd_job *job, int block) | 
|  | { | 
|  | /* FIXME: finish */ | 
|  | return -1; | 
|  | } | 
|  | #endif  /*  0  */ | 
|  |  | 
|  | /* | 
|  | *--------------------------------------------------------------- | 
|  | * Client setup | 
|  | *--------------------------------------------------------------- | 
|  | */ | 
|  | struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) | 
|  | { | 
|  | int r; | 
|  | unsigned int reserve_pages; | 
|  | struct dm_kcopyd_client *kc; | 
|  |  | 
|  | kc = kzalloc(sizeof(*kc), GFP_KERNEL); | 
|  | if (!kc) | 
|  | return ERR_PTR(-ENOMEM); | 
|  |  | 
|  | spin_lock_init(&kc->job_lock); | 
|  | INIT_LIST_HEAD(&kc->callback_jobs); | 
|  | INIT_LIST_HEAD(&kc->complete_jobs); | 
|  | INIT_LIST_HEAD(&kc->io_jobs); | 
|  | INIT_LIST_HEAD(&kc->pages_jobs); | 
|  | kc->throttle = throttle; | 
|  |  | 
|  | r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache); | 
|  | if (r) | 
|  | goto bad_slab; | 
|  |  | 
|  | INIT_WORK(&kc->kcopyd_work, do_work); | 
|  | kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0); | 
|  | if (!kc->kcopyd_wq) { | 
|  | r = -ENOMEM; | 
|  | goto bad_workqueue; | 
|  | } | 
|  |  | 
|  | kc->sub_job_size = dm_get_kcopyd_subjob_size(); | 
|  | reserve_pages = DIV_ROUND_UP(kc->sub_job_size << SECTOR_SHIFT, PAGE_SIZE); | 
|  |  | 
|  | kc->pages = NULL; | 
|  | kc->nr_reserved_pages = kc->nr_free_pages = 0; | 
|  | r = client_reserve_pages(kc, reserve_pages); | 
|  | if (r) | 
|  | goto bad_client_pages; | 
|  |  | 
|  | kc->io_client = dm_io_client_create(); | 
|  | if (IS_ERR(kc->io_client)) { | 
|  | r = PTR_ERR(kc->io_client); | 
|  | goto bad_io_client; | 
|  | } | 
|  |  | 
|  | init_waitqueue_head(&kc->destroyq); | 
|  | atomic_set(&kc->nr_jobs, 0); | 
|  |  | 
|  | return kc; | 
|  |  | 
|  | bad_io_client: | 
|  | client_free_pages(kc); | 
|  | bad_client_pages: | 
|  | destroy_workqueue(kc->kcopyd_wq); | 
|  | bad_workqueue: | 
|  | mempool_exit(&kc->job_pool); | 
|  | bad_slab: | 
|  | kfree(kc); | 
|  |  | 
|  | return ERR_PTR(r); | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_client_create); | 
|  |  | 
|  | void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) | 
|  | { | 
|  | /* Wait for completion of all jobs submitted by this client. */ | 
|  | wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); | 
|  |  | 
|  | BUG_ON(!list_empty(&kc->callback_jobs)); | 
|  | BUG_ON(!list_empty(&kc->complete_jobs)); | 
|  | BUG_ON(!list_empty(&kc->io_jobs)); | 
|  | BUG_ON(!list_empty(&kc->pages_jobs)); | 
|  | destroy_workqueue(kc->kcopyd_wq); | 
|  | dm_io_client_destroy(kc->io_client); | 
|  | client_free_pages(kc); | 
|  | mempool_exit(&kc->job_pool); | 
|  | kfree(kc); | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_client_destroy); | 
|  |  | 
|  | void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc) | 
|  | { | 
|  | flush_workqueue(kc->kcopyd_wq); | 
|  | } | 
|  | EXPORT_SYMBOL(dm_kcopyd_client_flush); |