| // SPDX-License-Identifier: GPL-2.0 | 
 | /* | 
 |  * Bad block management | 
 |  * | 
 |  * - Heavily based on MD badblocks code from Neil Brown | 
 |  * | 
 |  * Copyright (c) 2015, Intel Corporation. | 
 |  */ | 
 |  | 
 | #include <linux/badblocks.h> | 
 | #include <linux/seqlock.h> | 
 | #include <linux/device.h> | 
 | #include <linux/kernel.h> | 
 | #include <linux/module.h> | 
 | #include <linux/stddef.h> | 
 | #include <linux/types.h> | 
 | #include <linux/slab.h> | 
 |  | 
 | /** | 
 |  * badblocks_check() - check a given range for bad sectors | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * @s:		sector (start) at which to check for badblocks | 
 |  * @sectors:	number of sectors to check for badblocks | 
 |  * @first_bad:	pointer to store location of the first badblock | 
 |  * @bad_sectors: pointer to store number of badblocks after @first_bad | 
 |  * | 
 |  * We can record which blocks on each device are 'bad' and so just | 
 |  * fail those blocks, or that stripe, rather than the whole device. | 
 |  * Entries in the bad-block table are 64bits wide.  This comprises: | 
 |  * Length of bad-range, in sectors: 0-511 for lengths 1-512 | 
 |  * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes) | 
 |  *  A 'shift' can be set so that larger blocks are tracked and | 
 |  *  consequently larger devices can be covered. | 
 |  * 'Acknowledged' flag - 1 bit. - the most significant bit. | 
 |  * | 
 |  * Locking of the bad-block table uses a seqlock so badblocks_check | 
 |  * might need to retry if it is very unlucky. | 
 |  * We will sometimes want to check for bad blocks in a bi_end_io function, | 
 |  * so we use the write_seqlock_irq variant. | 
 |  * | 
 |  * When looking for a bad block we specify a range and want to | 
 |  * know if any block in the range is bad.  So we binary-search | 
 |  * to the last range that starts at-or-before the given endpoint, | 
 |  * (or "before the sector after the target range") | 
 |  * then see if it ends after the given start. | 
 |  * | 
 |  * Return: | 
 |  *  0: there are no known bad blocks in the range | 
 |  *  1: there are known bad block which are all acknowledged | 
 |  * -1: there are bad blocks which have not yet been acknowledged in metadata. | 
 |  * plus the start/length of the first bad section we overlap. | 
 |  */ | 
 | int badblocks_check(struct badblocks *bb, sector_t s, int sectors, | 
 | 			sector_t *first_bad, int *bad_sectors) | 
 | { | 
 | 	int hi; | 
 | 	int lo; | 
 | 	u64 *p = bb->page; | 
 | 	int rv; | 
 | 	sector_t target = s + sectors; | 
 | 	unsigned seq; | 
 |  | 
 | 	if (bb->shift > 0) { | 
 | 		/* round the start down, and the end up */ | 
 | 		s >>= bb->shift; | 
 | 		target += (1<<bb->shift) - 1; | 
 | 		target >>= bb->shift; | 
 | 		sectors = target - s; | 
 | 	} | 
 | 	/* 'target' is now the first block after the bad range */ | 
 |  | 
 | retry: | 
 | 	seq = read_seqbegin(&bb->lock); | 
 | 	lo = 0; | 
 | 	rv = 0; | 
 | 	hi = bb->count; | 
 |  | 
 | 	/* Binary search between lo and hi for 'target' | 
 | 	 * i.e. for the last range that starts before 'target' | 
 | 	 */ | 
 | 	/* INVARIANT: ranges before 'lo' and at-or-after 'hi' | 
 | 	 * are known not to be the last range before target. | 
 | 	 * VARIANT: hi-lo is the number of possible | 
 | 	 * ranges, and decreases until it reaches 1 | 
 | 	 */ | 
 | 	while (hi - lo > 1) { | 
 | 		int mid = (lo + hi) / 2; | 
 | 		sector_t a = BB_OFFSET(p[mid]); | 
 |  | 
 | 		if (a < target) | 
 | 			/* This could still be the one, earlier ranges | 
 | 			 * could not. | 
 | 			 */ | 
 | 			lo = mid; | 
 | 		else | 
 | 			/* This and later ranges are definitely out. */ | 
 | 			hi = mid; | 
 | 	} | 
 | 	/* 'lo' might be the last that started before target, but 'hi' isn't */ | 
 | 	if (hi > lo) { | 
 | 		/* need to check all range that end after 's' to see if | 
 | 		 * any are unacknowledged. | 
 | 		 */ | 
 | 		while (lo >= 0 && | 
 | 		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { | 
 | 			if (BB_OFFSET(p[lo]) < target) { | 
 | 				/* starts before the end, and finishes after | 
 | 				 * the start, so they must overlap | 
 | 				 */ | 
 | 				if (rv != -1 && BB_ACK(p[lo])) | 
 | 					rv = 1; | 
 | 				else | 
 | 					rv = -1; | 
 | 				*first_bad = BB_OFFSET(p[lo]); | 
 | 				*bad_sectors = BB_LEN(p[lo]); | 
 | 			} | 
 | 			lo--; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	if (read_seqretry(&bb->lock, seq)) | 
 | 		goto retry; | 
 |  | 
 | 	return rv; | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_check); | 
 |  | 
 | static void badblocks_update_acked(struct badblocks *bb) | 
 | { | 
 | 	u64 *p = bb->page; | 
 | 	int i; | 
 | 	bool unacked = false; | 
 |  | 
 | 	if (!bb->unacked_exist) | 
 | 		return; | 
 |  | 
 | 	for (i = 0; i < bb->count ; i++) { | 
 | 		if (!BB_ACK(p[i])) { | 
 | 			unacked = true; | 
 | 			break; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	if (!unacked) | 
 | 		bb->unacked_exist = 0; | 
 | } | 
 |  | 
 | /** | 
 |  * badblocks_set() - Add a range of bad blocks to the table. | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * @s:		first sector to mark as bad | 
 |  * @sectors:	number of sectors to mark as bad | 
 |  * @acknowledged: weather to mark the bad sectors as acknowledged | 
 |  * | 
 |  * This might extend the table, or might contract it if two adjacent ranges | 
 |  * can be merged. We binary-search to find the 'insertion' point, then | 
 |  * decide how best to handle it. | 
 |  * | 
 |  * Return: | 
 |  *  0: success | 
 |  *  1: failed to set badblocks (out of space) | 
 |  */ | 
 | int badblocks_set(struct badblocks *bb, sector_t s, int sectors, | 
 | 			int acknowledged) | 
 | { | 
 | 	u64 *p; | 
 | 	int lo, hi; | 
 | 	int rv = 0; | 
 | 	unsigned long flags; | 
 |  | 
 | 	if (bb->shift < 0) | 
 | 		/* badblocks are disabled */ | 
 | 		return 1; | 
 |  | 
 | 	if (bb->shift) { | 
 | 		/* round the start down, and the end up */ | 
 | 		sector_t next = s + sectors; | 
 |  | 
 | 		s >>= bb->shift; | 
 | 		next += (1<<bb->shift) - 1; | 
 | 		next >>= bb->shift; | 
 | 		sectors = next - s; | 
 | 	} | 
 |  | 
 | 	write_seqlock_irqsave(&bb->lock, flags); | 
 |  | 
 | 	p = bb->page; | 
 | 	lo = 0; | 
 | 	hi = bb->count; | 
 | 	/* Find the last range that starts at-or-before 's' */ | 
 | 	while (hi - lo > 1) { | 
 | 		int mid = (lo + hi) / 2; | 
 | 		sector_t a = BB_OFFSET(p[mid]); | 
 |  | 
 | 		if (a <= s) | 
 | 			lo = mid; | 
 | 		else | 
 | 			hi = mid; | 
 | 	} | 
 | 	if (hi > lo && BB_OFFSET(p[lo]) > s) | 
 | 		hi = lo; | 
 |  | 
 | 	if (hi > lo) { | 
 | 		/* we found a range that might merge with the start | 
 | 		 * of our new range | 
 | 		 */ | 
 | 		sector_t a = BB_OFFSET(p[lo]); | 
 | 		sector_t e = a + BB_LEN(p[lo]); | 
 | 		int ack = BB_ACK(p[lo]); | 
 |  | 
 | 		if (e >= s) { | 
 | 			/* Yes, we can merge with a previous range */ | 
 | 			if (s == a && s + sectors >= e) | 
 | 				/* new range covers old */ | 
 | 				ack = acknowledged; | 
 | 			else | 
 | 				ack = ack && acknowledged; | 
 |  | 
 | 			if (e < s + sectors) | 
 | 				e = s + sectors; | 
 | 			if (e - a <= BB_MAX_LEN) { | 
 | 				p[lo] = BB_MAKE(a, e-a, ack); | 
 | 				s = e; | 
 | 			} else { | 
 | 				/* does not all fit in one range, | 
 | 				 * make p[lo] maximal | 
 | 				 */ | 
 | 				if (BB_LEN(p[lo]) != BB_MAX_LEN) | 
 | 					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack); | 
 | 				s = a + BB_MAX_LEN; | 
 | 			} | 
 | 			sectors = e - s; | 
 | 		} | 
 | 	} | 
 | 	if (sectors && hi < bb->count) { | 
 | 		/* 'hi' points to the first range that starts after 's'. | 
 | 		 * Maybe we can merge with the start of that range | 
 | 		 */ | 
 | 		sector_t a = BB_OFFSET(p[hi]); | 
 | 		sector_t e = a + BB_LEN(p[hi]); | 
 | 		int ack = BB_ACK(p[hi]); | 
 |  | 
 | 		if (a <= s + sectors) { | 
 | 			/* merging is possible */ | 
 | 			if (e <= s + sectors) { | 
 | 				/* full overlap */ | 
 | 				e = s + sectors; | 
 | 				ack = acknowledged; | 
 | 			} else | 
 | 				ack = ack && acknowledged; | 
 |  | 
 | 			a = s; | 
 | 			if (e - a <= BB_MAX_LEN) { | 
 | 				p[hi] = BB_MAKE(a, e-a, ack); | 
 | 				s = e; | 
 | 			} else { | 
 | 				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack); | 
 | 				s = a + BB_MAX_LEN; | 
 | 			} | 
 | 			sectors = e - s; | 
 | 			lo = hi; | 
 | 			hi++; | 
 | 		} | 
 | 	} | 
 | 	if (sectors == 0 && hi < bb->count) { | 
 | 		/* we might be able to combine lo and hi */ | 
 | 		/* Note: 's' is at the end of 'lo' */ | 
 | 		sector_t a = BB_OFFSET(p[hi]); | 
 | 		int lolen = BB_LEN(p[lo]); | 
 | 		int hilen = BB_LEN(p[hi]); | 
 | 		int newlen = lolen + hilen - (s - a); | 
 |  | 
 | 		if (s >= a && newlen < BB_MAX_LEN) { | 
 | 			/* yes, we can combine them */ | 
 | 			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]); | 
 |  | 
 | 			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack); | 
 | 			memmove(p + hi, p + hi + 1, | 
 | 				(bb->count - hi - 1) * 8); | 
 | 			bb->count--; | 
 | 		} | 
 | 	} | 
 | 	while (sectors) { | 
 | 		/* didn't merge (it all). | 
 | 		 * Need to add a range just before 'hi' | 
 | 		 */ | 
 | 		if (bb->count >= MAX_BADBLOCKS) { | 
 | 			/* No room for more */ | 
 | 			rv = 1; | 
 | 			break; | 
 | 		} else { | 
 | 			int this_sectors = sectors; | 
 |  | 
 | 			memmove(p + hi + 1, p + hi, | 
 | 				(bb->count - hi) * 8); | 
 | 			bb->count++; | 
 |  | 
 | 			if (this_sectors > BB_MAX_LEN) | 
 | 				this_sectors = BB_MAX_LEN; | 
 | 			p[hi] = BB_MAKE(s, this_sectors, acknowledged); | 
 | 			sectors -= this_sectors; | 
 | 			s += this_sectors; | 
 | 		} | 
 | 	} | 
 |  | 
 | 	bb->changed = 1; | 
 | 	if (!acknowledged) | 
 | 		bb->unacked_exist = 1; | 
 | 	else | 
 | 		badblocks_update_acked(bb); | 
 | 	write_sequnlock_irqrestore(&bb->lock, flags); | 
 |  | 
 | 	return rv; | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_set); | 
 |  | 
 | /** | 
 |  * badblocks_clear() - Remove a range of bad blocks to the table. | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * @s:		first sector to mark as bad | 
 |  * @sectors:	number of sectors to mark as bad | 
 |  * | 
 |  * This may involve extending the table if we spilt a region, | 
 |  * but it must not fail.  So if the table becomes full, we just | 
 |  * drop the remove request. | 
 |  * | 
 |  * Return: | 
 |  *  0: success | 
 |  *  1: failed to clear badblocks | 
 |  */ | 
 | int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) | 
 | { | 
 | 	u64 *p; | 
 | 	int lo, hi; | 
 | 	sector_t target = s + sectors; | 
 | 	int rv = 0; | 
 |  | 
 | 	if (bb->shift > 0) { | 
 | 		/* When clearing we round the start up and the end down. | 
 | 		 * This should not matter as the shift should align with | 
 | 		 * the block size and no rounding should ever be needed. | 
 | 		 * However it is better the think a block is bad when it | 
 | 		 * isn't than to think a block is not bad when it is. | 
 | 		 */ | 
 | 		s += (1<<bb->shift) - 1; | 
 | 		s >>= bb->shift; | 
 | 		target >>= bb->shift; | 
 | 		sectors = target - s; | 
 | 	} | 
 |  | 
 | 	write_seqlock_irq(&bb->lock); | 
 |  | 
 | 	p = bb->page; | 
 | 	lo = 0; | 
 | 	hi = bb->count; | 
 | 	/* Find the last range that starts before 'target' */ | 
 | 	while (hi - lo > 1) { | 
 | 		int mid = (lo + hi) / 2; | 
 | 		sector_t a = BB_OFFSET(p[mid]); | 
 |  | 
 | 		if (a < target) | 
 | 			lo = mid; | 
 | 		else | 
 | 			hi = mid; | 
 | 	} | 
 | 	if (hi > lo) { | 
 | 		/* p[lo] is the last range that could overlap the | 
 | 		 * current range.  Earlier ranges could also overlap, | 
 | 		 * but only this one can overlap the end of the range. | 
 | 		 */ | 
 | 		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) && | 
 | 		    (BB_OFFSET(p[lo]) < target)) { | 
 | 			/* Partial overlap, leave the tail of this range */ | 
 | 			int ack = BB_ACK(p[lo]); | 
 | 			sector_t a = BB_OFFSET(p[lo]); | 
 | 			sector_t end = a + BB_LEN(p[lo]); | 
 |  | 
 | 			if (a < s) { | 
 | 				/* we need to split this range */ | 
 | 				if (bb->count >= MAX_BADBLOCKS) { | 
 | 					rv = -ENOSPC; | 
 | 					goto out; | 
 | 				} | 
 | 				memmove(p+lo+1, p+lo, (bb->count - lo) * 8); | 
 | 				bb->count++; | 
 | 				p[lo] = BB_MAKE(a, s-a, ack); | 
 | 				lo++; | 
 | 			} | 
 | 			p[lo] = BB_MAKE(target, end - target, ack); | 
 | 			/* there is no longer an overlap */ | 
 | 			hi = lo; | 
 | 			lo--; | 
 | 		} | 
 | 		while (lo >= 0 && | 
 | 		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) && | 
 | 		       (BB_OFFSET(p[lo]) < target)) { | 
 | 			/* This range does overlap */ | 
 | 			if (BB_OFFSET(p[lo]) < s) { | 
 | 				/* Keep the early parts of this range. */ | 
 | 				int ack = BB_ACK(p[lo]); | 
 | 				sector_t start = BB_OFFSET(p[lo]); | 
 |  | 
 | 				p[lo] = BB_MAKE(start, s - start, ack); | 
 | 				/* now low doesn't overlap, so.. */ | 
 | 				break; | 
 | 			} | 
 | 			lo--; | 
 | 		} | 
 | 		/* 'lo' is strictly before, 'hi' is strictly after, | 
 | 		 * anything between needs to be discarded | 
 | 		 */ | 
 | 		if (hi - lo > 1) { | 
 | 			memmove(p+lo+1, p+hi, (bb->count - hi) * 8); | 
 | 			bb->count -= (hi - lo - 1); | 
 | 		} | 
 | 	} | 
 |  | 
 | 	badblocks_update_acked(bb); | 
 | 	bb->changed = 1; | 
 | out: | 
 | 	write_sequnlock_irq(&bb->lock); | 
 | 	return rv; | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_clear); | 
 |  | 
 | /** | 
 |  * ack_all_badblocks() - Acknowledge all bad blocks in a list. | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * | 
 |  * This only succeeds if ->changed is clear.  It is used by | 
 |  * in-kernel metadata updates | 
 |  */ | 
 | void ack_all_badblocks(struct badblocks *bb) | 
 | { | 
 | 	if (bb->page == NULL || bb->changed) | 
 | 		/* no point even trying */ | 
 | 		return; | 
 | 	write_seqlock_irq(&bb->lock); | 
 |  | 
 | 	if (bb->changed == 0 && bb->unacked_exist) { | 
 | 		u64 *p = bb->page; | 
 | 		int i; | 
 |  | 
 | 		for (i = 0; i < bb->count ; i++) { | 
 | 			if (!BB_ACK(p[i])) { | 
 | 				sector_t start = BB_OFFSET(p[i]); | 
 | 				int len = BB_LEN(p[i]); | 
 |  | 
 | 				p[i] = BB_MAKE(start, len, 1); | 
 | 			} | 
 | 		} | 
 | 		bb->unacked_exist = 0; | 
 | 	} | 
 | 	write_sequnlock_irq(&bb->lock); | 
 | } | 
 | EXPORT_SYMBOL_GPL(ack_all_badblocks); | 
 |  | 
 | /** | 
 |  * badblocks_show() - sysfs access to bad-blocks list | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * @page:	buffer received from sysfs | 
 |  * @unack:	weather to show unacknowledged badblocks | 
 |  * | 
 |  * Return: | 
 |  *  Length of returned data | 
 |  */ | 
 | ssize_t badblocks_show(struct badblocks *bb, char *page, int unack) | 
 | { | 
 | 	size_t len; | 
 | 	int i; | 
 | 	u64 *p = bb->page; | 
 | 	unsigned seq; | 
 |  | 
 | 	if (bb->shift < 0) | 
 | 		return 0; | 
 |  | 
 | retry: | 
 | 	seq = read_seqbegin(&bb->lock); | 
 |  | 
 | 	len = 0; | 
 | 	i = 0; | 
 |  | 
 | 	while (len < PAGE_SIZE && i < bb->count) { | 
 | 		sector_t s = BB_OFFSET(p[i]); | 
 | 		unsigned int length = BB_LEN(p[i]); | 
 | 		int ack = BB_ACK(p[i]); | 
 |  | 
 | 		i++; | 
 |  | 
 | 		if (unack && ack) | 
 | 			continue; | 
 |  | 
 | 		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n", | 
 | 				(unsigned long long)s << bb->shift, | 
 | 				length << bb->shift); | 
 | 	} | 
 | 	if (unack && len == 0) | 
 | 		bb->unacked_exist = 0; | 
 |  | 
 | 	if (read_seqretry(&bb->lock, seq)) | 
 | 		goto retry; | 
 |  | 
 | 	return len; | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_show); | 
 |  | 
 | /** | 
 |  * badblocks_store() - sysfs access to bad-blocks list | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * @page:	buffer received from sysfs | 
 |  * @len:	length of data received from sysfs | 
 |  * @unack:	weather to show unacknowledged badblocks | 
 |  * | 
 |  * Return: | 
 |  *  Length of the buffer processed or -ve error. | 
 |  */ | 
 | ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, | 
 | 			int unack) | 
 | { | 
 | 	unsigned long long sector; | 
 | 	int length; | 
 | 	char newline; | 
 |  | 
 | 	switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) { | 
 | 	case 3: | 
 | 		if (newline != '\n') | 
 | 			return -EINVAL; | 
 | 		fallthrough; | 
 | 	case 2: | 
 | 		if (length <= 0) | 
 | 			return -EINVAL; | 
 | 		break; | 
 | 	default: | 
 | 		return -EINVAL; | 
 | 	} | 
 |  | 
 | 	if (badblocks_set(bb, sector, length, !unack)) | 
 | 		return -ENOSPC; | 
 | 	else | 
 | 		return len; | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_store); | 
 |  | 
 | static int __badblocks_init(struct device *dev, struct badblocks *bb, | 
 | 		int enable) | 
 | { | 
 | 	bb->dev = dev; | 
 | 	bb->count = 0; | 
 | 	if (enable) | 
 | 		bb->shift = 0; | 
 | 	else | 
 | 		bb->shift = -1; | 
 | 	if (dev) | 
 | 		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL); | 
 | 	else | 
 | 		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL); | 
 | 	if (!bb->page) { | 
 | 		bb->shift = -1; | 
 | 		return -ENOMEM; | 
 | 	} | 
 | 	seqlock_init(&bb->lock); | 
 |  | 
 | 	return 0; | 
 | } | 
 |  | 
 | /** | 
 |  * badblocks_init() - initialize the badblocks structure | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  * @enable:	weather to enable badblocks accounting | 
 |  * | 
 |  * Return: | 
 |  *  0: success | 
 |  *  -ve errno: on error | 
 |  */ | 
 | int badblocks_init(struct badblocks *bb, int enable) | 
 | { | 
 | 	return __badblocks_init(NULL, bb, enable); | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_init); | 
 |  | 
 | int devm_init_badblocks(struct device *dev, struct badblocks *bb) | 
 | { | 
 | 	if (!bb) | 
 | 		return -EINVAL; | 
 | 	return __badblocks_init(dev, bb, 1); | 
 | } | 
 | EXPORT_SYMBOL_GPL(devm_init_badblocks); | 
 |  | 
 | /** | 
 |  * badblocks_exit() - free the badblocks structure | 
 |  * @bb:		the badblocks structure that holds all badblock information | 
 |  */ | 
 | void badblocks_exit(struct badblocks *bb) | 
 | { | 
 | 	if (!bb) | 
 | 		return; | 
 | 	if (bb->dev) | 
 | 		devm_kfree(bb->dev, bb->page); | 
 | 	else | 
 | 		kfree(bb->page); | 
 | 	bb->page = NULL; | 
 | } | 
 | EXPORT_SYMBOL_GPL(badblocks_exit); |