/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Queued spinlock defines
 *
 * This file contains macro definitions and functions shared between different
 * qspinlock slow path implementations.
 */
#ifndef __LINUX_QSPINLOCK_H
#define __LINUX_QSPINLOCK_H

#include <asm-generic/percpu.h>
#include <linux/percpu-defs.h>
#include <asm-generic/qspinlock.h>
#include <asm-generic/mcs_spinlock.h>

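/*
 * A CPU can be queued on at most one lock per execution context, and there
 * are at most four such contexts that can nest: task, soft IRQ, hard IRQ
 * and NMI. Hence four per-CPU queue nodes are sufficient.
 */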
#define _Q_MAX_NODES	4

/*
 * The pending bit spinning loop count.
 * This heuristic is used to limit the number of lockword accesses
 * made by atomic_cond_read_relaxed when waiting for the lock to
 * transition out of the "== _Q_PENDING_VAL" state. We don't spin
 * indefinitely because there's no guarantee that we'll make forward
 * progress.
 */
#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif
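
/*
 * Architectures that can bound how long the pending holder keeps the lock
 * may raise this limit in their asm/qspinlock.h; x86, for instance, is
 * believed to use something like:
 *
 *	#define _Q_PENDING_LOOPS	(1 << 9)
 *
 * The value above is only the generic fallback.
 */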

/*
 * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
 * size and four of them will fit nicely in one 64-byte cacheline. For
 * pvqspinlock, however, we need more space for extra data. To accommodate
 * that, we insert two more long words to pad it up to 32 bytes. IOW, only
 * two of them can fit in a cacheline in this case. That is OK as it is rare
 * to have more than 2 levels of slowpath nesting in actual use. We don't
 * want to penalize pvqspinlocks to optimize for a rare case in native
 * qspinlocks.
 */
struct qnode {
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};
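
/*
 * The slow path implementation is expected to provide the per-CPU node
 * storage itself, roughly along these lines (illustrative sketch only):
 *
 *	static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);
 */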

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */

static inline __pure u32 encode_tail(int cpu, int idx)
{
	u32 tail;

	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}
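
/*
 * For example, cpu == 0 / idx == 0 encodes to (1 << _Q_TAIL_CPU_OFFSET), so
 * any valid tail is non-zero and tail == 0 unambiguously means "no queue".
 */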

static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
						       struct qnode __percpu *qnodes)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}

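/*
 * grab_mcs_node - return the MCS node for a given nesting level
 * @base: Pointer to the first MCS node of this CPU's qnode array
 * @idx : Nesting level index (0 .. _Q_MAX_NODES - 1)
 *
 * Relies on mcs being the first member of struct qnode, so stepping by
 * struct qnode strides from @base lands on this CPU's idx'th node.
 */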
static inline __pure
struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}

#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)

#if _Q_PENDING_BITS == 8
/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 *
 * Lock stealing is not allowed if this function is used.
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	/*
	 * We can use relaxed semantics since the caller ensures that the
	 * MCS node is properly initialized before updating the tail.
	 */
	return (u32)xchg_relaxed(&lock->tail,
				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
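
/*
 * Note for the xchg_tail() above: with _Q_PENDING_BITS == 8 the tail
 * occupies its own 16-bit halfword of the lock word, so it can be
 * exchanged directly without touching the locked and pending bytes.
 */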

#else /* _Q_PENDING_BITS == 8 */

/**
 * clear_pending - clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,* -> *,0,*
 */
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);
}

/**
 * clear_pending_set_locked - take ownership and clear the pending bit.
 * @lock: Pointer to queued spinlock structure
 *
 * *,1,0 -> *,0,1
 */
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
{
	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
}

/**
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
 * xchg(lock, tail)
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
	u32 old, new;

	old = atomic_read(&lock->val);
	do {
		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
		/*
		 * We can use relaxed semantics since the caller ensures that
		 * the MCS node is properly initialized before updating the
		 * tail.
		 */
	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));

	return old;
}
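
/*
 * Here the tail bits share the lock word with the locked and pending bits
 * and cannot be exchanged on their own, so the locked/pending state has to
 * be preserved by hand, hence the cmpxchg loop rather than a plain xchg.
 */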
#endif /* _Q_PENDING_BITS == 8 */

/**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
 * @lock : Pointer to queued spinlock structure
 * Return: The previous lock value
 *
 * *,*,* -> *,1,*
 */
#ifndef queued_fetch_set_pending_acquire
static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
}
#endif

/**
 * set_locked - Set the lock bit and own the lock
 * @lock: Pointer to queued spinlock structure
 *
 * *,*,0 -> *,0,1
 */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}

#endif /* __LINUX_QSPINLOCK_H */