| /* SPDX-License-Identifier: GPL-2.0 */ |
| #ifndef _LINUX_RSEQ_ENTRY_H |
| #define _LINUX_RSEQ_ENTRY_H |
| |
| /* Must be outside the CONFIG_RSEQ guard to resolve the stubs */ |
| #ifdef CONFIG_RSEQ_STATS |
| #include <linux/percpu.h> |
| |
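| /* |
| * Per CPU event counters, incremented with rseq_stat_inc() below. The |
| * s_* members account the slice extension handling. |
| */ |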
| struct rseq_stats { |
| unsigned long exit; |
| unsigned long signal; |
| unsigned long slowpath; |
| unsigned long fastpath; |
| unsigned long ids; |
| unsigned long cs; |
| unsigned long clear; |
| unsigned long fixup; |
| unsigned long s_granted; |
| unsigned long s_expired; |
| unsigned long s_revoked; |
| unsigned long s_yielded; |
| unsigned long s_aborted; |
| }; |
| |
| DECLARE_PER_CPU(struct rseq_stats, rseq_stats); |
| |
| /* |
| * The slow path has interrupts and preemption enabled, but the fast path |
| * runs with interrupts disabled, so there is no point in paying for the |
| * preemption protection implied by this_cpu_inc() on every operation. |
| */ |
| #ifdef RSEQ_BUILD_SLOW_PATH |
| #define rseq_stat_inc(which) this_cpu_inc((which)) |
| #else |
| #define rseq_stat_inc(which) raw_cpu_inc((which)) |
| #endif |
| |
| #else /* CONFIG_RSEQ_STATS */ |
| #define rseq_stat_inc(x) do { } while (0) |
| #endif /* !CONFIG_RSEQ_STATS */ |
| |
| #ifdef CONFIG_RSEQ |
| #include <linux/jump_label.h> |
| #include <linux/rseq.h> |
| #include <linux/sched/signal.h> |
| #include <linux/uaccess.h> |
| |
| #include <linux/tracepoint-defs.h> |
| |
| #ifdef CONFIG_TRACEPOINTS |
| DECLARE_TRACEPOINT(rseq_update); |
| DECLARE_TRACEPOINT(rseq_ip_fixup); |
| void __rseq_trace_update(struct task_struct *t); |
| void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip, |
| unsigned long offset, unsigned long abort_ip); |
| |
| static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) |
| { |
| if (tracepoint_enabled(rseq_update) && ids) |
| __rseq_trace_update(t); |
| } |
| |
| static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip, |
| unsigned long offset, unsigned long abort_ip) |
| { |
| if (tracepoint_enabled(rseq_ip_fixup)) |
| __rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip); |
| } |
| |
| #else /* CONFIG_TRACEPOINTS */ |
| static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) { } |
| static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip, |
| unsigned long offset, unsigned long abort_ip) { } |
| #endif /* !CONFIG_TRACEPOINTS */ |
| |
| DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled); |
| |
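| /* |
| * RSEQ_BUILD_SLOW_PATH is defined by the compilation unit which provides |
| * the out of line slow path and includes this header. There the helpers |
| * below are not force inlined and the debug functions are compiled. |
| */ |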
| #ifdef RSEQ_BUILD_SLOW_PATH |
| #define rseq_inline |
| #else |
| #define rseq_inline __always_inline |
| #endif |
| |
| #ifdef CONFIG_RSEQ_SLICE_EXTENSION |
| DECLARE_STATIC_KEY_TRUE(rseq_slice_extension_key); |
| |
| static __always_inline bool rseq_slice_extension_enabled(void) |
| { |
| return static_branch_likely(&rseq_slice_extension_key); |
| } |
| |
| extern unsigned int rseq_slice_ext_nsecs; |
| bool __rseq_arm_slice_extension_timer(void); |
| |
| static __always_inline bool rseq_arm_slice_extension_timer(void) |
| { |
| if (!rseq_slice_extension_enabled()) |
| return false; |
| |
| if (likely(!current->rseq.slice.state.granted)) |
| return false; |
| |
| return __rseq_arm_slice_extension_timer(); |
| } |
| |
| static __always_inline void rseq_slice_clear_grant(struct task_struct *t) |
| { |
| if (IS_ENABLED(CONFIG_RSEQ_STATS) && t->rseq.slice.state.granted) |
| rseq_stat_inc(rseq_stats.s_revoked); |
| t->rseq.slice.state.granted = false; |
| } |
| |
| static __always_inline bool rseq_grant_slice_extension(bool work_pending) |
| { |
| struct task_struct *curr = current; |
| struct rseq_slice_ctrl usr_ctrl; |
| union rseq_slice_state state; |
| struct rseq __user *rseq; |
| |
| if (!rseq_slice_extension_enabled()) |
| return false; |
| |
| /* If not enabled or not a return from interrupt, nothing to do. */ |
| state = curr->rseq.slice.state; |
| state.enabled &= curr->rseq.event.user_irq; |
| if (likely(!state.state)) |
| return false; |
| |
| rseq = curr->rseq.usrptr; |
| scoped_user_rw_access(rseq, efault) { |
| |
| /* |
| * Quick check conditions where a grant is not possible or |
| * needs to be revoked. |
| * |
| * 1) Any TIF bit which needs to do extra work aside of |
| * rescheduling prevents a grant. |
| * |
| * 2) A previous rescheduling request resulted in a slice |
| * extension grant. |
| */ |
| if (unlikely(work_pending || state.granted)) { |
| /* Clear user control unconditionally. No point in checking it first. */ |
| unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); |
| rseq_slice_clear_grant(curr); |
| return false; |
| } |
| |
| unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault); |
| if (likely(!(usr_ctrl.request))) |
| return false; |
| |
| /* Grant the slice extension */ |
| usr_ctrl.request = 0; |
| usr_ctrl.granted = 1; |
| unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault); |
| } |
| |
| rseq_stat_inc(rseq_stats.s_granted); |
| |
| curr->rseq.slice.state.granted = true; |
| /* Store expiry time for arming the timer on the way out */ |
| curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns(); |
| /* |
| * This is racy against a remote CPU setting TIF_NEED_RESCHED in |
| * several ways: |
| * |
| * 1) |
| * CPU0 CPU1 |
| * clear_tsk() |
| * set_tsk() |
| * clear_preempt() |
| * Raise scheduler IPI on CPU0 |
| * --> IPI |
| * fold_need_resched() -> Folds correctly |
| * 2) |
| * CPU0 CPU1 |
| * set_tsk() |
| * clear_tsk() |
| * clear_preempt() |
| * Raise scheduler IPI on CPU0 |
| * --> IPI |
| * fold_need_resched() <- NOOP as TIF_NEED_RESCHED is false |
| * |
| * #1 is no different from a regular remote reschedule: it sets the |
| * previously clear bit and then raises the IPI, which folds it into |
| * the preempt counter. |
| * |
| * #2 is obviously incorrect from a scheduler POV, but it is no more |
| * incorrect than the code below, which clears the reschedule request |
| * with the safety net of the timer. |
| * |
| * The important part is that the clearing is protected against the |
| * scheduler IPI and also against any other interrupt which might |
| * end up waking up a task and setting the bits in the middle of |
| * the operation: |
| * |
| * clear_tsk() |
| * ---> Interrupt |
| * wakeup_on_this_cpu() |
| * set_tsk() |
| * set_preempt() |
| * clear_preempt() |
| * |
| * which would be inconsistent state. |
| */ |
| scoped_guard(irq) { |
| clear_tsk_need_resched(curr); |
| clear_preempt_need_resched(); |
| } |
| return true; |
| |
| efault: |
| force_sig(SIGSEGV); |
| return false; |
| } |
| |
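| /* |
| * Illustrative sketch of the user space side of the grant protocol as it |
| * follows from the slice_ctrl accesses above. The exact accessors and the |
| * yield mechanism are assumptions for illustration, not the ABI |
| * definition: |
| * |
| * rseq->slice_ctrl.request = 1; // critical work starts |
| * do_critical_work(); |
| * rseq->slice_ctrl.request = 0; // critical work done |
| * if (rseq->slice_ctrl.granted) // kernel granted an extension |
| * yield_granted_slice(); // relinquish the CPU promptly |
| */ |
| |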
| #else /* CONFIG_RSEQ_SLICE_EXTENSION */ |
| static __always_inline bool rseq_slice_extension_enabled(void) { return false; } |
| static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; } |
| static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { } |
| static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; } |
| #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */ |
| |
| bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr); |
| bool rseq_debug_validate_ids(struct task_struct *t); |
| |
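| /* |
| * Notes an interrupt entry from user mode. Only relevant with the generic |
| * IRQ entry code, which is expected to invoke this on such entries. |
| */ |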
| static __always_inline void rseq_note_user_irq_entry(void) |
| { |
| if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) |
| current->rseq.event.user_irq = true; |
| } |
| |
| /* |
| * Check whether there is a valid critical section and whether the |
| * instruction pointer in @regs is inside the critical section. |
| * |
| * - If the critical section is invalid, terminate the task. |
| * |
| * - If valid and the instruction pointer is inside, set it to the abort IP. |
| * |
| * - If valid and the instruction pointer is outside, clear the critical |
| * section address. |
| * |
| * Returns true if the section was valid and either the fixup or the |
| * clear was done, false otherwise. |
| * |
| * In the failure case task::rseq_event::fatal is set when an invalid |
| * section was found. It is left clear when the failure was an unresolved |
| * page fault. |
| * |
| * If inlined into the exit to user path with interrupts disabled, the |
| * caller has to protect against page faults with pagefault_disable(). |
| * |
| * In preemptible task context this would be counterproductive as the page |
| * faults could not be fully resolved. As a consequence unresolved page |
| * faults in task context are fatal too. |
| */ |
| |
| #ifdef RSEQ_BUILD_SLOW_PATH |
| /* |
| * The debug version is put out of line, but kept here so the code stays |
| * together. |
| * |
| * @csaddr has already been checked by the caller to be in user space |
| */ |
| bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, |
| unsigned long csaddr) |
| { |
| struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr; |
| u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE; |
| unsigned long ip = instruction_pointer(regs); |
| u64 __user *uc_head = (u64 __user *) ucs; |
| u32 usig, __user *uc_sig; |
| |
| scoped_user_rw_access(ucs, efault) { |
| /* |
| * Evaluate the user pile and exit if one of the conditions |
| * is not fulfilled. |
| */ |
| unsafe_get_user(start_ip, &ucs->start_ip, efault); |
| if (unlikely(start_ip >= tasksize)) |
| goto die; |
| /* If outside, just clear the critical section. */ |
| if (ip < start_ip) |
| goto clear; |
| |
| unsafe_get_user(offset, &ucs->post_commit_offset, efault); |
| cs_end = start_ip + offset; |
| /* Check for overflow and wraparound */ |
| if (unlikely(cs_end >= tasksize || cs_end < start_ip)) |
| goto die; |
| |
| /* If not inside, clear it. */ |
| if (ip >= cs_end) |
| goto clear; |
| |
| unsafe_get_user(abort_ip, &ucs->abort_ip, efault); |
| /* Ensure it's "valid" */ |
| if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig))) |
| goto die; |
| /* Validate that the abort IP is not in the critical section */ |
| if (unlikely(abort_ip - start_ip < offset)) |
| goto die; |
| |
| /* |
| * Check version and flags for 0. No point in emitting |
| * deprecated warnings before dying. That could be done in |
| * the slow path eventually, but *shrug*. |
| */ |
| unsafe_get_user(head, uc_head, efault); |
| if (unlikely(head)) |
| goto die; |
| |
| /* abort_ip - 4 is >= 0. See abort_ip check above */ |
| uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig)); |
| unsafe_get_user(usig, uc_sig, efault); |
| if (unlikely(usig != t->rseq.sig)) |
| goto die; |
| |
| /* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */ |
| if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { |
| /* If not in interrupt from user context, let it die */ |
| if (unlikely(!t->rseq.event.user_irq)) |
| goto die; |
| } |
| unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault); |
| instruction_pointer_set(regs, (unsigned long)abort_ip); |
| rseq_stat_inc(rseq_stats.fixup); |
| break; |
| clear: |
| unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault); |
| rseq_stat_inc(rseq_stats.clear); |
| abort_ip = 0ULL; |
| } |
| |
| if (unlikely(abort_ip)) |
| rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip); |
| return true; |
| die: |
| t->rseq.event.fatal = true; |
| efault: |
| return false; |
| } |
| |
| /* |
| * Validate that user space did not mess with the IDs when the rseq |
| * debug static branch is enabled. |
| */ |
| bool rseq_debug_validate_ids(struct task_struct *t) |
| { |
| struct rseq __user *rseq = t->rseq.usrptr; |
| u32 cpu_id, uval, node_id; |
| |
| /* |
| * On the first exit after registering the rseq region CPU ID is |
| * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0! |
| */ |
| node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ? |
| cpu_to_node(t->rseq.ids.cpu_id) : 0; |
| |
| scoped_user_read_access(rseq, efault) { |
| unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault); |
| if (cpu_id != t->rseq.ids.cpu_id) |
| goto die; |
| unsafe_get_user(uval, &rseq->cpu_id, efault); |
| if (uval != cpu_id) |
| goto die; |
| unsafe_get_user(uval, &rseq->node_id, efault); |
| if (uval != node_id) |
| goto die; |
| unsafe_get_user(uval, &rseq->mm_cid, efault); |
| if (uval != t->rseq.ids.mm_cid) |
| goto die; |
| } |
| return true; |
| die: |
| t->rseq.event.fatal = true; |
| efault: |
| return false; |
| } |
| |
| #endif /* RSEQ_BUILD_SLOW_PATH */ |
| |
| /* |
| * This only ensures that abort_ip is in the user address space and |
| * validates that it is preceded by the signature. |
| * |
| * No other sanity checks are done here; that's what the debug code is for. |
| */ |
| static rseq_inline bool |
| rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr) |
| { |
| struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr; |
| unsigned long ip = instruction_pointer(regs); |
| unsigned long tasksize = TASK_SIZE; |
| u64 start_ip, abort_ip, offset; |
| u32 usig, __user *uc_sig; |
| |
| rseq_stat_inc(rseq_stats.cs); |
| |
| if (unlikely(csaddr >= tasksize)) { |
| t->rseq.event.fatal = true; |
| return false; |
| } |
| |
| if (static_branch_unlikely(&rseq_debug_enabled)) |
| return rseq_debug_update_user_cs(t, regs, csaddr); |
| |
| scoped_user_rw_access(ucs, efault) { |
| unsafe_get_user(start_ip, &ucs->start_ip, efault); |
| unsafe_get_user(offset, &ucs->post_commit_offset, efault); |
| unsafe_get_user(abort_ip, &ucs->abort_ip, efault); |
| |
| /* |
| * No sanity checks. If user space screwed it up, it can |
| * keep the pieces. That's what debug code is for. |
| * |
| * If outside, just clear the critical section. |
| */ |
| if (ip - start_ip >= offset) |
| goto clear; |
| |
| /* |
| * Two requirements for @abort_ip: |
| * - Must be in user space as x86 IRET would happily return to |
| * the kernel. |
| * - The four bytes preceding the instruction at @abort_ip must |
| * contain the signature. |
| * |
| * The latter protects against the following attack vector: |
| * |
| * An attacker with limited abilities to write, creates a critical |
| * section descriptor, sets the abort IP to a library function or |
| * some other ROP gadget and stores the address of the descriptor |
| * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP |
| * protection. |
| */ |
| if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig))) |
| goto die; |
| |
| /* The address is guaranteed to be >= 0 and < TASK_SIZE */ |
| uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig)); |
| unsafe_get_user(usig, uc_sig, efault); |
| if (unlikely(usig != t->rseq.sig)) |
| goto die; |
| |
| /* Invalidate the critical section */ |
| unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault); |
| /* Update the instruction pointer */ |
| instruction_pointer_set(regs, (unsigned long)abort_ip); |
| rseq_stat_inc(rseq_stats.fixup); |
| break; |
| clear: |
| unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault); |
| rseq_stat_inc(rseq_stats.clear); |
| abort_ip = 0ULL; |
| } |
| |
| if (unlikely(abort_ip)) |
| rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip); |
| return true; |
| die: |
| t->rseq.event.fatal = true; |
| efault: |
| return false; |
| } |
| |
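| /* |
| * For orientation only - rough shape of the user space descriptor which |
| * the checks above operate on. struct rseq_cs is defined in the rseq UAPI |
| * header; the label based initializer is merely an illustration: |
| * |
| * struct rseq_cs cs = { |
| * .start_ip = (u64)&&start, |
| * .post_commit_offset = (u64)(&&post_commit - &&start), |
| * .abort_ip = (u64)&&abort, |
| * }; |
| * |
| * The 32-bit signature handed to sys_rseq() at registration time has to be |
| * placed directly before the instruction at abort_ip, and the descriptor |
| * address is stored in TLS::rseq::rseq_cs to arm the critical section. |
| */ |
| |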
| /* |
| * Updates CPU ID, node ID and MM CID and reads the critical section |
| * address when @csaddr != NULL. This allows the ID update and the read |
| * to be done in the same uaccess region, sparing a separate begin/end. |
| * |
| * As this is either invoked from a C wrapper with @csaddr = NULL or from |
| * the fast path code with a valid pointer, a clever compiler should be |
| * able to optimize out the read. This spares a duplicate implementation. |
| * |
| * Returns true if the operation was successful, false otherwise. |
| * |
| * In the failure case task::rseq_event::fatal is set when invalid data |
| * was found on debug kernels. It is left clear when the failure was an |
| * unresolved page fault. |
| * |
| * If inlined into the exit to user path with interrupts disabled, the |
| * caller has to protect against page faults with pagefault_disable(). |
| * |
| * In preemptible task context this would be counterproductive as the page |
| * faults could not be fully resolved. As a consequence unresolved page |
| * faults in task context are fatal too. |
| */ |
| static rseq_inline |
| bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids, |
| u32 node_id, u64 *csaddr) |
| { |
| struct rseq __user *rseq = t->rseq.usrptr; |
| |
| if (static_branch_unlikely(&rseq_debug_enabled)) { |
| if (!rseq_debug_validate_ids(t)) |
| return false; |
| } |
| |
| scoped_user_rw_access(rseq, efault) { |
| unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault); |
| unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault); |
| unsafe_put_user(node_id, &rseq->node_id, efault); |
| unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault); |
| if (csaddr) |
| unsafe_get_user(*csaddr, &rseq->rseq_cs, efault); |
| |
| /* Open coded, so it's in the same user access region */ |
| if (rseq_slice_extension_enabled()) { |
| /* Unconditionally clear it, no point in conditionals */ |
| unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); |
| } |
| } |
| |
| rseq_slice_clear_grant(t); |
| /* Cache the new values */ |
| t->rseq.ids.cpu_cid = ids->cpu_cid; |
| rseq_stat_inc(rseq_stats.ids); |
| rseq_trace_update(t, ids); |
| return true; |
| efault: |
| return false; |
| } |
| |
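| /* |
| * For illustration only: user space typically consumes these IDs with |
| * plain reads from its TLS rseq area inside a critical section, e.g. |
| * using the MM CID to index per CPU style data. Names below are |
| * hypothetical: |
| * |
| * cid = rseq->mm_cid; |
| * item = list_pop(&freelist_per_cid[cid]); |
| */ |
| |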
| /* |
| * Update user space with new IDs and conditionally check whether the task |
| * is in a critical section. |
| */ |
| static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs, |
| struct rseq_ids *ids, u32 node_id) |
| { |
| u64 csaddr; |
| |
| if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr)) |
| return false; |
| |
| /* |
| * On architectures which utilize the generic entry code this allows |
| * skipping the critical section check when the entry was not from a |
| * user space interrupt, unless debug mode is enabled. |
| */ |
| if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) { |
| if (!static_branch_unlikely(&rseq_debug_enabled)) { |
| if (likely(!t->rseq.event.user_irq)) |
| return true; |
| } |
| } |
| if (likely(!csaddr)) |
| return true; |
| /* Sigh, this really needs to do work */ |
| return rseq_update_user_cs(t, regs, csaddr); |
| } |
| |
| /* |
| * If you want to use this then convert your architecture to the generic |
| * entry code. I'm tired of building workarounds for people who can't be |
| * bothered to make the maintenance of generic infrastructure less |
| * burdensome. Just sucking everything into the architecture code and |
| * thereby making others chase the horrible hacks and keep them working is |
| * neither acceptable nor sustainable. |
| */ |
| #ifdef CONFIG_GENERIC_ENTRY |
| |
| /* |
| * This is inlined into the exit path because: |
| * |
| * 1) It's a one time comparison in the fast path when there is no event to |
| * handle |
| * |
| * 2) The access to the user space rseq memory (TLS) is unlikely to fault |
| * so the straight inline operation is: |
| * |
| * - Four 32-bit stores only if CPU ID / MM CID need to be updated |
| * - One 64-bit load to retrieve the critical section address |
| * |
| * 3) In the unlikely case that the critical section address is != NULL: |
| * |
| * - One 64-bit load to retrieve the start IP |
| * - One 64-bit load to retrieve the offset for calculating the end |
| * - One 64-bit load to retrieve the abort IP |
| * - One 32-bit load to retrieve the signature |
| * - One store to clear the critical section address |
| * |
| * The non-debug case implements only the minimal required checking. It |
| * provides protection against a rogue abort IP in kernel space, which |
| * would be exploitable at least on x86, and also against a rogue CS |
| * descriptor by checking the signature at the abort IP. Any fallout from |
| * invalid critical section descriptors is a user space problem. The debug |
| * case provides the full set of checks and terminates the task if a |
| * condition is not met. |
| * |
| * In case of a fault or an invalid value, this sets TIF_NOTIFY_RESUME and |
| * tells the caller to loop back into exit_to_user_mode_loop(). The rseq |
| * slow path there will handle the failure. |
| */ |
| static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct task_struct *t) |
| { |
| /* |
| * Page faults need to be disabled as this is called with |
| * interrupts disabled |
| */ |
| guard(pagefault)(); |
| if (likely(!t->rseq.event.ids_changed)) { |
| struct rseq __user *rseq = t->rseq.usrptr; |
| /* |
| * If the IDs have not changed, rseq_event::user_irq must be true. |
| * See rseq_sched_switch_event(). |
| */ |
| u64 csaddr; |
| |
| scoped_user_rw_access(rseq, efault) { |
| unsafe_get_user(csaddr, &rseq->rseq_cs, efault); |
| |
| /* Open coded, so it's in the same user access region */ |
| if (rseq_slice_extension_enabled()) { |
| /* Unconditionally clear it, no point in conditionals */ |
| unsafe_put_user(0U, &rseq->slice_ctrl.all, efault); |
| } |
| } |
| |
| rseq_slice_clear_grant(t); |
| |
| if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) { |
| if (unlikely(!rseq_update_user_cs(t, regs, csaddr))) |
| return false; |
| } |
| return true; |
| } |
| |
| struct rseq_ids ids = { |
| .cpu_id = task_cpu(t), |
| .mm_cid = task_mm_cid(t), |
| }; |
| u32 node_id = cpu_to_node(ids.cpu_id); |
| |
| return rseq_update_usr(t, regs, &ids, node_id); |
| efault: |
| return false; |
| } |
| |
| static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs) |
| { |
| struct task_struct *t = current; |
| |
| /* |
| * If the task neither went through schedule nor had the event flag |
| * enforced by the rseq syscall or execve, there is nothing to do here. |
| * |
| * CPU ID and MM CID can only change when going through a context |
| * switch. |
| * |
| * rseq_sched_switch_event() sets the rseq_event::sched_switch bit |
| * only when rseq_event::has_rseq is true. That conditional is |
| * required to avoid setting the TIF bit if RSEQ is not registered |
| * for a task. rseq_event::sched_switch is cleared when RSEQ is |
| * unregistered by a task so it's sufficient to check for the |
| * sched_switch bit alone. |
| * |
| * A sane compiler requires three instructions for the nothing-to-do |
| * case, including clearing the events, but your mileage might vary. |
| */ |
| if (unlikely((t->rseq.event.sched_switch))) { |
| rseq_stat_inc(rseq_stats.fastpath); |
| |
| if (unlikely(!rseq_exit_user_update(regs, t))) |
| return true; |
| } |
| /* Clear state so next entry starts from a clean slate */ |
| t->rseq.event.events = 0; |
| return false; |
| } |
| |
| /* Required to allow conversion to GENERIC_ENTRY w/o GENERIC_TIF_BITS */ |
| #ifdef CONFIG_HAVE_GENERIC_TIF_BITS |
| static __always_inline bool test_tif_rseq(unsigned long ti_work) |
| { |
| return ti_work & _TIF_RSEQ; |
| } |
| |
| static __always_inline void clear_tif_rseq(void) |
| { |
| static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME); |
| clear_thread_flag(TIF_RSEQ); |
| } |
| #else |
| static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; } |
| static __always_inline void clear_tif_rseq(void) { } |
| #endif |
| |
| static __always_inline bool |
| rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work) |
| { |
| if (unlikely(test_tif_rseq(ti_work))) { |
| if (unlikely(__rseq_exit_to_user_mode_restart(regs))) { |
| current->rseq.event.slowpath = true; |
| set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); |
| return true; |
| } |
| clear_tif_rseq(); |
| } |
| /* |
| * Arm the slice extension timer if there is nothing to do anymore and |
| * the task really goes out to user space. |
| */ |
| return rseq_arm_slice_extension_timer(); |
| } |
| |
| #else /* CONFIG_GENERIC_ENTRY */ |
| static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work) |
| { |
| return false; |
| } |
| #endif /* !CONFIG_GENERIC_ENTRY */ |
| |
| static __always_inline void rseq_syscall_exit_to_user_mode(void) |
| { |
| struct rseq_event *ev = ¤t->rseq.event; |
| |
| rseq_stat_inc(rseq_stats.exit); |
| |
| /* Needed to remove the store for the !lockdep case */ |
| if (IS_ENABLED(CONFIG_LOCKDEP)) { |
| WARN_ON_ONCE(ev->sched_switch); |
| ev->events = 0; |
| } |
| } |
| |
| static __always_inline void rseq_irqentry_exit_to_user_mode(void) |
| { |
| struct rseq_event *ev = ¤t->rseq.event; |
| |
| rseq_stat_inc(rseq_stats.exit); |
| |
| lockdep_assert_once(!ev->sched_switch); |
| |
| /* |
| * Ensure that event (especially user_irq) is cleared when the |
| * interrupt did not result in a schedule and therefore the |
| * rseq processing could not clear it. |
| */ |
| ev->events = 0; |
| } |
| |
| /* Required to keep ARM64 working */ |
| static __always_inline void rseq_exit_to_user_mode_legacy(void) |
| { |
| struct rseq_event *ev = ¤t->rseq.event; |
| |
| rseq_stat_inc(rseq_stats.exit); |
| |
| if (static_branch_unlikely(&rseq_debug_enabled)) |
| WARN_ON_ONCE(ev->sched_switch); |
| |
| /* |
| * Ensure that event (especially user_irq) is cleared when the |
| * interrupt did not result in a schedule and therefore the |
| * rseq processing did not clear it. |
| */ |
| ev->events = 0; |
| } |
| |
| void __rseq_debug_syscall_return(struct pt_regs *regs); |
| |
| static __always_inline void rseq_debug_syscall_return(struct pt_regs *regs) |
| { |
| if (static_branch_unlikely(&rseq_debug_enabled)) |
| __rseq_debug_syscall_return(regs); |
| } |
| #else /* CONFIG_RSEQ */ |
| static inline void rseq_note_user_irq_entry(void) { } |
| static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work) |
| { |
| return false; |
| } |
| static inline void rseq_syscall_exit_to_user_mode(void) { } |
| static inline void rseq_irqentry_exit_to_user_mode(void) { } |
| static inline void rseq_exit_to_user_mode_legacy(void) { } |
| static inline void rseq_debug_syscall_return(struct pt_regs *regs) { } |
| static inline bool rseq_grant_slice_extension(bool work_pending) { return false; } |
| #endif /* !CONFIG_RSEQ */ |
| |
| #endif /* _LINUX_RSEQ_ENTRY_H */ |