rcu: New rcu_user_enter() and rcu_user_exit() APIs
RCU currently insists that only idle tasks can enter RCU idle mode, which
prohibits an adaptive tickless kernel (AKA nohz cpusets), which in turn
would mean that usermode execution would always take scheduling-clock
interrupts, even when there is only one task runnable on the CPU in
question.
This commit therefore adds rcu_user_enter() and rcu_user_exit(), which
allow non-idle tasks to enter RCU idle mode. These are quite similar
to rcu_idle_enter() and rcu_idle_exit(), respectively, except that they
omit the idle-task checks.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9fae8db..61fd80d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -191,6 +191,8 @@
extern void rcu_idle_exit(void);
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
+extern void rcu_user_enter(void);
+extern void rcu_user_exit(void);
extern void exit_rcu(void);
/**
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 25874a3..7dacd33 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -343,6 +343,29 @@
return 0;
}
+/*
+ * rcu_check_idle_entry - report a non-idle task entering RCU idle mode
+ *
+ * Does nothing when current is the idle task.  Otherwise, with local
+ * interrupts disabled, records an rcu_dyntick trace event carrying this
+ * CPU's dynticks_nesting value, dumps the ftrace buffer and issues a
+ * one-time warning naming both the current task and this CPU's idle task.
+ * The function only reports; it returns nothing to its caller.
+ */
+static void rcu_check_idle_entry(void)
+{
+ struct task_struct *idle;
+ struct rcu_dynticks *rdtp;
+ unsigned long flags;
+
+ if (is_idle_task(current))
+ return;
+
+ /* Interrupts stay off while this CPU's rcu_dynticks and idle task are read. */
+ local_irq_save(flags);
+
+ rdtp = &__get_cpu_var(rcu_dynticks);
+ idle = idle_task(smp_processor_id());
+
+ trace_rcu_dyntick("Error on entry: not idle task", rdtp->dynticks_nesting, 0);
+ ftrace_dump(DUMP_ORIG);
+ WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+ current->pid, current->comm,
+ idle->pid, idle->comm); /* must be idle task! */
+
+ local_irq_restore(flags);
+}
+
/*
* rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
*
@@ -353,15 +376,6 @@
static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
{
trace_rcu_dyntick("Start", oldval, 0);
- if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
-
- trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
rcu_prepare_for_idle(smp_processor_id());
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
smp_mb__before_atomic_inc(); /* See above. */
@@ -382,7 +396,7 @@
}
/**
- * rcu_idle_enter - inform RCU that current CPU is entering idle
+ * __rcu_idle_enter - inform RCU that current CPU is entering RCU idle mode
*
* Enter idle mode, in other words, -leave- the mode in which RCU
* read-side critical sections can occur. (Though RCU read-side
@@ -393,7 +407,7 @@
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
-void rcu_idle_enter(void)
+static void __rcu_idle_enter(void)
{
unsigned long flags;
long long oldval;
@@ -410,9 +424,37 @@
rcu_idle_enter_common(rdtp, oldval);
local_irq_restore(flags);
}
+
+/**
+ * rcu_idle_enter - inform RCU that idle task is entering RCU idle mode
+ *
+ * Enter idle mode from the idle task before we put the CPU into
+ * low power mode. No use of RCU is permitted between this call and
+ * rcu_idle_exit(). This way the CPU doesn't need to keep the
+ * timer tick to report quiescent states, which is desired for energy
+ * savings.
+ */
+void rcu_idle_enter(void)
+{
+ /* Report a non-idle caller; the transition below happens regardless. */
+ rcu_check_idle_entry();
+ __rcu_idle_enter();
+}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace. No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ *
+ * Unlike rcu_idle_enter(), this performs no idle-task check, so it may
+ * be called from non-idle tasks.
+ */
+void rcu_user_enter(void)
+{
+ __rcu_idle_enter();
+}
+
+/**
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
*
* Exit from an interrupt handler, which might possibly result in entering
@@ -446,6 +488,29 @@
local_irq_restore(flags);
}
+/*
+ * rcu_check_idle_exit - report a non-idle task leaving RCU idle mode
+ * @oldval: nesting value passed in by the caller; it is only forwarded
+ *          to the trace event
+ *
+ * Does nothing when current is the idle task.  Otherwise, with local
+ * interrupts disabled, records an rcu_dyntick trace event carrying
+ * @oldval and this CPU's current dynticks_nesting value, dumps the
+ * ftrace buffer and issues a one-time warning naming both the current
+ * task and this CPU's idle task.
+ */
+static void rcu_check_idle_exit(long long oldval)
+{
+ struct task_struct *idle;
+ struct rcu_dynticks *rdtp;
+ unsigned long flags;
+
+ if (is_idle_task(current))
+ return;
+
+ /* Interrupts stay off while this CPU's rcu_dynticks and idle task are read. */
+ local_irq_save(flags);
+
+ idle = idle_task(smp_processor_id());
+ rdtp = &__get_cpu_var(rcu_dynticks);
+ trace_rcu_dyntick("Error on exit: not idle task",
+ oldval, rdtp->dynticks_nesting);
+ ftrace_dump(DUMP_ORIG);
+ WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+ current->pid, current->comm,
+ idle->pid, idle->comm); /* must be idle task! */
+
+ local_irq_restore(flags);
+}
+
/*
* rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
*
@@ -462,20 +527,10 @@
WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
rcu_cleanup_after_idle(smp_processor_id());
trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
- if (!is_idle_task(current)) {
- struct task_struct *idle = idle_task(smp_processor_id());
-
- trace_rcu_dyntick("Error on exit: not idle task",
- oldval, rdtp->dynticks_nesting);
- ftrace_dump(DUMP_ALL);
- WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
- current->pid, current->comm,
- idle->pid, idle->comm); /* must be idle task! */
- }
}
/**
- * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ * __rcu_idle_exit - inform RCU that current CPU is leaving RCU idle mode
*
* Exit idle mode, in other words, -enter- the mode in which RCU
* read-side critical sections can occur.
@@ -485,7 +540,7 @@
* of interrupt nesting level during the busy period that is just
* now starting.
*/
-void rcu_idle_exit(void)
+static long long __rcu_idle_exit(void)
{
unsigned long flags;
struct rcu_dynticks *rdtp;
@@ -501,10 +556,35 @@
rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
rcu_idle_exit_common(rdtp, oldval);
local_irq_restore(flags);
+ return oldval;
}
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
/**
+ * rcu_idle_exit - inform RCU that idle task is leaving RCU idle mode
+ *
+ * Exit idle mode from the idle task after we wake the CPU up from
+ * low power mode. The CPU can make use of RCU read side critical
+ * sections again after this call.
+ *
+ * The idle-task check runs after the transition, using the nesting
+ * value returned by __rcu_idle_exit().
+ */
+void rcu_idle_exit(void)
+{
+ long long oldval = __rcu_idle_exit();
+
+ rcu_check_idle_exit(oldval);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run an RCU read side critical section anytime.
+ *
+ * No idle-task check is done here, so the value returned by
+ * __rcu_idle_exit() is not needed and is ignored.
+ */
+void rcu_user_exit(void)
+{
+ __rcu_idle_exit();
+}
+
+/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
*
* Enter an interrupt handler, which might possibly result in exiting