blob: e18a2cc4e46a0291a2c7f70f01153d6e6d28a30a [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2018 Linaro Limited
*
* Author: Daniel Lezcano <daniel.lezcano@linaro.org>
*
* The idle injection framework provides a way to force CPUs to enter idle
* states for a specified fraction of time over a specified period.
*
* It relies on the smpboot kthreads feature providing common code for CPU
* hotplug and thread [un]parking.
*
* All of the kthreads used for idle injection are created at init time.
*
* Next, the users of the idle injection framework provide a cpumask via
* its register function. The kthreads will be synchronized with respect to
* this cpumask.
*
* The idle + run duration is specified via separate helpers and that allows
* idle injection to be started.
*
* The idle injection kthreads will call play_idle_precise() with the idle
* duration and max allowed latency specified as per the above.
*
* After all of them have been woken up, a timer is set to start the next idle
* injection cycle.
*
* The timer interrupt handler will wake up the idle injection kthreads for
* all of the CPUs in the cpumask provided by the user.
*
* Idle injection is stopped synchronously and no leftover idle injection
* kthread activity after its completion is guaranteed.
*
* It is up to the user of this framework to provide a lock for higher-level
* synchronization to prevent race conditions like starting idle injection
* while unregistering from the framework.
*/
#define pr_fmt(fmt) "ii_dev: " fmt
#include <linux/cpu.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smpboot.h>
#include <linux/idle_inject.h>
#include <uapi/linux/sched/types.h>
/**
* struct idle_inject_thread - task on/off switch structure
* @tsk: task injecting the idle cycles
* @should_run: whether or not to run the task (for the smpboot kthread API)
*/
struct idle_inject_thread {
struct task_struct *tsk;
int should_run;
};
/**
* struct idle_inject_device - idle injection data
* @timer: idle injection period timer
* @idle_duration_us: duration of CPU idle time to inject
* @run_duration_us: duration of CPU run time to allow
* @latency_us: max allowed latency
* @update: Optional callback deciding whether or not to skip idle
* injection in the given cycle.
* @cpumask: mask of CPUs affected by idle injection
*
* This structure is used to define per instance idle inject device data. Each
* instance has an idle duration, a run duration and mask of CPUs to inject
* idle.
*
* Actual CPU idle time is injected by calling kernel scheduler interface
* play_idle_precise(). There is one optional callback that can be registered
* by calling idle_inject_register_full():
*
* update() - This callback is invoked just before waking up CPUs to inject
* idle. If it returns false, CPUs are not woken up to inject idle in the given
* cycle. It also allows the caller to readjust the idle and run duration by
* calling idle_inject_set_duration() for the next cycle.
*/
struct idle_inject_device {
struct hrtimer timer;
unsigned int idle_duration_us;
unsigned int run_duration_us;
unsigned int latency_us;
bool (*update)(void);
unsigned long cpumask[];
};
static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread);
static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device);
/**
* idle_inject_wakeup - Wake up idle injection threads
* @ii_dev: target idle injection device
*
* Every idle injection task associated with the given idle injection device
* and running on an online CPU will be woken up.
*/
static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
{
struct idle_inject_thread *iit;
unsigned int cpu;
for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) {
iit = per_cpu_ptr(&idle_inject_thread, cpu);
iit->should_run = 1;
wake_up_process(iit->tsk);
}
}
/**
* idle_inject_timer_fn - idle injection timer function
* @timer: idle injection hrtimer
*
* This function is called when the idle injection timer expires. It wakes up
* idle injection tasks associated with the timer and they, in turn, invoke
* play_idle_precise() to inject a specified amount of CPU idle time.
*
* Return: HRTIMER_RESTART.
*/
static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
{
unsigned int duration_us;
struct idle_inject_device *ii_dev =
container_of(timer, struct idle_inject_device, timer);
if (!ii_dev->update || (ii_dev->update && ii_dev->update()))
idle_inject_wakeup(ii_dev);
duration_us = READ_ONCE(ii_dev->run_duration_us);
duration_us += READ_ONCE(ii_dev->idle_duration_us);
hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC));
return HRTIMER_RESTART;
}
/**
* idle_inject_fn - idle injection work function
* @cpu: the CPU owning the task
*
* This function calls play_idle_precise() to inject a specified amount of CPU
* idle time.
*/
static void idle_inject_fn(unsigned int cpu)
{
struct idle_inject_device *ii_dev;
struct idle_inject_thread *iit;
ii_dev = per_cpu(idle_inject_device, cpu);
iit = per_cpu_ptr(&idle_inject_thread, cpu);
/*
* Let the smpboot main loop know that the task should not run again.
*/
iit->should_run = 0;
play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC,
READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC);
}
/**
* idle_inject_set_duration - idle and run duration update helper
* @ii_dev: idle injection control device structure
* @run_duration_us: CPU run time to allow in microseconds
* @idle_duration_us: CPU idle time to inject in microseconds
*/
void idle_inject_set_duration(struct idle_inject_device *ii_dev,
unsigned int run_duration_us,
unsigned int idle_duration_us)
{
if (run_duration_us + idle_duration_us) {
WRITE_ONCE(ii_dev->run_duration_us, run_duration_us);
WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us);
}
if (!run_duration_us)
pr_debug("CPU is forced to 100 percent idle\n");
}
EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, IDLE_INJECT);
/**
* idle_inject_get_duration - idle and run duration retrieval helper
* @ii_dev: idle injection control device structure
* @run_duration_us: memory location to store the current CPU run time
* @idle_duration_us: memory location to store the current CPU idle time
*/
void idle_inject_get_duration(struct idle_inject_device *ii_dev,
unsigned int *run_duration_us,
unsigned int *idle_duration_us)
{
*run_duration_us = READ_ONCE(ii_dev->run_duration_us);
*idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
}
EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, IDLE_INJECT);
/**
* idle_inject_set_latency - set the maximum latency allowed
* @ii_dev: idle injection control device structure
* @latency_us: set the latency requirement for the idle state
*/
void idle_inject_set_latency(struct idle_inject_device *ii_dev,
unsigned int latency_us)
{
WRITE_ONCE(ii_dev->latency_us, latency_us);
}
EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, IDLE_INJECT);
/**
* idle_inject_start - start idle injections
* @ii_dev: idle injection control device structure
*
* The function starts idle injection by first waking up all of the idle
* injection kthreads associated with @ii_dev to let them inject CPU idle time
* sets up a timer to start the next idle injection period.
*
* Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success.
*/
int idle_inject_start(struct idle_inject_device *ii_dev)
{
unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us);
if (!(idle_duration_us + run_duration_us))
return -EINVAL;
pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
idle_inject_wakeup(ii_dev);
hrtimer_start(&ii_dev->timer,
ns_to_ktime((idle_duration_us + run_duration_us) *
NSEC_PER_USEC),
HRTIMER_MODE_REL);
return 0;
}
EXPORT_SYMBOL_NS_GPL(idle_inject_start, IDLE_INJECT);
/**
* idle_inject_stop - stops idle injections
* @ii_dev: idle injection control device structure
*
* The function stops idle injection and waits for the threads to finish work.
* If CPU idle time is being injected when this function runs, then it will
* wait until the end of the cycle.
*
* When it returns, there is no more idle injection kthread activity. The
* kthreads are scheduled out and the periodic timer is off.
*/
void idle_inject_stop(struct idle_inject_device *ii_dev)
{
struct idle_inject_thread *iit;
unsigned int cpu;
pr_debug("Stopping idle injection on CPUs '%*pbl'\n",
cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
hrtimer_cancel(&ii_dev->timer);
/*
* Stopping idle injection requires all of the idle injection kthreads
* associated with the given cpumask to be parked and stay that way, so
* prevent CPUs from going online at this point. Any CPUs going online
* after the loop below will be covered by clearing the should_run flag
* that will cause the smpboot main loop to schedule them out.
*/
cpu_hotplug_disable();
/*
* Iterate over all (online + offline) CPUs here in case one of them
* goes offline with the should_run flag set so as to prevent its idle
* injection kthread from running when the CPU goes online again after
* the ii_dev has been freed.
*/
for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
iit = per_cpu_ptr(&idle_inject_thread, cpu);
iit->should_run = 0;
wait_task_inactive(iit->tsk, TASK_ANY);
}
cpu_hotplug_enable();
}
EXPORT_SYMBOL_NS_GPL(idle_inject_stop, IDLE_INJECT);
/**
* idle_inject_setup - prepare the current task for idle injection
* @cpu: not used
*
* Called once, this function is in charge of setting the current task's
* scheduler parameters to make it an RT task.
*/
static void idle_inject_setup(unsigned int cpu)
{
sched_set_fifo(current);
}
/**
* idle_inject_should_run - function helper for the smpboot API
* @cpu: CPU the kthread is running on
*
* Return: whether or not the thread can run.
*/
static int idle_inject_should_run(unsigned int cpu)
{
struct idle_inject_thread *iit =
per_cpu_ptr(&idle_inject_thread, cpu);
return iit->should_run;
}
/**
* idle_inject_register_full - initialize idle injection on a set of CPUs
* @cpumask: CPUs to be affected by idle injection
* @update: This callback is called just before waking up CPUs to inject
* idle
*
* This function creates an idle injection control device structure for the
* given set of CPUs and initializes the timer associated with it. This
* function also allows to register update()callback.
* It does not start any injection cycles.
*
* Return: NULL if memory allocation fails, idle injection control device
* pointer on success.
*/
struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask,
bool (*update)(void))
{
struct idle_inject_device *ii_dev;
int cpu, cpu_rb;
ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL);
if (!ii_dev)
return NULL;
cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
ii_dev->timer.function = idle_inject_timer_fn;
ii_dev->latency_us = UINT_MAX;
ii_dev->update = update;
for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
if (per_cpu(idle_inject_device, cpu)) {
pr_err("cpu%d is already registered\n", cpu);
goto out_rollback;
}
per_cpu(idle_inject_device, cpu) = ii_dev;
}
return ii_dev;
out_rollback:
for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) {
if (cpu == cpu_rb)
break;
per_cpu(idle_inject_device, cpu_rb) = NULL;
}
kfree(ii_dev);
return NULL;
}
EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, IDLE_INJECT);
/**
* idle_inject_register - initialize idle injection on a set of CPUs
* @cpumask: CPUs to be affected by idle injection
*
* This function creates an idle injection control device structure for the
* given set of CPUs and initializes the timer associated with it. It does not
* start any injection cycles.
*
* Return: NULL if memory allocation fails, idle injection control device
* pointer on success.
*/
struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
{
return idle_inject_register_full(cpumask, NULL);
}
EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT);
/**
* idle_inject_unregister - unregister idle injection control device
* @ii_dev: idle injection control device to unregister
*
* The function stops idle injection for the given control device,
* unregisters its kthreads and frees memory allocated when that device was
* created.
*/
void idle_inject_unregister(struct idle_inject_device *ii_dev)
{
unsigned int cpu;
idle_inject_stop(ii_dev);
for_each_cpu(cpu, to_cpumask(ii_dev->cpumask))
per_cpu(idle_inject_device, cpu) = NULL;
kfree(ii_dev);
}
EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, IDLE_INJECT);
static struct smp_hotplug_thread idle_inject_threads = {
.store = &idle_inject_thread.tsk,
.setup = idle_inject_setup,
.thread_fn = idle_inject_fn,
.thread_comm = "idle_inject/%u",
.thread_should_run = idle_inject_should_run,
};
static int __init idle_inject_init(void)
{
return smpboot_register_percpu_thread(&idle_inject_threads);
}
early_initcall(idle_inject_init);